diff --git a/.fernignore b/.fernignore index 112f779b..fd7adc81 100644 --- a/.fernignore +++ b/.fernignore @@ -13,10 +13,13 @@ mypy.ini README.md src/humanloop/decorators src/humanloop/otel +src/humanloop/sync +src/humanloop/cli +pytest.ini ## Tests -tests/ +tests/custom ## CI diff --git a/.gitignore b/.gitignore index a55ede77..f5cda9d9 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ poetry.toml .env tests/assets/*.jsonl tests/assets/*.parquet +# Ignore humanloop directory which could mistakenly be committed when testing sync functionality as it's used as the default sync directory +humanloop diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..8ab80e5d --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +addopts = -n auto diff --git a/tests/integration/__init__.py b/src/humanloop/cli/__init__.py similarity index 100% rename from tests/integration/__init__.py rename to src/humanloop/cli/__init__.py diff --git a/src/humanloop/cli/__main__.py b/src/humanloop/cli/__main__.py new file mode 100644 index 00000000..3ab53cfb --- /dev/null +++ b/src/humanloop/cli/__main__.py @@ -0,0 +1,250 @@ +import logging +import os +import sys +import time +from functools import wraps +from typing import Callable, Optional + +import click +from dotenv import load_dotenv + +from humanloop import Humanloop +from humanloop.sync.sync_client import SyncClient + +# Set up logging +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) # Set back to INFO level +console_handler = logging.StreamHandler() +formatter = logging.Formatter("%(message)s") # Simplified formatter +console_handler.setFormatter(formatter) +if not logger.hasHandlers(): + logger.addHandler(console_handler) + +# Color constants +SUCCESS_COLOR = "green" +ERROR_COLOR = "red" +INFO_COLOR = "blue" +WARNING_COLOR = "yellow" + + +def load_api_key(env_file: Optional[str] = None) -> str: + """Load API key from .env file or environment variable. + + Args: + env_file: Optional path to .env file + + Returns: + str: The loaded API key + + Raises: + click.ClickException: If no API key is found + """ + # Try specific .env file if provided, otherwise default to .env in current directory + if env_file: + if not load_dotenv(env_file): # load_dotenv returns False if file not found/invalid + raise click.ClickException( + click.style( + f"Failed to load environment file: {env_file} (file not found or invalid format)", + fg=ERROR_COLOR, + ) + ) + else: + load_dotenv() # Attempt to load from default .env in current directory + + # Get API key from environment + api_key = os.getenv("HUMANLOOP_API_KEY") + if not api_key: + raise click.ClickException( + click.style( + "No API key found. Set HUMANLOOP_API_KEY in .env file or environment, or use --api-key", fg=ERROR_COLOR + ) + ) + + return api_key + + +def get_client( + api_key: Optional[str] = None, env_file: Optional[str] = None, base_url: Optional[str] = None +) -> Humanloop: + """Instantiate a Humanloop client for the CLI. + + Args: + api_key: Optional API key provided directly + env_file: Optional path to .env file + base_url: Optional base URL for the API + + Returns: + Humanloop: Configured client instance + + Raises: + click.ClickException: If no API key is found + """ + if not api_key: + api_key = load_api_key(env_file) + return Humanloop(api_key=api_key, base_url=base_url) + + +def common_options(f: Callable) -> Callable: + """Decorator for common CLI options.""" + + @click.option( + "--api-key", + help="Humanloop API key. 
If not provided, uses HUMANLOOP_API_KEY from .env or environment.", + default=None, + show_default=False, + ) + @click.option( + "--env-file", + help="Path to .env file. If not provided, looks for .env in current directory.", + default=None, + type=click.Path(exists=True), + show_default=False, + ) + @click.option( + "--local-files-directory", + "--local-dir", + help="Directory (relative to the current working directory) where Humanloop files are stored locally (default: humanloop/).", + default="humanloop", + type=click.Path(), + ) + @click.option( + "--base-url", + default=None, + hidden=True, + ) + @wraps(f) + def wrapper(*args, **kwargs): + return f(*args, **kwargs) + + return wrapper + + +def handle_sync_errors(f: Callable) -> Callable: + """Decorator for handling sync operation errors. + + If an error occurs in any operation that uses this decorator, it will be logged and the program will exit with a non-zero exit code. + """ + + @wraps(f) + def wrapper(*args, **kwargs): + try: + return f(*args, **kwargs) + except Exception as e: + click.echo(click.style(str(f"Error: {e}"), fg=ERROR_COLOR)) + sys.exit(1) + + return wrapper + + +@click.group( + help="Humanloop CLI for managing sync operations.", + context_settings={ + "help_option_names": ["-h", "--help"], + "max_content_width": 100, + }, +) +def cli(): # Does nothing because used as a group for other subcommands (pull, push, etc.) + """Humanloop CLI for managing sync operations.""" + pass + + +@cli.command() +@click.option( + "--path", + "-p", + help="Path in the Humanloop workspace to pull from (file or directory). You can pull an entire directory (e.g. 'my/directory') " + "or a specific file (e.g. 'my/directory/my_prompt.prompt'). When pulling a directory, all files within that directory and its subdirectories will be included. " + "If not specified, pulls from the root of the remote workspace.", + default=None, +) +@click.option( + "--environment", + "-e", + help="Environment to pull from (e.g. 'production', 'staging')", + default=None, +) +@click.option( + "--verbose", + "-v", + is_flag=True, + help="Show detailed information about the operation", +) +@click.option( + "--quiet", + "-q", + is_flag=True, + help="Suppress output of successful files", +) +@handle_sync_errors +@common_options +def pull( + path: Optional[str], + environment: Optional[str], + api_key: Optional[str], + env_file: Optional[str], + local_files_directory: str, + base_url: Optional[str], + verbose: bool, + quiet: bool, +): + """Pull Prompt and Agent files from Humanloop to your local filesystem. + + \b + This command will: + 1. Fetch Prompt and Agent files from your Humanloop workspace + 2. Save them to your local filesystem (directory specified by --local-files-directory, default: humanloop/) + 3. Maintain the same directory structure as in Humanloop + 4. Add appropriate file extensions (.prompt or .agent) + + \b + For example, with the default --local-files-directory=humanloop, files will be saved as: + ./humanloop/ + ├── my_project/ + │ ├── prompts/ + │ │ ├── my_prompt.prompt + │ │ └── nested/ + │ │ └── another_prompt.prompt + │ └── agents/ + │ └── my_agent.agent + └── another_project/ + └── prompts/ + └── other_prompt.prompt + + \b + If you specify --local-files-directory=data/humanloop, files will be saved in ./data/humanloop/ instead. + + If a file exists both locally and in the Humanloop workspace, the local file will be overwritten + with the version from Humanloop. Files that only exist locally will not be affected. 
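As a rough illustration of how this command might be exercised (flag names are taken from the options declared above; the workspace path `samples` and the environment `production` are placeholders, and `HUMANLOOP_API_KEY` is assumed to be available via the environment or a `.env` file), the `pull` command can be driven in-process with click's test runner, or from a shell via `python -m humanloop.cli pull -p samples -e production -v`:

```python
# Sketch only: invoke the CLI `pull` command in-process using click's CliRunner
# (the same helper the integration test fixtures below rely on).
from click.testing import CliRunner

from humanloop.cli.__main__ import cli

runner = CliRunner()
result = runner.invoke(
    cli,
    ["pull", "--path", "samples", "--environment", "production", "--verbose"],
)
print(result.exit_code)  # 0 on success, 1 if handle_sync_errors caught a failure
print(result.output)     # mirrors the progress/summary messages echoed by the command
```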
+ + Currently only supports syncing Prompt and Agent files. Other file types will be skipped.""" + client = get_client(api_key, env_file, base_url) + sync_client = SyncClient( + client, base_dir=local_files_directory, log_level=logging.DEBUG if verbose else logging.WARNING + ) + + click.echo(click.style("Pulling files from Humanloop...", fg=INFO_COLOR)) + click.echo(click.style(f"Path: {path or '(root)'}", fg=INFO_COLOR)) + click.echo(click.style(f"Environment: {environment or '(default)'}", fg=INFO_COLOR)) + + start_time = time.time() + successful_files, failed_files = sync_client.pull(path, environment) + duration_ms = int((time.time() - start_time) * 1000) + + # Determine if the operation was successful based on failed_files + is_successful = not failed_files + duration_color = SUCCESS_COLOR if is_successful else ERROR_COLOR + click.echo(click.style(f"Pull completed in {duration_ms}ms", fg=duration_color)) + + if successful_files and not quiet: + click.echo(click.style(f"\nSuccessfully pulled {len(successful_files)} files:", fg=SUCCESS_COLOR)) + for file in successful_files: + click.echo(click.style(f" ✓ {file}", fg=SUCCESS_COLOR)) + + if failed_files: + click.echo(click.style(f"\nFailed to pull {len(failed_files)} files:", fg=ERROR_COLOR)) + for file in failed_files: + click.echo(click.style(f" ✗ {file}", fg=ERROR_COLOR)) + + +if __name__ == "__main__": + cli() diff --git a/src/humanloop/client.py b/src/humanloop/client.py index 74cd6c97..ab6b2abc 100644 --- a/src/humanloop/client.py +++ b/src/humanloop/client.py @@ -1,6 +1,7 @@ import os import typing -from typing import Any, List, Optional, Sequence +from typing import Any, List, Optional, Sequence, Tuple +import logging import httpx from opentelemetry.sdk.resources import Resource @@ -18,7 +19,7 @@ ) from humanloop.base_client import AsyncBaseHumanloop, BaseHumanloop -from humanloop.overload import overload_call, overload_log +from humanloop.overload import overload_client from humanloop.decorators.flow import flow as flow_decorator_factory from humanloop.decorators.prompt import prompt_decorator_factory from humanloop.decorators.tool import tool_decorator_factory as tool_decorator_factory @@ -29,6 +30,9 @@ from humanloop.otel.processor import HumanloopSpanProcessor from humanloop.prompt_utils import populate_template from humanloop.prompts.client import PromptsClient +from humanloop.sync.sync_client import SyncClient, DEFAULT_CACHE_SIZE + +logger = logging.getLogger("humanloop.sdk") class ExtendedEvalsClient(EvaluationsClient): @@ -87,8 +91,9 @@ class Humanloop(BaseHumanloop): """ See docstring of :class:`BaseHumanloop`. - This class extends the base client with custom evaluation utilities - and decorators for declaring Files in code. + This class extends the base client with custom evaluation utilities, + decorators for declaring Files in code, and utilities for syncing + files between Humanloop and local filesystem. """ def __init__( @@ -102,6 +107,9 @@ def __init__( httpx_client: typing.Optional[httpx.Client] = None, opentelemetry_tracer_provider: Optional[TracerProvider] = None, opentelemetry_tracer: Optional[Tracer] = None, + use_local_files: bool = False, + local_files_directory: str = "humanloop", + cache_size: int = DEFAULT_CACHE_SIZE, ): """ Extends the base client with custom evaluation utilities and @@ -111,6 +119,27 @@ def __init__( You can provide a TracerProvider and a Tracer to integrate with your existing telemetry system. If not provided, an internal TracerProvider will be used. 
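For orientation, a minimal sketch of the new constructor options and the `pull` helper added below (the `samples/test` path and the question are illustrative, and `HUMANLOOP_API_KEY` is assumed to be set in the environment or a `.env` file):

```python
# Sketch only: enable local files, populate ./humanloop/ from the workspace,
# then call a prompt whose definition is read from disk.
from humanloop import Humanloop

client = Humanloop(
    use_local_files=True,               # resolve prompt/agent definitions from local files
    local_files_directory="humanloop",  # the default; files are stored under ./humanloop/
)

# Equivalent to the CLI `pull`: returns (successful_paths, failed_paths).
successful, failed = client.pull()

# Paths are relative to local_files_directory and carry no extension, so
# ./humanloop/samples/test.prompt is addressed as "samples/test".
response = client.prompts.call(
    path="samples/test",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
)
print(response)
```

Note that passing `version_id` or `environment` alongside `use_local_files=True` raises a `HumanloopRuntimeError`, since the two sources of truth would conflict (see `_handle_local_files` in `overload.py` below).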
+ + Parameters + ---------- + base_url: Optional base URL for the API + environment: The environment to use (default: DEFAULT) + api_key: Your Humanloop API key (default: from HUMANLOOP_API_KEY env var) + timeout: Optional timeout for API requests + follow_redirects: Whether to follow redirects + httpx_client: Optional custom httpx client + opentelemetry_tracer_provider: Optional tracer provider for telemetry + opentelemetry_tracer: Optional tracer for telemetry + use_local_files: Whether to use local files for prompts and agents + local_files_directory: Base directory where local prompt and agent files are stored (default: "humanloop"). + This is relative to the current working directory. For example: + - "humanloop" will look for files in "./humanloop/" + - "data/humanloop" will look for files in "./data/humanloop/" + When using paths in the API, they must be relative to this directory. For example, + if local_files_directory="humanloop" and you have a file at "humanloop/samples/test.prompt", + you would reference it as "samples/test" in your code. + cache_size: Maximum number of files to cache when use_local_files is True (default: DEFAULT_CACHE_SIZE). + This parameter has no effect if use_local_files is False. """ super().__init__( base_url=base_url, @@ -121,6 +150,17 @@ def __init__( httpx_client=httpx_client, ) + self.use_local_files = use_local_files + + # Warn user if cache_size is non-default but use_local_files is False — has no effect and will therefore be ignored + if not self.use_local_files and cache_size != DEFAULT_CACHE_SIZE: + logger.warning( + f"The specified cache_size={cache_size} will have no effect because use_local_files=False. " + f"File caching is only active when local files are enabled." + ) + + # Check if cache_size is non-default but use_local_files is False + self._sync_client = SyncClient(client=self, base_dir=local_files_directory, cache_size=cache_size) eval_client = ExtendedEvalsClient(client_wrapper=self._client_wrapper) eval_client.client = self self.evaluations = eval_client @@ -128,10 +168,14 @@ def __init__( # Overload the .log method of the clients to be aware of Evaluation Context # and the @flow decorator providing the trace_id - self.prompts = overload_log(client=self.prompts) - self.prompts = overload_call(client=self.prompts) - self.flows = overload_log(client=self.flows) - self.tools = overload_log(client=self.tools) + self.prompts = overload_client( + client=self.prompts, sync_client=self._sync_client, use_local_files=self.use_local_files + ) + self.agents = overload_client( + client=self.agents, sync_client=self._sync_client, use_local_files=self.use_local_files + ) + self.flows = overload_client(client=self.flows) + self.tools = overload_client(client=self.tools) if opentelemetry_tracer_provider is not None: self._tracer_provider = opentelemetry_tracer_provider @@ -351,6 +395,53 @@ def agent(): attributes=attributes, ) + def pull(self, path: Optional[str] = None, environment: Optional[str] = None) -> Tuple[List[str], List[str]]: + """Pull Prompt and Agent files from Humanloop to local filesystem. + + This method will: + 1. Fetch Prompt and Agent files from your Humanloop workspace + 2. Save them to your local filesystem (directory specified by `local_files_directory`, default: "humanloop") + 3. Maintain the same directory structure as in Humanloop + 4. Add appropriate file extensions (`.prompt` or `.agent`) + + The path parameter can be used in two ways: + - If it points to a specific file (e.g. 
"path/to/file.prompt" or "path/to/file.agent"), only that file will be pulled + - If it points to a directory (e.g. "path/to/directory"), all Prompt and Agent files in that directory and its subdirectories will be pulled + - If no path is provided, all Prompt and Agent files will be pulled + + The operation will overwrite existing files with the latest version from Humanloop + but will not delete local files that don't exist in the remote workspace. + + Currently only supports syncing Prompt and Agent files. Other file types will be skipped. + + For example, with the default `local_files_directory="humanloop"`, files will be saved as: + ``` + ./humanloop/ + ├── my_project/ + │ ├── prompts/ + │ │ ├── my_prompt.prompt + │ │ └── nested/ + │ │ └── another_prompt.prompt + │ └── agents/ + │ └── my_agent.agent + └── another_project/ + └── prompts/ + └── other_prompt.prompt + ``` + + If you specify `local_files_directory="data/humanloop"`, files will be saved in ./data/humanloop/ instead. + + :param path: Optional path to either a specific file (e.g. "path/to/file.prompt") or a directory (e.g. "path/to/directory"). + If not provided, all Prompt and Agent files will be pulled. + :param environment: The environment to pull the files from. + :return: Tuple of two lists: + - First list contains paths of successfully synced files + - Second list contains paths of files that failed to sync (due to API errors, missing content, + or filesystem issues) + :raises HumanloopRuntimeError: If there's an error communicating with the API + """ + return self._sync_client.pull(environment=environment, path=path) + class AsyncHumanloop(AsyncBaseHumanloop): """ diff --git a/src/humanloop/overload.py b/src/humanloop/overload.py index b0c83215..92c83e6b 100644 --- a/src/humanloop/overload.py +++ b/src/humanloop/overload.py @@ -1,54 +1,69 @@ import inspect import logging import types -from typing import TypeVar, Union +from typing import Any, Dict, Optional, Union, Callable from humanloop.context import ( get_decorator_context, get_evaluation_context, get_trace_id, ) -from humanloop.evals.run import HumanloopRuntimeError - -from humanloop.evaluators.client import EvaluatorsClient -from humanloop.flows.client import FlowsClient +from humanloop.error import HumanloopRuntimeError +from humanloop.sync.sync_client import SyncClient from humanloop.prompts.client import PromptsClient +from humanloop.flows.client import FlowsClient +from humanloop.datasets.client import DatasetsClient +from humanloop.agents.client import AgentsClient from humanloop.tools.client import ToolsClient +from humanloop.evaluators.client import EvaluatorsClient +from humanloop.types import FileType from humanloop.types.create_evaluator_log_response import CreateEvaluatorLogResponse from humanloop.types.create_flow_log_response import CreateFlowLogResponse from humanloop.types.create_prompt_log_response import CreatePromptLogResponse from humanloop.types.create_tool_log_response import CreateToolLogResponse from humanloop.types.prompt_call_response import PromptCallResponse +from humanloop.types.agent_call_response import AgentCallResponse logger = logging.getLogger("humanloop.sdk") - -CLIENT_TYPE = TypeVar("CLIENT_TYPE", PromptsClient, FlowsClient, EvaluatorsClient, ToolsClient) - - -def overload_log(client: CLIENT_TYPE) -> CLIENT_TYPE: - """ - Wrap the `log` method of the provided Humanloop client to use EVALUATION_CONTEXT. - - This makes the overloaded log actions be aware of whether the created Log is - part of an Evaluation (e.g. 
one started by eval_utils.run_eval). - """ - # Copy the original log method in a hidden attribute - client._log = client.log # type: ignore [attr-defined] - - def _overload_log( - # It's safe to only consider kwargs since the original - # log method bans positional arguments - self, - **kwargs, - ) -> Union[ - CreatePromptLogResponse, - CreateToolLogResponse, - CreateFlowLogResponse, - CreateEvaluatorLogResponse, - ]: - trace_id = get_trace_id() - if trace_id is not None and type(client) is FlowsClient: +LogResponseType = Union[ + CreatePromptLogResponse, + CreateToolLogResponse, + CreateFlowLogResponse, + CreateEvaluatorLogResponse, +] + +CallResponseType = Union[ + PromptCallResponse, + AgentCallResponse, +] + + +def _get_file_type_from_client( + client: Union[PromptsClient, AgentsClient, ToolsClient, FlowsClient, DatasetsClient, EvaluatorsClient], +) -> FileType: + """Get the file type based on the client type.""" + if isinstance(client, PromptsClient): + return "prompt" + elif isinstance(client, AgentsClient): + return "agent" + elif isinstance(client, ToolsClient): + return "tool" + elif isinstance(client, FlowsClient): + return "flow" + elif isinstance(client, DatasetsClient): + return "dataset" + elif isinstance(client, EvaluatorsClient): + return "evaluator" + + raise ValueError(f"Unsupported client type: {type(client)}") + + +def _handle_tracing_context(kwargs: Dict[str, Any], client: Any) -> Dict[str, Any]: + """Handle tracing context for both log and call methods.""" + trace_id = get_trace_id() + if trace_id is not None: + if "flow" in str(type(client).__name__).lower(): context = get_decorator_context() if context is None: raise HumanloopRuntimeError("Internal error: trace_id context is set outside a decorator context.") @@ -56,69 +71,146 @@ def _overload_log( f"Using `flows.log()` is not allowed: Flow decorator " f"for File {context.path} manages the tracing and trace completion." 
) - if trace_id is not None: - if "trace_parent_id" in kwargs: - logger.warning( - "Ignoring trace_parent_id argument at line %d: the Flow decorator manages tracing.", - inspect.currentframe().f_lineno, # type: ignore [union-attr] - ) - kwargs = { - **kwargs, - "trace_parent_id": trace_id, - } - evaluation_context = get_evaluation_context() - if evaluation_context is not None: - kwargs_eval, eval_callback = evaluation_context.log_args_with_context( - path=kwargs.get("path"), log_args=kwargs - ) - try: - response = self._log(**kwargs_eval) - except Exception as e: - # Re-raising as HumanloopDecoratorError so the decorators don't catch it - raise HumanloopRuntimeError from e - if eval_callback is not None: - eval_callback(response.id) - else: - try: - response = self._log(**kwargs) - except Exception as e: - # Re-raising as HumanloopDecoratorError so the decorators don't catch it - raise HumanloopRuntimeError from e - - return response - # Replace the original log method with the overloaded one - client.log = types.MethodType(_overload_log, client) # type: ignore [assignment] - # Return the client with the overloaded log method - logger.debug("Overloaded the .call method of %s", client) - return client + if "trace_parent_id" in kwargs: + logger.warning( + "Ignoring trace_parent_id argument at line %d: the Flow decorator manages tracing.", + inspect.currentframe().f_lineno, # type: ignore[union-attr] + ) + kwargs = { + **kwargs, + "trace_parent_id": trace_id, + } + return kwargs + + +def _handle_local_files( + kwargs: Dict[str, Any], + client: Any, + sync_client: Optional[SyncClient], + use_local_files: bool, +) -> Dict[str, Any]: + """Handle local file loading if enabled.""" + if not use_local_files or "path" not in kwargs or sync_client is None: + return kwargs + + if "id" in kwargs: + raise HumanloopRuntimeError("Can only specify one of `id` or `path`") + + # Check if version_id or environment is specified + use_remote = any(["version_id" in kwargs, "environment" in kwargs]) + normalized_path = sync_client._normalize_path(kwargs["path"]) + + if use_remote: + raise HumanloopRuntimeError( + f"Cannot use local file for `{normalized_path}` as version_id or environment was specified. " + "Please either remove version_id/environment to use local files, or set use_local_files=False to use remote files." + ) + + file_type = _get_file_type_from_client(client) + if file_type not in SyncClient.SERIALIZABLE_FILE_TYPES: + raise HumanloopRuntimeError(f"Local files are not supported for `{file_type}` files.") + + # If file_type is already specified in kwargs, it means user provided a PromptKernelRequestParams object + if file_type in kwargs and not isinstance(kwargs[file_type], str): + logger.warning( + f"Ignoring local file for `{normalized_path}` as {file_type} parameters were directly provided. " + "Using provided parameters instead." 
+ ) + return kwargs + + try: + file_content = sync_client.get_file_content(normalized_path, file_type) # type: ignore[arg-type] # file_type was checked above + kwargs[file_type] = file_content + except HumanloopRuntimeError as e: + raise HumanloopRuntimeError(f"Failed to use local file for `{normalized_path}`: {str(e)}") + + return kwargs + + +def _handle_evaluation_context(kwargs: Dict[str, Any]) -> tuple[Dict[str, Any], Optional[Callable[[str], None]]]: + """Handle evaluation context for logging.""" + evaluation_context = get_evaluation_context() + if evaluation_context is not None: + return evaluation_context.log_args_with_context(path=kwargs.get("path"), log_args=kwargs) + return kwargs, None + + +def _overload_log(self: Any, sync_client: Optional[SyncClient], use_local_files: bool, **kwargs) -> LogResponseType: + try: + # Special handling for flows - prevent direct log usage + if type(self) is FlowsClient and get_trace_id() is not None: + context = get_decorator_context() + if context is None: + raise HumanloopRuntimeError("Internal error: trace_id context is set outside a decorator context.") + raise HumanloopRuntimeError( + f"Using `flows.log()` is not allowed: Flow decorator " + f"for File {context.path} manages the tracing and trace completion." + ) + kwargs = _handle_tracing_context(kwargs, self) -def overload_call(client: PromptsClient) -> PromptsClient: - client._call = client.call # type: ignore [attr-defined] - - def _overload_call(self, **kwargs) -> PromptCallResponse: - # None if not logging inside a decorator - trace_id = get_trace_id() - if trace_id is not None: - if "trace_parent_id" in kwargs: - logger.warning( - "Ignoring trace_parent_id argument at line %d: the Flow decorator manages tracing.", - inspect.currentframe().f_lineno, # type: ignore [union-attr] - ) - kwargs = { - **kwargs, - "trace_parent_id": trace_id, - } - - try: - response = self._call(**kwargs) - except Exception as e: - # Re-raising as HumanloopDecoratorError so the decorators don't catch it - raise HumanloopRuntimeError from e + # Handle local files for Prompts and Agents clients + if _get_file_type_from_client(self) in ["prompt", "agent"]: + if sync_client is None: + logger.error("sync_client is None but client has log method and use_local_files=%s", use_local_files) + raise HumanloopRuntimeError("sync_client is required for clients that support local file operations") + kwargs = _handle_local_files(kwargs, self, sync_client, use_local_files) + kwargs, eval_callback = _handle_evaluation_context(kwargs) + response = self._log(**kwargs) # Use stored original method + if eval_callback is not None: + eval_callback(response.id) return response + except HumanloopRuntimeError: + # Re-raise HumanloopRuntimeError without wrapping to preserve the message + raise + except Exception as e: + # Only wrap non-HumanloopRuntimeError exceptions + raise HumanloopRuntimeError from e + + +def _overload_call(self: Any, sync_client: Optional[SyncClient], use_local_files: bool, **kwargs) -> CallResponseType: + try: + kwargs = _handle_tracing_context(kwargs, self) + kwargs = _handle_local_files(kwargs, self, sync_client, use_local_files) + return self._call(**kwargs) # Use stored original method + except HumanloopRuntimeError: + # Re-raise HumanloopRuntimeError without wrapping to preserve the message + raise + except Exception as e: + # Only wrap non-HumanloopRuntimeError exceptions + raise HumanloopRuntimeError from e + + +def overload_client( + client: Any, + sync_client: Optional[SyncClient] = None, + use_local_files: 
bool = False, +) -> Any: + """Overloads client methods to add tracing, local file handling, and evaluation context.""" + # Store original log method as _log for all clients. Used in flow decorator + if hasattr(client, "log") and not hasattr(client, "_log"): + client._log = client.log # type: ignore[attr-defined] + + # Create a closure to capture sync_client and use_local_files + def log_wrapper(self: Any, **kwargs) -> LogResponseType: + return _overload_log(self, sync_client, use_local_files, **kwargs) + + client.log = types.MethodType(log_wrapper, client) + + # Overload call method for Prompt and Agent clients + if _get_file_type_from_client(client) in ["prompt", "agent"]: + if sync_client is None and use_local_files: + logger.error("sync_client is None but client has call method and use_local_files=%s", use_local_files) + raise HumanloopRuntimeError("sync_client is required for clients that support call operations") + if hasattr(client, "call") and not hasattr(client, "_call"): + client._call = client.call # type: ignore[attr-defined] + + # Create a closure to capture sync_client and use_local_files + def call_wrapper(self: Any, **kwargs) -> CallResponseType: + return _overload_call(self, sync_client, use_local_files, **kwargs) + + client.call = types.MethodType(call_wrapper, client) - # Replace the original log method with the overloaded one - client.call = types.MethodType(_overload_call, client) # type: ignore [assignment] return client diff --git a/src/humanloop/sync/__init__.py b/src/humanloop/sync/__init__.py new file mode 100644 index 00000000..007659df --- /dev/null +++ b/src/humanloop/sync/__init__.py @@ -0,0 +1,3 @@ +from humanloop.sync.sync_client import SyncClient + +__all__ = ["SyncClient"] diff --git a/src/humanloop/sync/sync_client.py b/src/humanloop/sync/sync_client.py new file mode 100644 index 00000000..b1cf091a --- /dev/null +++ b/src/humanloop/sync/sync_client.py @@ -0,0 +1,376 @@ +import logging +from pathlib import Path +from typing import List, Optional, Tuple, TYPE_CHECKING, Union +from functools import lru_cache +import typing +import time +from humanloop.error import HumanloopRuntimeError +import json + +if TYPE_CHECKING: + from humanloop.base_client import BaseHumanloop + +# Set up logging +logger = logging.getLogger("humanloop.sdk.sync") +logger.setLevel(logging.INFO) +console_handler = logging.StreamHandler() +formatter = logging.Formatter("%(message)s") +console_handler.setFormatter(formatter) +if not logger.hasHandlers(): + logger.addHandler(console_handler) + +# Default cache size for file content caching +DEFAULT_CACHE_SIZE = 100 + + +def format_api_error(error: Exception) -> str: + """Format API error messages to be more user-friendly.""" + error_msg = str(error) + if "status_code" not in error_msg or "body" not in error_msg: + return error_msg + + try: + # Extract the body part and parse as JSON + body_str = error_msg.split("body: ")[1] + # Convert Python dict string to valid JSON by: + # 1. Escaping double quotes + # 2. 
Replacing single quotes with double quotes + body_str = body_str.replace('"', '\\"').replace("'", '"') + body = json.loads(body_str) + + # Get the detail from the body + detail = body.get("detail", {}) + + # Handle both string and dictionary types for detail + if isinstance(detail, str): + return detail + elif isinstance(detail, dict): + return detail.get("description") or detail.get("msg") or error_msg + else: + return error_msg + except Exception as e: + logger.debug(f"Failed to parse error message: {str(e)}") + return error_msg + + +SerializableFileType = typing.Literal["prompt", "agent"] + + +class SyncClient: + """Client for managing synchronization between local filesystem and Humanloop. + + This client provides file synchronization between Humanloop and the local filesystem, + with built-in caching for improved performance. The cache uses Python's LRU (Least + Recently Used) cache to automatically manage memory usage by removing least recently + accessed files when the cache is full. + + The cache is automatically updated when files are pulled or saved, and can be + manually cleared using the clear_cache() method. + """ + + # File types that can be serialized to/from the filesystem + SERIALIZABLE_FILE_TYPES = frozenset(typing.get_args(SerializableFileType)) + + def __init__( + self, + client: "BaseHumanloop", + base_dir: str = "humanloop", + cache_size: int = DEFAULT_CACHE_SIZE, + log_level: int = logging.WARNING, + ): + """ + Parameters + ---------- + client: Humanloop client instance + base_dir: Base directory for synced files (default: "humanloop") + cache_size: Maximum number of files to cache (default: DEFAULT_CACHE_SIZE) + log_level: Log level for logging (default: WARNING) + """ + self.client = client + self.base_dir = Path(base_dir) + self._cache_size = cache_size + + logger.setLevel(log_level) + + # Create a new cached version of get_file_content with the specified cache size + self.get_file_content = lru_cache(maxsize=cache_size)( # type: ignore [assignment] + self._get_file_content_implementation, + ) + + def _get_file_content_implementation(self, path: str, file_type: SerializableFileType) -> str: + """Implementation of get_file_content without the cache decorator. + + This is the actual implementation that gets wrapped by lru_cache. + + Args: + path: The normalized path to the file (without extension) + file_type: The type of file to get the content of (SerializableFileType) + + Returns: + The raw file content + + Raises: + HumanloopRuntimeError: In two cases: + 1. If the file doesn't exist at the expected location + 2. If there's a filesystem error when trying to read the file + (e.g., permission denied, file is locked, etc.) + """ + # Construct path to local file + local_path = self.base_dir / path + # Add appropriate extension + local_path = local_path.parent / f"{local_path.stem}.{file_type}" + + if not local_path.exists(): + raise HumanloopRuntimeError(f"Local file not found: {local_path}") + + try: + # Read the raw file content + with open(local_path) as f: + file_content = f.read() + logger.debug(f"Using local file content from {local_path}") + return file_content + except Exception as e: + raise HumanloopRuntimeError(f"Error reading local file {local_path}: {str(e)}") + + def get_file_content(self, path: str, file_type: SerializableFileType) -> str: + """Get the raw file content of a file from cache or filesystem. + + This method uses an LRU cache to store file contents. 
When the cache is full, + the least recently accessed files are automatically removed to make space. + + Args: + path: The normalized path to the file (without extension) + file_type: The type of file (Prompt or Agent) + + Returns: + The raw file content + + Raises: + HumanloopRuntimeError: If the file doesn't exist or can't be read + """ + return self._get_file_content_implementation(path, file_type) + + def clear_cache(self) -> None: + """Clear the LRU cache.""" + self.get_file_content.cache_clear() # type: ignore [attr-defined] + + def _normalize_path(self, path: str) -> str: + """Normalize the path by: + 1. Converting to a Path object to handle platform-specific separators + 2. Removing any file extensions + 3. Converting to a string with forward slashes and no leading/trailing slashes + """ + # Convert to Path object to handle platform-specific separators + path_obj = Path(path) + + # Reject absolute paths to ensure all paths are relative to base_dir. + # This maintains consistency with the remote filesystem where paths are relative to project root. + if path_obj.is_absolute(): + raise HumanloopRuntimeError( + f"Absolute paths are not supported: `{path}`. " + f"Paths should be relative to the base directory (`{self.base_dir}`)." + ) + + # Remove extension, convert to string with forward slashes, and remove leading/trailing slashes + normalized = str(path_obj.with_suffix("")) + # Replace all backslashes and normalize multiple forward slashes + return "/".join(part for part in normalized.replace("\\", "/").split("/") if part) + + def is_file(self, path: str) -> bool: + """Check if the path is a file by checking for .{file_type} extension for serializable file types.""" + return path.endswith(tuple(f".{file_type}" for file_type in self.SERIALIZABLE_FILE_TYPES)) + + def _save_serialized_file( + self, + serialized_content: str, + file_path: str, + file_type: SerializableFileType, + ) -> None: + """Save serialized file to local filesystem.""" + try: + # Create full path including base_dir prefix + full_path = self.base_dir / file_path + # Create directory if it doesn't exist + full_path.parent.mkdir(parents=True, exist_ok=True) + + # Add file type extension + new_path = full_path.parent / f"{full_path.stem}.{file_type}" + + # Write raw file content to file + with open(new_path, "w") as f: + f.write(serialized_content) + except Exception as e: + logger.error(f"Failed to write {file_type} {file_path} to disk: {str(e)}") + raise + + def _pull_file(self, path: str, environment: Optional[str] = None) -> bool: + """Pull a specific file from Humanloop to local filesystem. 
+ + Returns: + True if the file was successfully pulled, False otherwise + """ + try: + file = self.client.files.retrieve_by_path( + path=path, + environment=environment, + include_raw_file_content=True, + ) + + if file.type not in self.SERIALIZABLE_FILE_TYPES: + logger.error(f"Unsupported file type: {file.type}") + return False + + if not file.raw_file_content: # type: ignore [union-attr] + logger.error(f"No content found for {file.type} {path}") + return False + + self._save_serialized_file( + serialized_content=file.raw_file_content, # type: ignore [union-attr] + file_path=file.path, + file_type=typing.cast(SerializableFileType, file.type), + ) + return True + except Exception as e: + logger.error(f"Failed to pull file {path}: {str(e)}") + return False + + def _pull_directory( + self, + path: Optional[str] = None, + environment: Optional[str] = None, + ) -> Tuple[List[str], List[str]]: + """Sync Prompt and Agent files from Humanloop to local filesystem. + + Returns: + Tuple of two lists: + - First list contains paths of successfully synced files + - Second list contains paths of files that failed to sync. + Failures can occur due to missing content in the response or errors during local file writing. + + Raises: + HumanloopRuntimeError: If there's an error communicating with the API + """ + successful_files = [] + failed_files = [] + page = 1 + + logger.debug(f"Fetching files from directory: {path or '(root)'} in environment: {environment or '(default)'}") + + while True: + try: + logger.debug(f"`{path}`: Requesting page {page} of files") + response = self.client.files.list_files( + type=list(self.SERIALIZABLE_FILE_TYPES), + page=page, + size=100, + include_raw_file_content=True, + environment=environment, + path=path, + ) + + if len(response.records) == 0: + logger.debug(f"Finished reading files for path `{path}`") + break + + logger.debug(f"`{path}`: Read page {page} containing {len(response.records)} files") + + # Process each file + for file in response.records: + # Skip if not a serializable file type + if file.type not in self.SERIALIZABLE_FILE_TYPES: + logger.warning(f"Skipping unsupported file type: {file.type}") + continue + + file_type: SerializableFileType = typing.cast( + SerializableFileType, + file.type, + ) + + # Skip if no raw file content + if not getattr(file, "raw_file_content", None) or not file.raw_file_content: # type: ignore [union-attr] + logger.warning(f"No content found for {file.type} {file.path}") + failed_files.append(file.path) + continue + + try: + logger.debug(f"Writing {file.type} {file.path} to disk") + self._save_serialized_file( + serialized_content=file.raw_file_content, # type: ignore [union-attr] + file_path=file.path, + file_type=file_type, + ) + successful_files.append(file.path) + except Exception as e: + failed_files.append(file.path) + logger.error(f"Failed to save {file.path}: {str(e)}") + + page += 1 + except Exception as e: + formatted_error = format_api_error(e) + raise HumanloopRuntimeError(f"Failed to fetch page {page}: {formatted_error}") + + if successful_files: + logger.info(f"Successfully pulled {len(successful_files)} files") + if failed_files: + logger.warning(f"Failed to pull {len(failed_files)} files") + + return successful_files, failed_files + + def pull(self, path: Optional[str] = None, environment: Optional[str] = None) -> Tuple[List[str], List[str]]: + """Pull files from Humanloop to local filesystem. + + If the path ends with .prompt or .agent, pulls that specific file. + Otherwise, pulls all files under the specified path. 
+ If no path is provided, pulls all files from the root. + + Args: + path: The path to pull from (either a specific file or directory) + environment: The environment to pull from + + Returns: + Tuple of two lists: + - First list contains paths of successfully synced files + - Second list contains paths of files that failed to sync (e.g. failed to write to disk or missing raw content) + + Raises: + HumanloopRuntimeError: If there's an error communicating with the API + """ + start_time = time.time() + normalized_path = self._normalize_path(path) if path else None + + logger.info( + f"Starting pull operation: path={normalized_path or '(root)'}, environment={environment or '(default)'}" + ) + + try: + if ( + normalized_path is None or path is None + ): # path being None means normalized_path is None, but we check both for improved type safety + # Pull all files from the root + logger.debug("Pulling all files from root") + successful_files, failed_files = self._pull_directory( + path=None, + environment=environment, + ) + else: + if self.is_file(path.strip()): + logger.debug(f"Pulling file: {normalized_path}") + if self._pull_file(path=normalized_path, environment=environment): + successful_files = [path] + failed_files = [] + else: + successful_files = [] + failed_files = [path] + else: + logger.debug(f"Pulling directory: {normalized_path}") + successful_files, failed_files = self._pull_directory(normalized_path, environment) + + # Clear the cache at the end of each pull operation + self.clear_cache() + + duration_ms = int((time.time() - start_time) * 1000) + logger.info(f"Pull completed in {duration_ms}ms: {len(successful_files)} files succeeded") + + return successful_files, failed_files + except Exception as e: + raise HumanloopRuntimeError(f"Pull operation failed: {str(e)}") diff --git a/tests/custom/README.md b/tests/custom/README.md new file mode 100644 index 00000000..14ff7ed4 --- /dev/null +++ b/tests/custom/README.md @@ -0,0 +1,19 @@ +# Custom Tests Directory + +This directory contains custom tests for the Humanloop Python SDK. While the main SDK is auto-generated using [Fern](https://buildwithfern.com/), this directory allows us to add our own test implementations that won't be overwritten during regeneration. + +## Why Custom Tests? 
+ +- **Preservation**: Tests in this directory won't be overwritten when regenerating the SDK +- **Custom Implementation**: Allows testing of our own implementations beyond the auto-generated code +- **Integration**: Enables testing of how our custom code works with the auto-generated SDK + +## Running Tests + +```bash +# Run all custom tests +pytest tests/custom/ + +# Run specific test file +pytest tests/custom/sync/test_sync_client.py +``` diff --git a/tests/otel/__init__.py b/tests/custom/__init__.py similarity index 100% rename from tests/otel/__init__.py rename to tests/custom/__init__.py diff --git a/tests/assets/exact_match.py b/tests/custom/assets/exact_match.py similarity index 100% rename from tests/assets/exact_match.py rename to tests/custom/assets/exact_match.py diff --git a/tests/assets/levenshtein.py b/tests/custom/assets/levenshtein.py similarity index 100% rename from tests/assets/levenshtein.py rename to tests/custom/assets/levenshtein.py diff --git a/tests/conftest.py b/tests/custom/conftest.py similarity index 58% rename from tests/conftest.py rename to tests/custom/conftest.py index 80e3b336..8e400483 100644 --- a/tests/conftest.py +++ b/tests/custom/conftest.py @@ -1,18 +1,9 @@ -from dataclasses import asdict, dataclass import os -import random -import string -import time -from typing import Callable, Generator -import typing +from typing import Generator from unittest.mock import MagicMock -from dotenv import load_dotenv import pytest -from humanloop.base_client import BaseHumanloop -from humanloop.client import Humanloop -from humanloop.otel.exporter import HumanloopSpanExporter -from humanloop.otel.processor import HumanloopSpanProcessor +from dotenv import load_dotenv from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam from opentelemetry.instrumentation.anthropic import AnthropicInstrumentor from opentelemetry.instrumentation.cohere import CohereInstrumentor @@ -26,8 +17,10 @@ from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter from opentelemetry.trace import Tracer -if typing.TYPE_CHECKING: - from humanloop.client import BaseHumanloop +from humanloop.client import Humanloop +from humanloop.otel.exporter import HumanloopSpanExporter +from humanloop.otel.processor import HumanloopSpanProcessor +from tests.custom.types import GetHumanloopClientFn @pytest.fixture(scope="function") @@ -87,10 +80,24 @@ def opentelemetry_test_configuration( instrumentor.uninstrument() +@pytest.fixture(scope="session") +def get_humanloop_client() -> GetHumanloopClientFn: + load_dotenv() + if not os.getenv("HUMANLOOP_API_KEY"): + pytest.fail("HUMANLOOP_API_KEY is not set for integration tests") + + def _get_humanloop_client(use_local_files: bool = False) -> Humanloop: + return Humanloop( + api_key=os.getenv("HUMANLOOP_API_KEY"), + use_local_files=use_local_files, + ) + + return _get_humanloop_client + + @pytest.fixture(scope="function") def opentelemetry_hl_test_configuration( opentelemetry_test_provider: TracerProvider, - humanloop_client: BaseHumanloop, ) -> Generator[tuple[Tracer, InMemorySpanExporter], None, None]: """Configure OTel backend with HumanloopSpanProcessor. 
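A hypothetical example of how a test might consume the session-scoped `get_humanloop_client` fixture introduced above (the test name and assertion are illustrative only, not part of this change set):

```python
# Sketch only: request the fixture, build a client with local files enabled,
# and check that a pull reports no failed files.
from tests.custom.types import GetHumanloopClientFn


def test_pull_reports_no_failures(get_humanloop_client: GetHumanloopClientFn) -> None:
    client = get_humanloop_client(use_local_files=True)
    successful_files, failed_files = client.pull()
    assert failed_files == []
```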
@@ -162,117 +169,3 @@ def call_llm_messages() -> list[ChatCompletionMessageParam]: "content": "Bonjour!", }, ] - - -@dataclass -class APIKeys: - openai: str - humanloop: str - - -@pytest.fixture(scope="session") -def api_keys() -> APIKeys: - openai_key = os.getenv("OPENAI_API_KEY") - humanloop_key = os.getenv("HUMANLOOP_API_KEY") - for key_name, key_value in [ - ("OPENAI_API_KEY", openai_key), - ("HUMANLOOP_API_KEY", humanloop_key), - ]: - if key_value is None: - raise ValueError(f"{key_name} is not set in .env file") - api_keys = APIKeys( - openai=openai_key, # type: ignore [arg-type] - humanloop=humanloop_key, # type: ignore [arg-type] - ) - for key, value in asdict(api_keys).items(): - if value is None: - raise ValueError(f"{key.upper()} key is not set in .env file") - return api_keys - - -@pytest.fixture(scope="session") -def humanloop_client(api_keys: APIKeys) -> Humanloop: - return Humanloop(api_key=api_keys.humanloop) - - -@pytest.fixture(scope="session", autouse=True) -def load_env(): - load_dotenv() - - -def directory_cleanup(directory_id: str, humanloop_client: Humanloop): - response = humanloop_client.directories.get(directory_id) - for file in response.files: - file_id = file.id - if file.type == "prompt": - client = humanloop_client.prompts # type: ignore [assignment] - elif file.type == "tool": - client = humanloop_client.tools # type: ignore [assignment] - elif file.type == "dataset": - client = humanloop_client.datasets # type: ignore [assignment] - elif file.type == "evaluator": - client = humanloop_client.evaluators # type: ignore [assignment] - elif file.type == "flow": - client = humanloop_client.flows # type: ignore [assignment] - else: - raise NotImplementedError(f"Unknown HL file type {file.type}") - client.delete(file_id) - - for subdirectory in response.subdirectories: - directory_cleanup( - directory_id=subdirectory.id, - humanloop_client=humanloop_client, - ) - - humanloop_client.directories.delete(id=response.id) - - -@dataclass -class DirectoryIdentifiers: - path: str - id: str - - -@pytest.fixture() -def test_directory( - humanloop_client: Humanloop, -) -> Generator[DirectoryIdentifiers, None, None]: - # Generate a random alphanumeric directory name to avoid conflicts - def get_random_string(length: int = 16) -> str: - return "".join([random.choice(string.ascii_letters + "0123456789") for _ in range(length)]) - - directory_path = "SDK_integ_test_" + get_random_string() - response = humanloop_client.directories.create(path=directory_path) - assert response.path == directory_path - try: - yield DirectoryIdentifiers( - path=response.path, - id=response.id, - ) - finally: - time.sleep(1) - directory_cleanup(response.id, humanloop_client) - - -@pytest.fixture() -def get_test_path(test_directory: DirectoryIdentifiers) -> Callable[[str], str]: - def generate_path(name: str) -> str: - return f"{test_directory.path}/{name}" - - return generate_path - - -# @pytest.fixture(scope="session", autouse=True) -# def cleanup_test_dirs(humanloop_client: Humanloop): -# def _cleanup_all_test_dirs(): -# dirs = humanloop_client.directories.list() -# for dir in dirs: -# if dir.path.startswith("SDK_integ_test_"): -# directory_cleanup( -# directory_id=dir.id, -# humanloop_client=humanloop_client, -# ) - -# _cleanup_all_test_dirs() -# yield -# _cleanup_all_test_dirs() diff --git a/tests/custom/integration/__init__.py b/tests/custom/integration/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/custom/integration/conftest.py 
b/tests/custom/integration/conftest.py new file mode 100644 index 00000000..039b0f1c --- /dev/null +++ b/tests/custom/integration/conftest.py @@ -0,0 +1,272 @@ +import os +import time +import typing +import uuid +from collections.abc import Generator +from dataclasses import dataclass +from typing import Union + +import dotenv +import pytest +from click.testing import CliRunner + +from humanloop import AgentResponse, PromptResponse +from humanloop.requests.prompt_kernel_request import PromptKernelRequestParams +from tests.custom.types import GetHumanloopClientFn, SyncableFile + + +@dataclass +class ResourceIdentifiers: + file_id: str + file_path: str + + +@pytest.fixture(scope="session") +def openai_key() -> str: + dotenv.load_dotenv() + if not os.getenv("OPENAI_API_KEY"): + pytest.fail("OPENAI_API_KEY is not set for integration tests") + return os.getenv("OPENAI_API_KEY") # type: ignore [return-value] + + +@pytest.fixture(scope="function") +def sdk_test_dir(get_humanloop_client: GetHumanloopClientFn) -> Generator[str, None, None]: + humanloop_client = get_humanloop_client() + + def _get_subclient(file_type: str): + try: + return { + "agent": humanloop_client.agents, + "prompt": humanloop_client.prompts, + "dataset": humanloop_client.datasets, + "evaluator": humanloop_client.evaluators, + "flow": humanloop_client.flows, + "tool": humanloop_client.tools, + }[file_type] + except KeyError: + raise NotImplementedError(f"Unknown file type: {file_type}") + + def cleanup_directory(directory_id: str): + directory_response = humanloop_client.directories.get(id=directory_id) + for subdirectory in directory_response.subdirectories: + cleanup_directory(subdirectory.id) + for file in directory_response.files: + subclient = _get_subclient(typing.cast(str, file.type)) + subclient.delete(id=file.id) + humanloop_client.directories.delete(id=directory_response.id) + + path = f"SDK_INTEGRATION_TEST_{uuid.uuid4()}" + response = None + try: + response = humanloop_client.directories.create(path=path) + yield response.path + except Exception as e: + pytest.fail(f"Failed to create directory {path}: {e}") + finally: + if response: + time.sleep(5) + cleanup_directory(response.id) + + +@pytest.fixture(scope="function") +def test_prompt_config() -> PromptKernelRequestParams: + return { + "provider": "openai", + "model": "gpt-4o-mini", + "temperature": 0.5, + "template": [ + { + "role": "system", + "content": "You are a helpful assistant. 
You must answer the user's question truthfully and at the level of a 5th grader.", + }, + { + "role": "user", + "content": "{{question}}", + }, + ], + } + + +@pytest.fixture(scope="function") +def prompt( + get_humanloop_client: GetHumanloopClientFn, + sdk_test_dir: str, + test_prompt_config: PromptKernelRequestParams, +) -> Generator[ResourceIdentifiers, None, None]: + humanloop_client = get_humanloop_client() + prompt_path = f"{sdk_test_dir}/prompt" + try: + response = humanloop_client.prompts.upsert( + path=prompt_path, + **test_prompt_config, + ) + yield ResourceIdentifiers(file_id=response.id, file_path=response.path) + humanloop_client.prompts.delete(id=response.id) + except Exception as e: + pytest.fail(f"Failed to create prompt {prompt_path}: {e}") + + +@pytest.fixture(scope="function") +def eval_dataset( + get_humanloop_client: GetHumanloopClientFn, sdk_test_dir: str +) -> Generator[ResourceIdentifiers, None, None]: + humanloop_client = get_humanloop_client() + dataset_path = f"{sdk_test_dir}/eval_dataset" + try: + response = humanloop_client.datasets.upsert( + path=dataset_path, + datapoints=[ + { + "inputs": { + "question": "What is the capital of the France?", + }, + }, + { + "inputs": { + "question": "What is the capital of the Germany?", + }, + }, + { + "inputs": { + "question": "What is 2+2?", + }, + }, + ], + ) + yield ResourceIdentifiers(file_id=response.id, file_path=response.path) + humanloop_client.datasets.delete(id=response.id) + except Exception as e: + pytest.fail(f"Failed to create dataset {dataset_path}: {e}") + + +@pytest.fixture(scope="function") +def eval_prompt( + get_humanloop_client: GetHumanloopClientFn, + sdk_test_dir: str, + openai_key: str, + test_prompt_config: PromptKernelRequestParams, +) -> Generator[ResourceIdentifiers, None, None]: + humanloop_client = get_humanloop_client() + prompt_path = f"{sdk_test_dir}/eval_prompt" + try: + response = humanloop_client.prompts.upsert( + path=prompt_path, + **test_prompt_config, + ) + yield ResourceIdentifiers(file_id=response.id, file_path=response.path) + humanloop_client.prompts.delete(id=response.id) + except Exception as e: + pytest.fail(f"Failed to create prompt {prompt_path}: {e}") + + +@pytest.fixture(scope="function") +def output_not_null_evaluator( + get_humanloop_client: GetHumanloopClientFn, sdk_test_dir: str +) -> Generator[ResourceIdentifiers, None, None]: + humanloop_client = get_humanloop_client() + evaluator_path = f"{sdk_test_dir}/output_not_null_evaluator" + try: + response = humanloop_client.evaluators.upsert( + path=evaluator_path, + spec={ + "arguments_type": "target_required", + "return_type": "boolean", + "code": """ +def output_not_null(log: dict) -> bool: + return log["output"] is not None + """, + "evaluator_type": "python", + }, + ) + yield ResourceIdentifiers(file_id=response.id, file_path=response.path) + humanloop_client.evaluators.delete(id=response.id) + except Exception as e: + pytest.fail(f"Failed to create evaluator {evaluator_path}: {e}") + + +@pytest.fixture(scope="function") +def id_for_staging_environment(get_humanloop_client: GetHumanloopClientFn, eval_prompt: ResourceIdentifiers) -> str: + humanloop_client = get_humanloop_client() + response = humanloop_client.prompts.list_environments(id=eval_prompt.file_id) + for environment in response: + if environment.name == "staging": + return environment.id + pytest.fail("Staging environment not found") + + +@pytest.fixture +def syncable_files_fixture( + get_humanloop_client: GetHumanloopClientFn, + sdk_test_dir: str, +) -> 
Generator[list[SyncableFile], None, None]: + """Creates a predefined structure of files in Humanloop for testing sync.""" + files: list[SyncableFile] = [ + SyncableFile( + path="prompts/gpt-4", + type="prompt", + model="gpt-4", + ), + SyncableFile( + path="prompts/gpt-4o", + type="prompt", + model="gpt-4o", + ), + SyncableFile( + path="prompts/nested/complex/gpt-4o", + type="prompt", + model="gpt-4o", + ), + SyncableFile( + path="agents/gpt-4", + type="agent", + model="gpt-4", + ), + SyncableFile( + path="agents/gpt-4o", + type="agent", + model="gpt-4o", + ), + ] + + humanloop_client = get_humanloop_client() + created_files = [] + for file in files: + full_path = f"{sdk_test_dir}/{file.path}" + response: Union[AgentResponse, PromptResponse] + if file.type == "prompt": + response = humanloop_client.prompts.upsert( + path=full_path, + model=file.model, + ) + elif file.type == "agent": + response = humanloop_client.agents.upsert( + path=full_path, + model=file.model, + ) + created_files.append( + SyncableFile( + path=full_path, type=file.type, model=file.model, id=response.id, version_id=response.version_id + ) + ) + + yield created_files + + +@pytest.fixture +def cli_runner() -> CliRunner: + """GIVEN a CLI runner + THEN it should be configured to catch exceptions + """ + return CliRunner(mix_stderr=False) + + +@pytest.fixture +def no_humanloop_api_key_in_env(monkeypatch): + """Fixture that removes HUMANLOOP_API_KEY from environment variables. + + Use this fixture in tests that verify behavior when no API key is available + in the environment (but could still be loaded from .env files). + """ + # Remove API key from environment + monkeypatch.delenv("HUMANLOOP_API_KEY", raising=False) + yield diff --git a/tests/integration/test_decorators.py b/tests/custom/integration/test_decorators.py similarity index 56% rename from tests/integration/test_decorators.py rename to tests/custom/integration/test_decorators.py index 218453a6..59638896 100644 --- a/tests/integration/test_decorators.py +++ b/tests/custom/integration/test_decorators.py @@ -2,27 +2,28 @@ from typing import Any from openai import OpenAI -from humanloop.client import Humanloop -from humanloop.types.chat_message import ChatMessage + +from tests.custom.integration.conftest import GetHumanloopClientFn def test_prompt_decorator( - humanloop_test_client: Humanloop, + get_humanloop_client: GetHumanloopClientFn, sdk_test_dir: str, test_prompt_config: dict[str, Any], openai_key: str, ): try: + humanloop_client = get_humanloop_client() prompt_path = f"{sdk_test_dir}/test_prompt" - prompt_response = humanloop_test_client.prompts.upsert( + prompt_response = humanloop_client.prompts.upsert( path=prompt_path, **test_prompt_config, ) - prompt_versions_response = humanloop_test_client.prompts.list_versions(id=prompt_response.id) + prompt_versions_response = humanloop_client.prompts.list_versions(id=prompt_response.id) assert len(prompt_versions_response.records) == 1 - @humanloop_test_client.prompt(path=prompt_path) + @humanloop_client.prompt(path=prompt_path) def my_prompt(question: str) -> str: openai_client = OpenAI(api_key=openai_key) @@ -37,26 +38,27 @@ def my_prompt(question: str) -> str: assert "paris" in my_prompt("What is the capital of the France?").lower() time.sleep(5) - prompt_versions_response = humanloop_test_client.prompts.list_versions(id=prompt_response.id) + prompt_versions_response = humanloop_client.prompts.list_versions(id=prompt_response.id) assert len(prompt_versions_response.records) == 2 - logs_response = 
humanloop_test_client.logs.list(file_id=prompt_response.id, page=1, size=50) + logs_response = humanloop_client.logs.list(file_id=prompt_response.id, page=1, size=50) assert logs_response.items is not None and len(logs_response.items) == 1 finally: - humanloop_test_client.prompts.delete(id=prompt_response.id) + humanloop_client.prompts.delete(id=prompt_response.id) def test_call_prompt_in_flow_decorator( - humanloop_test_client: Humanloop, + get_humanloop_client: GetHumanloopClientFn, sdk_test_dir: str, openai_key: str, ): try: + humanloop_client = get_humanloop_client() - @humanloop_test_client.flow(path=f"{sdk_test_dir}/test_flow") + @humanloop_client.flow(path=f"{sdk_test_dir}/test_flow") def my_flow(question: str) -> str: - response = humanloop_test_client.prompts.call( + response = humanloop_client.prompts.call( path=f"{sdk_test_dir}/test_prompt", prompt={ "provider": "openai", @@ -72,34 +74,35 @@ def my_flow(question: str) -> str: assert "paris" in my_flow("What is the capital of the France?").lower() time.sleep(5) - prompt_response = humanloop_test_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_prompt") + prompt_response = humanloop_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_prompt") assert prompt_response is not None - prompt_logs_response = humanloop_test_client.logs.list(file_id=prompt_response.id, page=1, size=50) + prompt_logs_response = humanloop_client.logs.list(file_id=prompt_response.id, page=1, size=50) assert prompt_logs_response.items is not None and len(prompt_logs_response.items) == 1 prompt_log = prompt_logs_response.items[0] - flow_response = humanloop_test_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_flow") + flow_response = humanloop_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_flow") assert flow_response is not None - flow_logs_response = humanloop_test_client.logs.list(file_id=flow_response.id, page=1, size=50) + flow_logs_response = humanloop_client.logs.list(file_id=flow_response.id, page=1, size=50) assert flow_logs_response.items is not None and len(flow_logs_response.items) == 1 flow_log = flow_logs_response.items[0] assert prompt_log.trace_parent_id == flow_log.id finally: - flow_response = humanloop_test_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_flow") + flow_response = humanloop_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_flow") if flow_response is not None: - humanloop_test_client.flows.delete(id=flow_response.id) - prompt_response = humanloop_test_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_prompt") + humanloop_client.flows.delete(id=flow_response.id) + prompt_response = humanloop_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_prompt") if prompt_response is not None: - humanloop_test_client.prompts.delete(id=prompt_response.id) + humanloop_client.prompts.delete(id=prompt_response.id) def test_flow_decorator_logs_exceptions( - humanloop_test_client: Humanloop, + get_humanloop_client: GetHumanloopClientFn, sdk_test_dir: str, ): try: + humanloop_client = get_humanloop_client() - @humanloop_test_client.flow(path=f"{sdk_test_dir}/test_flow_log_error") + @humanloop_client.flow(path=f"{sdk_test_dir}/test_flow_log_error") def my_flow(question: str) -> str: raise ValueError("This is a test exception") @@ -107,27 +110,28 @@ def my_flow(question: str) -> str: time.sleep(5) - flow_response = humanloop_test_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_flow_log_error") + flow_response = 
humanloop_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_flow_log_error") assert flow_response is not None - flow_logs_response = humanloop_test_client.logs.list(file_id=flow_response.id, page=1, size=50) + flow_logs_response = humanloop_client.logs.list(file_id=flow_response.id, page=1, size=50) assert flow_logs_response.items is not None and len(flow_logs_response.items) == 1 flow_log = flow_logs_response.items[0] assert flow_log.error is not None assert flow_log.output is None finally: - flow_response = humanloop_test_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_flow_log_error") + flow_response = humanloop_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_flow_log_error") if flow_response is not None: - humanloop_test_client.flows.delete(id=flow_response.id) + humanloop_client.flows.delete(id=flow_response.id) def test_flow_decorator_populates_output_message( - humanloop_test_client: Humanloop, + get_humanloop_client: GetHumanloopClientFn, sdk_test_dir: str, ): try: + humanloop_client = get_humanloop_client() - @humanloop_test_client.flow(path=f"{sdk_test_dir}/test_flow_log_output_message") + @humanloop_client.flow(path=f"{sdk_test_dir}/test_flow_log_output_message") def my_flow(question: str) -> dict[str, Any]: return {"role": "user", "content": question} @@ -135,11 +139,9 @@ def my_flow(question: str) -> dict[str, Any]: time.sleep(5) - flow_response = humanloop_test_client.files.retrieve_by_path( - path=f"{sdk_test_dir}/test_flow_log_output_message" - ) + flow_response = humanloop_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_flow_log_output_message") assert flow_response is not None - flow_logs_response = humanloop_test_client.logs.list(file_id=flow_response.id, page=1, size=50) + flow_logs_response = humanloop_client.logs.list(file_id=flow_response.id, page=1, size=50) assert flow_logs_response.items is not None and len(flow_logs_response.items) == 1 flow_log = flow_logs_response.items[0] assert flow_log.output_message is not None @@ -147,8 +149,6 @@ def my_flow(question: str) -> dict[str, Any]: assert flow_log.error is None finally: - flow_response = humanloop_test_client.files.retrieve_by_path( - path=f"{sdk_test_dir}/test_flow_log_output_message" - ) + flow_response = humanloop_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_flow_log_output_message") if flow_response is not None: - humanloop_test_client.flows.delete(id=flow_response.id) + humanloop_client.flows.delete(id=flow_response.id) diff --git a/tests/integration/test_evals.py b/tests/custom/integration/test_evals.py similarity index 66% rename from tests/integration/test_evals.py rename to tests/custom/integration/test_evals.py index 49bbb6dc..d8ba8996 100644 --- a/tests/integration/test_evals.py +++ b/tests/custom/integration/test_evals.py @@ -2,18 +2,20 @@ from typing import Any import pytest -from humanloop.client import Humanloop + from humanloop.error import HumanloopRuntimeError -from tests.integration.conftest import TestIdentifiers +from tests.custom.integration.conftest import ResourceIdentifiers +from tests.custom.types import GetHumanloopClientFn def test_eval_run_works_on_online_files( - humanloop_test_client: Humanloop, - output_not_null_evaluator: TestIdentifiers, - eval_dataset: TestIdentifiers, - eval_prompt: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + output_not_null_evaluator: ResourceIdentifiers, + eval_dataset: ResourceIdentifiers, + eval_prompt: ResourceIdentifiers, ) -> None: - humanloop_test_client.evaluations.run( # type: ignore 
[attr-defined] + humanloop_client = get_humanloop_client() + humanloop_client.evaluations.run( # type: ignore [attr-defined] name="test_eval_run", file={ "path": eval_prompt.file_path, @@ -29,29 +31,30 @@ def test_eval_run_works_on_online_files( ], ) time.sleep(5) - response = humanloop_test_client.evaluations.list(file_id=eval_prompt.file_id) + response = humanloop_client.evaluations.list(file_id=eval_prompt.file_id) assert response.items and len(response.items) == 1 evaluation_id = response.items[0].id - run_evaluation_response = humanloop_test_client.evaluations.list_runs_for_evaluation(id=evaluation_id) # type: ignore [attr-defined] + run_evaluation_response = humanloop_client.evaluations.list_runs_for_evaluation(id=evaluation_id) # type: ignore [attr-defined] assert run_evaluation_response.runs[0].status == "completed" def test_eval_run_version_id( - humanloop_test_client: Humanloop, - output_not_null_evaluator: TestIdentifiers, - eval_dataset: TestIdentifiers, - eval_prompt: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + output_not_null_evaluator: ResourceIdentifiers, + eval_dataset: ResourceIdentifiers, + eval_prompt: ResourceIdentifiers, test_prompt_config: dict[str, Any], ) -> None: + humanloop_client = get_humanloop_client() # GIVEN a prompt where a non-default version is created new_test_prompt_config = test_prompt_config.copy() new_test_prompt_config["temperature"] = 1 - new_prompt_version_response = humanloop_test_client.prompts.upsert( + new_prompt_version_response = humanloop_client.prompts.upsert( path=eval_prompt.file_path, **new_test_prompt_config, ) # WHEN creating an evaluation using version_id - humanloop_test_client.evaluations.run( # type: ignore [attr-defined] + humanloop_client.evaluations.run( # type: ignore [attr-defined] name="test_eval_run", file={ "id": new_prompt_version_response.id, @@ -68,44 +71,45 @@ def test_eval_run_version_id( ], ) # THEN we evaluate the version created in the test - evaluations_response = humanloop_test_client.evaluations.list(file_id=new_prompt_version_response.id) + evaluations_response = humanloop_client.evaluations.list(file_id=new_prompt_version_response.id) assert evaluations_response.items and len(evaluations_response.items) == 1 evaluation_id = evaluations_response.items[0].id - runs_response = humanloop_test_client.evaluations.list_runs_for_evaluation(id=evaluation_id) + runs_response = humanloop_client.evaluations.list_runs_for_evaluation(id=evaluation_id) assert runs_response.runs[0].status == "completed" assert ( runs_response.runs[0].version and runs_response.runs[0].version.version_id == new_prompt_version_response.version_id ) - list_versions_response = humanloop_test_client.prompts.list_versions(id=new_prompt_version_response.id) + list_versions_response = humanloop_client.prompts.list_versions(id=new_prompt_version_response.id) assert list_versions_response.records and len(list_versions_response.records) == 2 # THEN the version used in evaluation is not the default version - response = humanloop_test_client.prompts.get(id=new_prompt_version_response.id) + response = humanloop_client.prompts.get(id=new_prompt_version_response.id) assert response.version_id != new_prompt_version_response.version_id def test_eval_run_environment( - humanloop_test_client: Humanloop, - output_not_null_evaluator: TestIdentifiers, - eval_dataset: TestIdentifiers, - eval_prompt: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + output_not_null_evaluator: ResourceIdentifiers, + eval_dataset: ResourceIdentifiers, 
+ eval_prompt: ResourceIdentifiers, test_prompt_config: dict[str, Any], id_for_staging_environment: str, ) -> None: + humanloop_client = get_humanloop_client() # GIVEN a prompt deployed to staging environment new_test_prompt_config = test_prompt_config.copy() new_test_prompt_config["temperature"] = 1 - new_prompt_version_response = humanloop_test_client.prompts.upsert( + new_prompt_version_response = humanloop_client.prompts.upsert( path=eval_prompt.file_path, **new_test_prompt_config, ) - humanloop_test_client.prompts.set_deployment( + humanloop_client.prompts.set_deployment( id=new_prompt_version_response.id, environment_id=id_for_staging_environment, version_id=new_prompt_version_response.version_id, ) # WHEN creating an evaluation using environment - humanloop_test_client.evaluations.run( # type: ignore [attr-defined] + humanloop_client.evaluations.run( # type: ignore [attr-defined] name="test_eval_run", file={ "id": new_prompt_version_response.id, @@ -122,30 +126,31 @@ def test_eval_run_environment( ], ) # THEN evaluation is done with the version deployed to staging environment - evaluations_response = humanloop_test_client.evaluations.list(file_id=new_prompt_version_response.id) + evaluations_response = humanloop_client.evaluations.list(file_id=new_prompt_version_response.id) assert evaluations_response.items and len(evaluations_response.items) == 1 evaluation_id = evaluations_response.items[0].id - runs_response = humanloop_test_client.evaluations.list_runs_for_evaluation(id=evaluation_id) + runs_response = humanloop_client.evaluations.list_runs_for_evaluation(id=evaluation_id) assert runs_response.runs[0].status == "completed" assert ( runs_response.runs[0].version and runs_response.runs[0].version.version_id == new_prompt_version_response.version_id ) - default_prompt_version_response = humanloop_test_client.prompts.get(id=new_prompt_version_response.id) + default_prompt_version_response = humanloop_client.prompts.get(id=new_prompt_version_response.id) assert default_prompt_version_response.version_id != new_prompt_version_response.version_id @pytest.mark.parametrize("version_lookup", ["version_id", "environment"]) def test_eval_run_version_lookup_fails_with_path( - humanloop_test_client: Humanloop, - eval_prompt: TestIdentifiers, - eval_dataset: TestIdentifiers, - output_not_null_evaluator: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + eval_prompt: ResourceIdentifiers, + eval_dataset: ResourceIdentifiers, + output_not_null_evaluator: ResourceIdentifiers, version_lookup: str, ): # GIVEN an eval run where we try to evaluate a non-default version with pytest.raises(HumanloopRuntimeError) as e: - humanloop_test_client.evaluations.run( # type: ignore [attr-defined] + humanloop_client = get_humanloop_client() + humanloop_client.evaluations.run( # type: ignore [attr-defined] name="test_eval_run", file={ "path": eval_prompt.file_path, @@ -167,13 +172,14 @@ def test_eval_run_version_lookup_fails_with_path( def test_eval_run_with_version_upsert( - humanloop_test_client: Humanloop, - eval_prompt: TestIdentifiers, - eval_dataset: TestIdentifiers, - output_not_null_evaluator: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + eval_prompt: ResourceIdentifiers, + eval_dataset: ResourceIdentifiers, + output_not_null_evaluator: ResourceIdentifiers, test_prompt_config: dict[str, Any], ): - humanloop_test_client.evaluations.run( # type: ignore [attr-defined] + humanloop_client = get_humanloop_client() + humanloop_client.evaluations.run( # type: ignore [attr-defined] 
name="test_eval_run", file={ "path": eval_prompt.file_path, @@ -193,23 +199,24 @@ def test_eval_run_with_version_upsert( ], ) # THEN the version is upserted and evaluation finishes successfully - evaluations_response = humanloop_test_client.evaluations.list(file_id=eval_prompt.file_id) + evaluations_response = humanloop_client.evaluations.list(file_id=eval_prompt.file_id) assert evaluations_response.items and len(evaluations_response.items) == 1 evaluation_id = evaluations_response.items[0].id - runs_response = humanloop_test_client.evaluations.list_runs_for_evaluation(id=evaluation_id) + runs_response = humanloop_client.evaluations.list_runs_for_evaluation(id=evaluation_id) assert runs_response.runs[0].status == "completed" # THEN a version was upserted based on file.version - list_prompt_versions_response = humanloop_test_client.prompts.list_versions(id=eval_prompt.file_id) + list_prompt_versions_response = humanloop_client.prompts.list_versions(id=eval_prompt.file_id) assert list_prompt_versions_response.records and len(list_prompt_versions_response.records) == 2 def test_flow_eval_does_not_work_without_callable( - humanloop_test_client: Humanloop, - eval_dataset: TestIdentifiers, - output_not_null_evaluator: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + eval_dataset: ResourceIdentifiers, + output_not_null_evaluator: ResourceIdentifiers, ): with pytest.raises(HumanloopRuntimeError) as e: - humanloop_test_client.evaluations.run( # type: ignore [attr-defined] + humanloop_client = get_humanloop_client() + humanloop_client.evaluations.run( # type: ignore [attr-defined] name="test_eval_run", file={ "path": "Test Flow", @@ -234,28 +241,29 @@ def test_flow_eval_does_not_work_without_callable( def test_flow_eval_works_with_callable( - humanloop_test_client: Humanloop, - eval_dataset: TestIdentifiers, - output_not_null_evaluator: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + eval_dataset: ResourceIdentifiers, + output_not_null_evaluator: ResourceIdentifiers, sdk_test_dir: str, ): + humanloop_client = get_humanloop_client() flow_path = f"{sdk_test_dir}/Test Flow" # GIVEN a flow with a callable - flow_response = humanloop_test_client.flows.upsert( + flow_response = humanloop_client.flows.upsert( path=flow_path, attributes={ "foo": "bar", }, ) try: - flow = humanloop_test_client.flows.upsert( + flow = humanloop_client.flows.upsert( path=flow_path, attributes={ "foo": "bar", }, ) # WHEN we run an evaluation with the flow - humanloop_test_client.evaluations.run( # type: ignore [attr-defined] + humanloop_client.evaluations.run( # type: ignore [attr-defined] name="test_eval_run", file={ "id": flow.id, @@ -272,22 +280,23 @@ def test_flow_eval_works_with_callable( ], ) # THEN the evaluation finishes successfully - evaluations_response = humanloop_test_client.evaluations.list(file_id=flow.id) + evaluations_response = humanloop_client.evaluations.list(file_id=flow.id) assert evaluations_response.items and len(evaluations_response.items) == 1 evaluation_id = evaluations_response.items[0].id - runs_response = humanloop_test_client.evaluations.list_runs_for_evaluation(id=evaluation_id) + runs_response = humanloop_client.evaluations.list_runs_for_evaluation(id=evaluation_id) assert runs_response.runs[0].status == "completed" finally: - humanloop_test_client.flows.delete(id=flow_response.id) + humanloop_client.flows.delete(id=flow_response.id) def test_cannot_evaluate_agent_with_callable( - humanloop_test_client: Humanloop, - eval_dataset: TestIdentifiers, - 
output_not_null_evaluator: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + eval_dataset: ResourceIdentifiers, + output_not_null_evaluator: ResourceIdentifiers, ): with pytest.raises(ValueError) as e: - humanloop_test_client.evaluations.run( # type: ignore [attr-defined] + humanloop_client = get_humanloop_client() + humanloop_client.evaluations.run( # type: ignore [attr-defined] name="test_eval_run", file={ "path": "Test Agent", @@ -307,14 +316,15 @@ def test_cannot_evaluate_agent_with_callable( def test_flow_eval_resolves_to_default_with_callable( - humanloop_test_client: Humanloop, - output_not_null_evaluator: TestIdentifiers, - eval_dataset: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + output_not_null_evaluator: ResourceIdentifiers, + eval_dataset: ResourceIdentifiers, sdk_test_dir: str, ) -> None: + humanloop_client = get_humanloop_client() # GIVEN a flow with some attributes flow_path = f"{sdk_test_dir}/Test Flow" - flow_response = humanloop_test_client.flows.upsert( + flow_response = humanloop_client.flows.upsert( path=flow_path, attributes={ "foo": "bar", @@ -322,7 +332,7 @@ def test_flow_eval_resolves_to_default_with_callable( ) try: # WHEN running an evaluation with the flow's callable but no version - humanloop_test_client.evaluations.run( # type: ignore [attr-defined] + humanloop_client.evaluations.run( # type: ignore [attr-defined] name="test_eval_run", file={ "id": flow_response.id, @@ -339,24 +349,24 @@ def test_flow_eval_resolves_to_default_with_callable( ], ) # THEN the evaluation finishes successfully - evaluations_response = humanloop_test_client.evaluations.list(file_id=flow_response.id) + evaluations_response = humanloop_client.evaluations.list(file_id=flow_response.id) assert evaluations_response.items and len(evaluations_response.items) == 1 evaluation_id = evaluations_response.items and evaluations_response.items[0].id - runs_response = humanloop_test_client.evaluations.list_runs_for_evaluation(id=evaluation_id) # type: ignore [attr-defined, arg-type] + runs_response = humanloop_client.evaluations.list_runs_for_evaluation(id=evaluation_id) # type: ignore [attr-defined, arg-type] assert runs_response.runs[0].status == "completed" finally: # Clean up test resources - humanloop_test_client.flows.delete(id=flow_response.id) + humanloop_client.flows.delete(id=flow_response.id) -@pytest.mark.skip(reason="Skip until agents are in prod") def test_agent_eval_works_upserting( - humanloop_test_client: Humanloop, - eval_dataset: TestIdentifiers, - output_not_null_evaluator: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + eval_dataset: ResourceIdentifiers, + output_not_null_evaluator: ResourceIdentifiers, sdk_test_dir: str, ): - humanloop_test_client.evaluations.run( # type: ignore [attr-defined] + humanloop_client = get_humanloop_client() + humanloop_client.evaluations.run( # type: ignore [attr-defined] name="test_eval_run", file={ "path": f"{sdk_test_dir}/Test Agent", @@ -387,7 +397,7 @@ def test_agent_eval_works_upserting( } ], ) - files_response = humanloop_test_client.files.list_files(page=1, size=100) + files_response = humanloop_client.files.list_files(page=1, size=100) eval_agent = None for file in files_response.records: if file.path == f"{sdk_test_dir}/Test Agent": @@ -395,8 +405,8 @@ def test_agent_eval_works_upserting( break assert eval_agent and eval_agent.type == "agent" # THEN the evaluation finishes successfully - evaluations_response = humanloop_test_client.evaluations.list(file_id=eval_agent.id) + 
evaluations_response = humanloop_client.evaluations.list(file_id=eval_agent.id) assert evaluations_response.items and len(evaluations_response.items) == 1 evaluation_id = evaluations_response.items[0].id - runs_response = humanloop_test_client.evaluations.list_runs_for_evaluation(id=evaluation_id) # type: ignore [attr-defined, arg-type] + runs_response = humanloop_client.evaluations.list_runs_for_evaluation(id=evaluation_id) # type: ignore [attr-defined, arg-type] assert runs_response.runs[0].status == "completed" diff --git a/tests/integration/test_prompts.py b/tests/custom/integration/test_prompts.py similarity index 60% rename from tests/integration/test_prompts.py rename to tests/custom/integration/test_prompts.py index 13ca80eb..f6021b7e 100644 --- a/tests/integration/test_prompts.py +++ b/tests/custom/integration/test_prompts.py @@ -1,14 +1,15 @@ -from humanloop.client import Humanloop - -from tests.integration.conftest import TestIdentifiers +from humanloop.requests.prompt_kernel_request import PromptKernelRequestParams +from tests.custom.integration.conftest import ResourceIdentifiers +from tests.custom.types import GetHumanloopClientFn def test_prompts_call( - humanloop_test_client: Humanloop, - prompt: TestIdentifiers, - test_prompt_config: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + prompt: ResourceIdentifiers, + test_prompt_config: PromptKernelRequestParams, ) -> None: - response = humanloop_test_client.prompts.call( # type: ignore [attr-defined] + humanloop_client = get_humanloop_client() + response = humanloop_client.prompts.call( # type: ignore [attr-defined] path=prompt.file_path, prompt={**test_prompt_config}, # type: ignore [misc, arg-type, typeddict-item, dict-item, list-item] inputs={"question": "What is the capital of the France?"}, @@ -24,11 +25,12 @@ def test_prompts_call( def test_prompts_call_stream( - humanloop_test_client: Humanloop, - prompt: TestIdentifiers, - test_prompt_config: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + prompt: ResourceIdentifiers, + test_prompt_config: PromptKernelRequestParams, ) -> None: - response = humanloop_test_client.prompts.call_stream( # type: ignore [attr-defined] + humanloop_client = get_humanloop_client() + response = humanloop_client.prompts.call_stream( # type: ignore [attr-defined] path=prompt.file_path, prompt={**test_prompt_config}, # type: ignore [misc, arg-type, typeddict-item, dict-item, list-item] inputs={"question": "What is the capital of the France?"}, diff --git a/tests/custom/integration/test_sync.py b/tests/custom/integration/test_sync.py new file mode 100644 index 00000000..8b33f7a4 --- /dev/null +++ b/tests/custom/integration/test_sync.py @@ -0,0 +1,211 @@ +import typing +from pathlib import Path +from typing import List, Union + +import pytest + +from humanloop import AgentResponse, PromptResponse +from humanloop.agents.client import AgentsClient +from humanloop.error import HumanloopRuntimeError +from humanloop.prompts.client import PromptsClient +from tests.custom.types import GetHumanloopClientFn, SyncableFile + + +@pytest.fixture +def cleanup_local_files(): + """Cleanup any locally synced files after tests""" + yield + local_dir = Path("humanloop") + if local_dir.exists(): + import shutil + + shutil.rmtree(local_dir) + + +def test_pull_basic( + syncable_files_fixture: List[SyncableFile], + get_humanloop_client: GetHumanloopClientFn, +): + """Test that humanloop.sync() correctly syncs remote files to local filesystem""" + # GIVEN a set of files in the remote system 
(from syncable_files_fixture) + humanloop_client = get_humanloop_client() + + # WHEN running the sync + humanloop_client.pull() + + # THEN our local filesystem should mirror the remote filesystem in the HL Workspace + for file in syncable_files_fixture: + extension = f".{file.type}" + local_path = Path("humanloop") / f"{file.path}{extension}" + + # THEN the file and its directory should exist + assert local_path.exists(), f"Expected synced file at {local_path}" + assert local_path.parent.exists(), f"Expected directory at {local_path.parent}" + + # THEN the file should not be empty + content = local_path.read_text() + assert content, f"File at {local_path} should not be empty" + + +def test_overload_with_local_files( + get_humanloop_client: GetHumanloopClientFn, + syncable_files_fixture: List[SyncableFile], +): + """Test that overload_with_local_files correctly handles local files.""" + # GIVEN a client with use_local_files=True and pulled files + humanloop_client = get_humanloop_client(use_local_files=True) + humanloop_client.pull() + + # GIVEN a test file from the structure + test_file = syncable_files_fixture[0] + extension = f".{test_file.type}" + local_path = Path("humanloop") / f"{test_file.path}{extension}" + + # THEN the file should exist locally + assert local_path.exists(), f"Expected pulled file at {local_path}" + assert local_path.parent.exists(), f"Expected directory at {local_path.parent}" + + # WHEN calling the file + response: Union[AgentResponse, PromptResponse] + if test_file.type == "prompt": + response = humanloop_client.prompts.call( # type: ignore [assignment] + path=test_file.path, messages=[{"role": "user", "content": "Testing"}] + ) + elif test_file.type == "agent": + response = humanloop_client.agents.call( # type: ignore [assignment] + path=test_file.path, messages=[{"role": "user", "content": "Testing"}] + ) + # THEN the response should not be None + assert response is not None + + # WHEN calling with an invalid path + # THEN it should raise HumanloopRuntimeError + with pytest.raises(HumanloopRuntimeError): + try: + sub_client: Union[PromptsClient, AgentsClient] = typing.cast( + Union[PromptsClient, AgentsClient], + { + "prompt": humanloop_client.prompts, + "agent": humanloop_client.agents, + }[test_file.type], + ) + sub_client.call(path="invalid/path") + except KeyError: + raise NotImplementedError(f"Unknown file type: {test_file.type}") + + +def test_overload_log_with_local_files( + get_humanloop_client: GetHumanloopClientFn, + syncable_files_fixture: List[SyncableFile], + sdk_test_dir: str, +): + """Test that overload_with_local_files correctly handles local files for log operations.""" + # GIVEN a client with use_local_files=True and pulled files + humanloop_client = get_humanloop_client(use_local_files=True) + humanloop_client.pull() + + # GIVEN a test file from the structure + test_file = syncable_files_fixture[0] + extension = f".{test_file.type}" + local_path = Path("humanloop") / f"{test_file.path}{extension}" + + # THEN the file should exist locally + assert local_path.exists(), f"Expected pulled file at {local_path}" + assert local_path.parent.exists(), f"Expected directory at {local_path.parent}" + + # WHEN logging with the pulled file + if test_file.type == "prompt": + response = humanloop_client.prompts.log( # type: ignore [assignment] + path=test_file.path, messages=[{"role": "user", "content": "Testing"}], output="Test response" + ) + elif test_file.type == "agent": + response = humanloop_client.agents.log( # type: ignore [assignment] + 
path=test_file.path, messages=[{"role": "user", "content": "Testing"}], output="Test response" + ) + # THEN the response should not be None + assert response is not None + + # WHEN logging with an invalid path + # THEN it should raise HumanloopRuntimeError + with pytest.raises(HumanloopRuntimeError): + if test_file.type == "prompt": + humanloop_client.prompts.log( + path=f"{sdk_test_dir}/invalid/path", + messages=[{"role": "user", "content": "Testing"}], + output="Test response", + ) + elif test_file.type == "agent": + humanloop_client.agents.log( + path=f"{sdk_test_dir}/invalid/path", + messages=[{"role": "user", "content": "Testing"}], + output="Test response", + ) + + +def test_overload_version_environment_handling( + get_humanloop_client: GetHumanloopClientFn, + syncable_files_fixture: List[SyncableFile], +): + """Test that overload_with_local_files correctly handles version_id and environment parameters.""" + # GIVEN a client with use_local_files=True and pulled files + humanloop_client = get_humanloop_client(use_local_files=True) + humanloop_client.pull() + + # GIVEN a test file from the structure + test_file = syncable_files_fixture[0] + extension = f".{test_file.type}" + local_path = Path("humanloop") / f"{test_file.path}{extension}" + + # THEN the file should exist locally + assert local_path.exists(), f"Expected pulled file at {local_path}" + assert local_path.parent.exists(), f"Expected directory at {local_path.parent}" + + # WHEN calling with version_id + # THEN it should raise HumanloopRuntimeError + with pytest.raises(HumanloopRuntimeError, match="Cannot use local file.*version_id or environment was specified"): + if test_file.type == "prompt": + humanloop_client.prompts.call( + path=test_file.path, + version_id=test_file.version_id, + messages=[{"role": "user", "content": "Testing"}], + ) + elif test_file.type == "agent": + humanloop_client.agents.call( + path=test_file.path, + version_id=test_file.version_id, + messages=[{"role": "user", "content": "Testing"}], + ) + + # WHEN calling with environment + # THEN it should raise HumanloopRuntimeError + with pytest.raises(HumanloopRuntimeError, match="Cannot use local file.*version_id or environment was specified"): + if test_file.type == "prompt": + humanloop_client.prompts.call( + path=test_file.path, + environment="production", + messages=[{"role": "user", "content": "Testing"}], + ) + elif test_file.type == "agent": + humanloop_client.agents.call( + path=test_file.path, + environment="production", + messages=[{"role": "user", "content": "Testing"}], + ) + + # WHEN calling with both version_id and environment + # THEN it should raise HumanloopRuntimeError + with pytest.raises(HumanloopRuntimeError, match="Cannot use local file.*version_id or environment was specified"): + if test_file.type == "prompt": + humanloop_client.prompts.call( + path=test_file.path, + version_id=test_file.version_id, + environment="staging", + messages=[{"role": "user", "content": "Testing"}], + ) + elif test_file.type == "agent": + humanloop_client.agents.call( + path=test_file.path, + version_id=test_file.version_id, + environment="staging", + messages=[{"role": "user", "content": "Testing"}], + ) diff --git a/tests/custom/integration/test_sync_cli.py b/tests/custom/integration/test_sync_cli.py new file mode 100644 index 00000000..5631d5f0 --- /dev/null +++ b/tests/custom/integration/test_sync_cli.py @@ -0,0 +1,181 @@ +from pathlib import Path +from unittest import mock + +import pytest +from click.testing import CliRunner + +from 
humanloop.cli.__main__ import cli +from tests.custom.types import SyncableFile + + +@pytest.fixture +def no_env_file_loading(): + """Fixture that prevents loading API keys from any .env files. + + Use this fixture in tests that verify behavior when no .env files should + be processed, regardless of whether they exist or not. + """ + # Prevent any .env file from being loaded + with mock.patch("humanloop.cli.__main__.load_dotenv", lambda *args, **kwargs: None): + yield + + +def test_pull_without_api_key(cli_runner: CliRunner, no_humanloop_api_key_in_env, no_env_file_loading): + """GIVEN no API key in environment + WHEN running pull command + THEN it should fail with appropriate error message + """ + # WHEN running pull command + result = cli_runner.invoke(cli, ["pull", "--local-files-directory", "humanloop"]) + + # THEN it should fail with appropriate error message + assert result.exit_code == 1 # Our custom error code for API key issues + assert "No API key found" in result.output + assert "Set HUMANLOOP_API_KEY in .env file or environment" in result.output + + +def test_pull_basic( + cli_runner: CliRunner, + syncable_files_fixture: list[SyncableFile], + tmp_path: Path, # this path is used as a temporary store for files locally +): + # GIVEN a base directory for pulled files + base_dir = str(tmp_path / "humanloop") + + # WHEN running pull command + result = cli_runner.invoke(cli, ["pull", "--local-files-directory", base_dir, "--verbose"]) + + # THEN it should succeed + assert result.exit_code == 0 + assert "Pulling files from Humanloop..." in result.output + assert "Pull completed" in result.output + + # THEN the files should exist locally + for file in syncable_files_fixture: + extension = f".{file.type}" + local_path = Path(base_dir) / f"{file.path}{extension}" + assert local_path.exists(), f"Expected synced file at {local_path}" + assert local_path.parent.exists(), f"Expected directory at {local_path.parent}" + assert local_path.read_text(), f"File at {local_path} should not be empty" + + +def test_pull_with_specific_path( + cli_runner: CliRunner, + syncable_files_fixture: list[SyncableFile], + tmp_path: Path, +): + """GIVEN a specific path to pull + WHEN running pull command with path + THEN it should pull only files from that path + """ + # GIVEN a base directory and specific path + base_dir = str(tmp_path / "humanloop") + test_path = syncable_files_fixture[ + 0 + ].path.split( + "/" + )[ + 0 + ] # Retrieve the prefix of the first file's path which corresponds to the sdk_test_dir used within syncable_files_fixture + + # WHEN running pull command with path + result = cli_runner.invoke(cli, ["pull", "--local-files-directory", base_dir, "--path", test_path, "--verbose"]) + + # THEN it should succeed and show the path + assert result.exit_code == 0 + assert f"Path: {test_path}" in result.output + + # THEN only files from that path should exist locally + for file in syncable_files_fixture: + extension = f".{file.type}" + local_path = Path(base_dir) / f"{file.path}{extension}" + if file.path.startswith(test_path): + assert local_path.exists(), f"Expected synced file at {local_path}" + else: + assert not local_path.exists(), f"Unexpected file at {local_path}" + + +def test_pull_with_environment( + cli_runner: CliRunner, + syncable_files_fixture: list[SyncableFile], + tmp_path: Path, +): + # GIVEN a base directory and environment + base_dir = str(tmp_path / "humanloop") + environment = "staging" + + # WHEN running pull command with environment + result = cli_runner.invoke( + cli, + [ + "pull", 
+ "--local-files-directory", + base_dir, + "--environment", + environment, + "--verbose", + ], + ) + + # THEN it should succeed and show the environment + assert result.exit_code == 0 + assert f"Environment: {environment}" in result.output + + +def test_pull_with_quiet_mode( + cli_runner: CliRunner, + syncable_files_fixture: list[SyncableFile], + tmp_path: Path, +): + # GIVEN a base directory and quiet mode + base_dir = str(tmp_path / "humanloop") + + # WHEN running pull command with quiet mode + result = cli_runner.invoke(cli, ["pull", "--local-files-directory", base_dir, "--quiet"]) + + # THEN it should succeed but not show file list + assert result.exit_code == 0 + assert "Successfully pulled" not in result.output + + # THEN files should still be pulled + for file in syncable_files_fixture: + extension = f".{file.type}" + local_path = Path(base_dir) / f"{file.path}{extension}" + assert local_path.exists(), f"Expected synced file at {local_path}" + + +def test_pull_with_invalid_path( + cli_runner: CliRunner, +): + # GIVEN an invalid base directory + path = "nonexistent/path" + + # WHEN running pull command + result = cli_runner.invoke(cli, ["pull", "--path", path]) + + # THEN it should fail + assert result.exit_code == 1 + assert "Error" in result.output + + +def test_pull_with_invalid_environment(cli_runner: CliRunner, tmp_path: Path): + # GIVEN an invalid environment + environment = "nonexistent" + base_dir = str(tmp_path / "humanloop") + + # WHEN running pull command + result = cli_runner.invoke( + cli, + [ + "pull", + "--local-files-directory", + base_dir, + "--environment", + environment, + "--verbose", + ], + ) + + # THEN it should fail + assert result.exit_code == 1 + assert "Error" in result.output diff --git a/tests/custom/otel/__init__.py b/tests/custom/otel/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/otel/test_helpers.py b/tests/custom/otel/test_helpers.py similarity index 99% rename from tests/otel/test_helpers.py rename to tests/custom/otel/test_helpers.py index 3bd5ce45..f7ff6555 100644 --- a/tests/otel/test_helpers.py +++ b/tests/custom/otel/test_helpers.py @@ -1,7 +1,8 @@ import pytest -from humanloop.otel.helpers import read_from_opentelemetry_span, write_to_opentelemetry_span from opentelemetry.sdk.trace import Span +from humanloop.otel.helpers import read_from_opentelemetry_span, write_to_opentelemetry_span + def test_read_empty(test_span: Span): with pytest.raises(TypeError): diff --git a/tests/custom/sync/__init__.py b/tests/custom/sync/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/custom/sync/test_client.py b/tests/custom/sync/test_client.py new file mode 100644 index 00000000..ac83d259 --- /dev/null +++ b/tests/custom/sync/test_client.py @@ -0,0 +1,128 @@ +import logging +from pathlib import Path +from typing import Literal +from unittest.mock import Mock, patch + +import pytest + +from humanloop.error import HumanloopRuntimeError +from humanloop.sync.sync_client import SerializableFileType, SyncClient + + +@pytest.fixture +def mock_client() -> Mock: + return Mock() + + +@pytest.fixture +def sync_client(mock_client: Mock, tmp_path: Path) -> SyncClient: + return SyncClient( + client=mock_client, + base_dir=str(tmp_path), + cache_size=10, + log_level=logging.DEBUG, # DEBUG level for testing # noqa: F821 + ) + + +def test_init(sync_client: SyncClient, tmp_path: Path): + """Test basic initialization of SyncClient.""" + # GIVEN a SyncClient instance + # THEN it should be initialized with correct base 
directory, cache size and file types + assert sync_client.base_dir == tmp_path + assert sync_client._cache_size == 10 + assert sync_client.SERIALIZABLE_FILE_TYPES == frozenset(["prompt", "agent"]) + + +def test_normalize_path(sync_client: SyncClient): + """Test path normalization functionality.""" + # GIVEN various file paths with different formats + test_cases = [ + ("path/to/file.prompt", "path/to/file"), + ("path\\to\\file.agent", "path/to/file"), + ("trailing/slashes/file.agent/", "trailing/slashes/file"), + ("multiple//slashes//file.prompt", "multiple/slashes/file"), + ] + + for input_path, expected in test_cases: + # WHEN they are normalized + normalized = sync_client._normalize_path(input_path) + # THEN they should be converted to the expected format + assert normalized == expected + + # Test absolute path raises error + with pytest.raises(HumanloopRuntimeError, match="Absolute paths are not supported"): + sync_client._normalize_path("/leading/slashes/file.prompt") + + +def test_is_file(sync_client: SyncClient): + """Test file type detection.""" + # GIVEN various file paths + # WHEN checking if they are valid file types + # THEN only .prompt and .agent files should return True + assert sync_client.is_file("test.prompt") + assert sync_client.is_file("test.agent") + assert not sync_client.is_file("test.txt") + assert not sync_client.is_file("test") + + +def test_save_and_read_file(sync_client: SyncClient): + """Test saving and reading files.""" + # GIVEN a file content and path + content = "test content" + path = "test/path" + file_type: SerializableFileType = "prompt" + + # WHEN saving the file + sync_client._save_serialized_file(content, path, "prompt") + saved_path = sync_client.base_dir / path + saved_path = saved_path.parent / f"{saved_path.stem}.{file_type}" + + # THEN the file should exist on disk + assert saved_path.exists() + + # WHEN reading the file + read_content = sync_client.get_file_content(path, file_type) + + # THEN the content should match + assert read_content == content + + +def test_error_handling(sync_client: SyncClient): + """Test error handling in various scenarios.""" + # GIVEN a nonexistent file + # WHEN trying to read it + # THEN a HumanloopRuntimeError should be raised + with pytest.raises(HumanloopRuntimeError, match="Local file not found"): + sync_client.get_file_content("nonexistent", "prompt") + + # GIVEN an API error + # WHEN trying to pull a file + # THEN it should return False + with patch.object(sync_client.client.files, "retrieve_by_path", side_effect=Exception("API Error")): + assert not sync_client._pull_file("test.prompt") + + +def test_cache_functionality(sync_client: SyncClient): + """Test LRU cache functionality.""" + # GIVEN a test file + content = "test content" + path = "test/path" + file_type: Literal["prompt", "agent"] = "prompt" + sync_client._save_serialized_file(content, path, file_type) + + # WHEN reading the file for the first time + sync_client.get_file_content(path, file_type) + # THEN it should hit disk (implicitly verified by no cache hit) + + # WHEN modifying the file on disk + saved_path = sync_client.base_dir / f"{path}.{file_type}" + saved_path.write_text("modified content") + + # THEN subsequent reads should use cache + assert sync_client.get_file_content(path, file_type) == content + + # WHEN clearing the cache + sync_client.clear_cache() + + # THEN new content should be read from disk + assert sync_client.get_file_content(path, file_type) == "modified content" diff --git a/tests/custom/types.py b/tests/custom/types.py new 
file mode 100644 index 00000000..b270d9fa --- /dev/null +++ b/tests/custom/types.py @@ -0,0 +1,16 @@ +from typing import NamedTuple, Protocol + +from humanloop import FileType +from humanloop.client import Humanloop + + +class GetHumanloopClientFn(Protocol): + def __call__(self, use_local_files: bool = False) -> Humanloop: ... + + +class SyncableFile(NamedTuple): + path: str + type: FileType + model: str + id: str = "" + version_id: str = "" diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py deleted file mode 100644 index d14042a3..00000000 --- a/tests/integration/conftest.py +++ /dev/null @@ -1,169 +0,0 @@ -import io -import os -import uuid -from contextlib import contextmanager, redirect_stdout -from dataclasses import dataclass -from typing import Any, ContextManager, Generator, TextIO - -import dotenv -import pytest -from humanloop.client import Humanloop -from humanloop.requests.prompt_kernel_request import PromptKernelRequestParams - - -@dataclass -class TestIdentifiers: - file_id: str - file_path: str - - -@pytest.fixture() -def capture_stdout() -> ContextManager[TextIO]: - @contextmanager - def _context_manager(): - f = io.StringIO() - with redirect_stdout(f): - yield f - - return _context_manager # type: ignore [return-value] - - -@pytest.fixture(scope="session") -def openai_key() -> str: - dotenv.load_dotenv() - if not os.getenv("OPENAI_API_KEY"): - pytest.fail("OPENAI_API_KEY is not set for integration tests") - return os.getenv("OPENAI_API_KEY") # type: ignore [return-value] - - -@pytest.fixture(scope="session") -def humanloop_test_client() -> Humanloop: - dotenv.load_dotenv() - if not os.getenv("HUMANLOOP_API_KEY"): - pytest.fail("HUMANLOOP_API_KEY is not set for integration tests") - return Humanloop(api_key=os.getenv("HUMANLOOP_API_KEY")) # type: ignore [return-value] - - -@pytest.fixture(scope="function") -def sdk_test_dir(humanloop_test_client: Humanloop) -> Generator[str, None, None]: - path = f"SDK_INTEGRATION_TEST_{uuid.uuid4()}" - try: - response = humanloop_test_client.directories.create(path=path) - yield response.path - humanloop_test_client.directories.delete(id=response.id) - except Exception as e: - pytest.fail(f"Failed to create directory {path}: {e}") - - -@pytest.fixture(scope="function") -def test_prompt_config() -> PromptKernelRequestParams: - return { - "provider": "openai", - "model": "gpt-4o-mini", - "temperature": 0.5, - "template": [ - { - "role": "system", - "content": "You are a helpful assistant. 
You must answer the user's question truthfully and at the level of a 5th grader.", - }, - { - "role": "user", - "content": "{{question}}", - }, - ], - } - - -@pytest.fixture(scope="function") -def eval_dataset(humanloop_test_client: Humanloop, sdk_test_dir: str) -> Generator[TestIdentifiers, None, None]: - dataset_path = f"{sdk_test_dir}/eval_dataset" - try: - response = humanloop_test_client.datasets.upsert( - path=dataset_path, - datapoints=[ - { - "inputs": { - "question": "What is the capital of the France?", - }, - }, - { - "inputs": { - "question": "What is the capital of the Germany?", - }, - }, - { - "inputs": { - "question": "What is 2+2?", - }, - }, - ], - ) - yield TestIdentifiers(file_id=response.id, file_path=response.path) - humanloop_test_client.datasets.delete(id=response.id) - except Exception as e: - pytest.fail(f"Failed to create dataset {dataset_path}: {e}") - - -@pytest.fixture(scope="function") -def eval_prompt( - humanloop_test_client: Humanloop, sdk_test_dir: str, openai_key: str, test_prompt_config: dict[str, Any] -) -> Generator[TestIdentifiers, None, None]: - prompt_path = f"{sdk_test_dir}/eval_prompt" - try: - response = humanloop_test_client.prompts.upsert( - path=prompt_path, - **test_prompt_config, - ) - yield TestIdentifiers(file_id=response.id, file_path=response.path) - humanloop_test_client.prompts.delete(id=response.id) - except Exception as e: - pytest.fail(f"Failed to create prompt {prompt_path}: {e}") - - -@pytest.fixture(scope="function") -def prompt( - humanloop_test_client: Humanloop, sdk_test_dir: str, openai_key: str, test_prompt_config: dict[str, Any] -) -> Generator[TestIdentifiers, None, None]: - prompt_path = f"{sdk_test_dir}/prompt" - try: - response = humanloop_test_client.prompts.upsert( - path=prompt_path, - **test_prompt_config, - ) - yield TestIdentifiers(file_id=response.id, file_path=response.path) - humanloop_test_client.prompts.delete(id=response.id) - except Exception as e: - pytest.fail(f"Failed to create prompt {prompt_path}: {e}") - - -@pytest.fixture(scope="function") -def output_not_null_evaluator( - humanloop_test_client: Humanloop, sdk_test_dir: str -) -> Generator[TestIdentifiers, None, None]: - evaluator_path = f"{sdk_test_dir}/output_not_null_evaluator" - try: - response = humanloop_test_client.evaluators.upsert( - path=evaluator_path, - spec={ - "arguments_type": "target_required", - "return_type": "boolean", - "code": """ -def output_not_null(log: dict) -> bool: - return log["output"] is not None - """, - "evaluator_type": "python", - }, - ) - yield TestIdentifiers(file_id=response.id, file_path=response.path) - humanloop_test_client.evaluators.delete(id=response.id) - except Exception as e: - pytest.fail(f"Failed to create evaluator {evaluator_path}: {e}") - - -@pytest.fixture(scope="function") -def id_for_staging_environment(humanloop_test_client: Humanloop, eval_prompt: TestIdentifiers) -> str: - response = humanloop_test_client.prompts.list_environments(id=eval_prompt.file_id) - for environment in response: - if environment.name == "staging": - return environment.id - pytest.fail("Staging environment not found") diff --git a/tests/utils/assets/models/__init__.py b/tests/utils/assets/models/__init__.py index 3a1c852e..2cf01263 100644 --- a/tests/utils/assets/models/__init__.py +++ b/tests/utils/assets/models/__init__.py @@ -5,7 +5,7 @@ from .circle import CircleParams from .object_with_defaults import ObjectWithDefaultsParams from .object_with_optional_field import ObjectWithOptionalFieldParams -from .shape import 
ShapeParams, Shape_CircleParams, Shape_SquareParams +from .shape import Shape_CircleParams, Shape_SquareParams, ShapeParams from .square import SquareParams from .undiscriminated_shape import UndiscriminatedShapeParams diff --git a/tests/utils/assets/models/circle.py b/tests/utils/assets/models/circle.py index 759fe3eb..6125ca54 100644 --- a/tests/utils/assets/models/circle.py +++ b/tests/utils/assets/models/circle.py @@ -3,6 +3,7 @@ # This file was auto-generated by Fern from our API Definition. import typing_extensions + from humanloop.core.serialization import FieldMetadata diff --git a/tests/utils/assets/models/object_with_defaults.py b/tests/utils/assets/models/object_with_defaults.py index ef14f7b2..a977b1d2 100644 --- a/tests/utils/assets/models/object_with_defaults.py +++ b/tests/utils/assets/models/object_with_defaults.py @@ -3,7 +3,6 @@ # This file was auto-generated by Fern from our API Definition. import typing_extensions -import typing_extensions class ObjectWithDefaultsParams(typing_extensions.TypedDict): diff --git a/tests/utils/assets/models/object_with_optional_field.py b/tests/utils/assets/models/object_with_optional_field.py index dc3e3eb7..e4ffe724 100644 --- a/tests/utils/assets/models/object_with_optional_field.py +++ b/tests/utils/assets/models/object_with_optional_field.py @@ -2,15 +2,17 @@ # This file was auto-generated by Fern from our API Definition. -import typing_extensions -import typing -from humanloop.core.serialization import FieldMetadata import datetime as dt +import typing import uuid + +import typing_extensions from .color import Color from .shape import ShapeParams from .undiscriminated_shape import UndiscriminatedShapeParams +from humanloop.core.serialization import FieldMetadata + class ObjectWithOptionalFieldParams(typing_extensions.TypedDict): literal: typing.Literal["lit_one"] diff --git a/tests/utils/assets/models/shape.py b/tests/utils/assets/models/shape.py index 540ccabd..56394d93 100644 --- a/tests/utils/assets/models/shape.py +++ b/tests/utils/assets/models/shape.py @@ -3,8 +3,11 @@ # This file was auto-generated by Fern from our API Definition. from __future__ import annotations -import typing_extensions + import typing + +import typing_extensions + from humanloop.core.serialization import FieldMetadata diff --git a/tests/utils/assets/models/square.py b/tests/utils/assets/models/square.py index da4a2111..3f25005d 100644 --- a/tests/utils/assets/models/square.py +++ b/tests/utils/assets/models/square.py @@ -3,6 +3,7 @@ # This file was auto-generated by Fern from our API Definition. import typing_extensions + from humanloop.core.serialization import FieldMetadata diff --git a/tests/utils/assets/models/undiscriminated_shape.py b/tests/utils/assets/models/undiscriminated_shape.py index 68876a23..99f12b30 100644 --- a/tests/utils/assets/models/undiscriminated_shape.py +++ b/tests/utils/assets/models/undiscriminated_shape.py @@ -3,6 +3,7 @@ # This file was auto-generated by Fern from our API Definition. 
import typing + from .circle import CircleParams from .square import SquareParams diff --git a/tests/utils/test_serialization.py b/tests/utils/test_serialization.py index 2ad8e1b5..40cc847b 100644 --- a/tests/utils/test_serialization.py +++ b/tests/utils/test_serialization.py @@ -2,10 +2,10 @@ from typing import Any, List -from humanloop.core.serialization import convert_and_respect_annotation_metadata - from .assets.models import ObjectWithOptionalFieldParams, ShapeParams +from humanloop.core.serialization import convert_and_respect_annotation_metadata + UNION_TEST: ShapeParams = {"radius_measurement": 1.0, "shape_type": "circle", "id": "1"} UNION_TEST_CONVERTED = {"shapeType": "circle", "radiusMeasurement": 1.0, "id": "1"}
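
The integration tests above exercise the new pull/local-files workflow end to end. As a usage reference, here is a minimal sketch of that workflow. It assumes the Humanloop constructor accepts the same use_local_files flag that the GetHumanloopClientFn test factory forwards; pull(), prompts.call() and HumanloopRuntimeError are taken directly from the diff, while the prompt path shown is a hypothetical placeholder.

import os

from humanloop import Humanloop
from humanloop.error import HumanloopRuntimeError

# Assumption: use_local_files is forwarded to the client the same way the test factory does.
client = Humanloop(api_key=os.environ["HUMANLOOP_API_KEY"], use_local_files=True)

# Mirror remote Prompt/Agent files into ./humanloop/ as *.prompt / *.agent files.
client.pull()

# Calls by path now resolve against the pulled local files.
client.prompts.call(
    path="my_project/prompts/my_prompt",  # hypothetical path, for illustration only
    messages=[{"role": "user", "content": "Testing"}],
)

# Pinning a version or environment is incompatible with local files and raises.
try:
    client.prompts.call(
        path="my_project/prompts/my_prompt",
        environment="production",
        messages=[{"role": "user", "content": "Testing"}],
    )
except HumanloopRuntimeError:
    pass  # expected: "Cannot use local file ... version_id or environment was specified"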
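
For the CLI path, the integration tests drive the new pull command in-process with click's CliRunner rather than a subprocess. A condensed sketch of that pattern, using only the command, flags and messages asserted in tests/custom/integration/test_sync_cli.py (a valid HUMANLOOP_API_KEY must be available in the environment or a .env file):

from click.testing import CliRunner

from humanloop.cli.__main__ import cli

runner = CliRunner(mix_stderr=False)
result = runner.invoke(cli, ["pull", "--local-files-directory", "humanloop", "--verbose"])

assert result.exit_code == 0
assert "Pulling files from Humanloop..." in result.output
assert "Pull completed" in result.output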
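
The sync unit tests construct SyncClient directly against a mock API client. Condensed here as a reference for the constructor arguments and cache behaviour those tests cover; the Mock stands in for a real Humanloop client, and _save_serialized_file is the same private helper the tests call, so treat this as a test-style sketch rather than a public API guarantee.

import logging
from unittest.mock import Mock

from humanloop.sync.sync_client import SyncClient

sync_client = SyncClient(client=Mock(), base_dir="humanloop", cache_size=10, log_level=logging.DEBUG)

assert sync_client.is_file("greeting.prompt")   # only .prompt / .agent files are syncable
assert not sync_client.is_file("notes.txt")

sync_client._save_serialized_file("raw content", "greeting", "prompt")
assert sync_client.get_file_content("greeting", "prompt") == "raw content"  # served from the LRU cache on repeat reads
sync_client.clear_cache()                       # the next read goes back to disk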