diff --git a/.fernignore b/.fernignore index 112f779b..fd7adc81 100644 --- a/.fernignore +++ b/.fernignore @@ -13,10 +13,13 @@ mypy.ini README.md src/humanloop/decorators src/humanloop/otel +src/humanloop/sync +src/humanloop/cli +pytest.ini ## Tests -tests/ +tests/custom ## CI diff --git a/.gitignore b/.gitignore index a55ede77..f5cda9d9 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ poetry.toml .env tests/assets/*.jsonl tests/assets/*.parquet +# Ignore humanloop directory which could mistakenly be committed when testing sync functionality as it's used as the default sync directory +humanloop diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..8ab80e5d --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +addopts = -n auto diff --git a/tests/integration/__init__.py b/src/humanloop/cli/__init__.py similarity index 100% rename from tests/integration/__init__.py rename to src/humanloop/cli/__init__.py diff --git a/src/humanloop/cli/__main__.py b/src/humanloop/cli/__main__.py new file mode 100644 index 00000000..3ab53cfb --- /dev/null +++ b/src/humanloop/cli/__main__.py @@ -0,0 +1,250 @@ +import logging +import os +import sys +import time +from functools import wraps +from typing import Callable, Optional + +import click +from dotenv import load_dotenv + +from humanloop import Humanloop +from humanloop.sync.sync_client import SyncClient + +# Set up logging +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) # Set back to INFO level +console_handler = logging.StreamHandler() +formatter = logging.Formatter("%(message)s") # Simplified formatter +console_handler.setFormatter(formatter) +if not logger.hasHandlers(): + logger.addHandler(console_handler) + +# Color constants +SUCCESS_COLOR = "green" +ERROR_COLOR = "red" +INFO_COLOR = "blue" +WARNING_COLOR = "yellow" + + +def load_api_key(env_file: Optional[str] = None) -> str: + """Load API key from .env file or environment variable. + + Args: + env_file: Optional path to .env file + + Returns: + str: The loaded API key + + Raises: + click.ClickException: If no API key is found + """ + # Try specific .env file if provided, otherwise default to .env in current directory + if env_file: + if not load_dotenv(env_file): # load_dotenv returns False if file not found/invalid + raise click.ClickException( + click.style( + f"Failed to load environment file: {env_file} (file not found or invalid format)", + fg=ERROR_COLOR, + ) + ) + else: + load_dotenv() # Attempt to load from default .env in current directory + + # Get API key from environment + api_key = os.getenv("HUMANLOOP_API_KEY") + if not api_key: + raise click.ClickException( + click.style( + "No API key found. Set HUMANLOOP_API_KEY in .env file or environment, or use --api-key", fg=ERROR_COLOR + ) + ) + + return api_key + + +def get_client( + api_key: Optional[str] = None, env_file: Optional[str] = None, base_url: Optional[str] = None +) -> Humanloop: + """Instantiate a Humanloop client for the CLI. + + Args: + api_key: Optional API key provided directly + env_file: Optional path to .env file + base_url: Optional base URL for the API + + Returns: + Humanloop: Configured client instance + + Raises: + click.ClickException: If no API key is found + """ + if not api_key: + api_key = load_api_key(env_file) + return Humanloop(api_key=api_key, base_url=base_url) + + +def common_options(f: Callable) -> Callable: + """Decorator for common CLI options.""" + + @click.option( + "--api-key", + help="Humanloop API key. 
If not provided, uses HUMANLOOP_API_KEY from .env or environment.", + default=None, + show_default=False, + ) + @click.option( + "--env-file", + help="Path to .env file. If not provided, looks for .env in current directory.", + default=None, + type=click.Path(exists=True), + show_default=False, + ) + @click.option( + "--local-files-directory", + "--local-dir", + help="Directory (relative to the current working directory) where Humanloop files are stored locally (default: humanloop/).", + default="humanloop", + type=click.Path(), + ) + @click.option( + "--base-url", + default=None, + hidden=True, + ) + @wraps(f) + def wrapper(*args, **kwargs): + return f(*args, **kwargs) + + return wrapper + + +def handle_sync_errors(f: Callable) -> Callable: + """Decorator for handling sync operation errors. + + If an error occurs in any operation that uses this decorator, it will be logged and the program will exit with a non-zero exit code. + """ + + @wraps(f) + def wrapper(*args, **kwargs): + try: + return f(*args, **kwargs) + except Exception as e: + click.echo(click.style(str(f"Error: {e}"), fg=ERROR_COLOR)) + sys.exit(1) + + return wrapper + + +@click.group( + help="Humanloop CLI for managing sync operations.", + context_settings={ + "help_option_names": ["-h", "--help"], + "max_content_width": 100, + }, +) +def cli(): # Does nothing because used as a group for other subcommands (pull, push, etc.) + """Humanloop CLI for managing sync operations.""" + pass + + +@cli.command() +@click.option( + "--path", + "-p", + help="Path in the Humanloop workspace to pull from (file or directory). You can pull an entire directory (e.g. 'my/directory') " + "or a specific file (e.g. 'my/directory/my_prompt.prompt'). When pulling a directory, all files within that directory and its subdirectories will be included. " + "If not specified, pulls from the root of the remote workspace.", + default=None, +) +@click.option( + "--environment", + "-e", + help="Environment to pull from (e.g. 'production', 'staging')", + default=None, +) +@click.option( + "--verbose", + "-v", + is_flag=True, + help="Show detailed information about the operation", +) +@click.option( + "--quiet", + "-q", + is_flag=True, + help="Suppress output of successful files", +) +@handle_sync_errors +@common_options +def pull( + path: Optional[str], + environment: Optional[str], + api_key: Optional[str], + env_file: Optional[str], + local_files_directory: str, + base_url: Optional[str], + verbose: bool, + quiet: bool, +): + """Pull Prompt and Agent files from Humanloop to your local filesystem. + + \b + This command will: + 1. Fetch Prompt and Agent files from your Humanloop workspace + 2. Save them to your local filesystem (directory specified by --local-files-directory, default: humanloop/) + 3. Maintain the same directory structure as in Humanloop + 4. Add appropriate file extensions (.prompt or .agent) + + \b + For example, with the default --local-files-directory=humanloop, files will be saved as: + ./humanloop/ + ├── my_project/ + │ ├── prompts/ + │ │ ├── my_prompt.prompt + │ │ └── nested/ + │ │ └── another_prompt.prompt + │ └── agents/ + │ └── my_agent.agent + └── another_project/ + └── prompts/ + └── other_prompt.prompt + + \b + If you specify --local-files-directory=data/humanloop, files will be saved in ./data/humanloop/ instead. + + If a file exists both locally and in the Humanloop workspace, the local file will be overwritten + with the version from Humanloop. Files that only exist locally will not be affected. 
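As a rough illustration of how this command might be exercised (flag names are taken from the options declared above; the workspace path `samples` and the environment `production` are placeholders, and `HUMANLOOP_API_KEY` is assumed to be available via the environment or a `.env` file), the `pull` command can be driven in-process with click's test runner, or from a shell via `python -m humanloop.cli pull -p samples -e production -v`:

```python
# Sketch only: invoke the CLI `pull` command in-process using click's CliRunner
# (the same helper the integration test fixtures below rely on).
from click.testing import CliRunner

from humanloop.cli.__main__ import cli

runner = CliRunner()
result = runner.invoke(
    cli,
    ["pull", "--path", "samples", "--environment", "production", "--verbose"],
)
print(result.exit_code)  # 0 on success, 1 if handle_sync_errors caught a failure
print(result.output)     # mirrors the progress/summary messages echoed by the command
```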
+ + Currently only supports syncing Prompt and Agent files. Other file types will be skipped.""" + client = get_client(api_key, env_file, base_url) + sync_client = SyncClient( + client, base_dir=local_files_directory, log_level=logging.DEBUG if verbose else logging.WARNING + ) + + click.echo(click.style("Pulling files from Humanloop...", fg=INFO_COLOR)) + click.echo(click.style(f"Path: {path or '(root)'}", fg=INFO_COLOR)) + click.echo(click.style(f"Environment: {environment or '(default)'}", fg=INFO_COLOR)) + + start_time = time.time() + successful_files, failed_files = sync_client.pull(path, environment) + duration_ms = int((time.time() - start_time) * 1000) + + # Determine if the operation was successful based on failed_files + is_successful = not failed_files + duration_color = SUCCESS_COLOR if is_successful else ERROR_COLOR + click.echo(click.style(f"Pull completed in {duration_ms}ms", fg=duration_color)) + + if successful_files and not quiet: + click.echo(click.style(f"\nSuccessfully pulled {len(successful_files)} files:", fg=SUCCESS_COLOR)) + for file in successful_files: + click.echo(click.style(f" ✓ {file}", fg=SUCCESS_COLOR)) + + if failed_files: + click.echo(click.style(f"\nFailed to pull {len(failed_files)} files:", fg=ERROR_COLOR)) + for file in failed_files: + click.echo(click.style(f" ✗ {file}", fg=ERROR_COLOR)) + + +if __name__ == "__main__": + cli() diff --git a/src/humanloop/client.py b/src/humanloop/client.py index 74cd6c97..ab6b2abc 100644 --- a/src/humanloop/client.py +++ b/src/humanloop/client.py @@ -1,6 +1,7 @@ import os import typing -from typing import Any, List, Optional, Sequence +from typing import Any, List, Optional, Sequence, Tuple +import logging import httpx from opentelemetry.sdk.resources import Resource @@ -18,7 +19,7 @@ ) from humanloop.base_client import AsyncBaseHumanloop, BaseHumanloop -from humanloop.overload import overload_call, overload_log +from humanloop.overload import overload_client from humanloop.decorators.flow import flow as flow_decorator_factory from humanloop.decorators.prompt import prompt_decorator_factory from humanloop.decorators.tool import tool_decorator_factory as tool_decorator_factory @@ -29,6 +30,9 @@ from humanloop.otel.processor import HumanloopSpanProcessor from humanloop.prompt_utils import populate_template from humanloop.prompts.client import PromptsClient +from humanloop.sync.sync_client import SyncClient, DEFAULT_CACHE_SIZE + +logger = logging.getLogger("humanloop.sdk") class ExtendedEvalsClient(EvaluationsClient): @@ -87,8 +91,9 @@ class Humanloop(BaseHumanloop): """ See docstring of :class:`BaseHumanloop`. - This class extends the base client with custom evaluation utilities - and decorators for declaring Files in code. + This class extends the base client with custom evaluation utilities, + decorators for declaring Files in code, and utilities for syncing + files between Humanloop and local filesystem. """ def __init__( @@ -102,6 +107,9 @@ def __init__( httpx_client: typing.Optional[httpx.Client] = None, opentelemetry_tracer_provider: Optional[TracerProvider] = None, opentelemetry_tracer: Optional[Tracer] = None, + use_local_files: bool = False, + local_files_directory: str = "humanloop", + cache_size: int = DEFAULT_CACHE_SIZE, ): """ Extends the base client with custom evaluation utilities and @@ -111,6 +119,27 @@ def __init__( You can provide a TracerProvider and a Tracer to integrate with your existing telemetry system. If not provided, an internal TracerProvider will be used. 
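For orientation, a minimal sketch of the new constructor options and the `pull` helper added below (the `samples/test` path and the question are illustrative, and `HUMANLOOP_API_KEY` is assumed to be set in the environment or a `.env` file):

```python
# Sketch only: enable local files, populate ./humanloop/ from the workspace,
# then call a prompt whose definition is read from disk.
from humanloop import Humanloop

client = Humanloop(
    use_local_files=True,               # resolve prompt/agent definitions from local files
    local_files_directory="humanloop",  # the default; files are stored under ./humanloop/
)

# Equivalent to the CLI `pull`: returns (successful_paths, failed_paths).
successful, failed = client.pull()

# Paths are relative to local_files_directory and carry no extension, so
# ./humanloop/samples/test.prompt is addressed as "samples/test".
response = client.prompts.call(
    path="samples/test",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
)
print(response)
```

Note that passing `version_id` or `environment` alongside `use_local_files=True` raises a `HumanloopRuntimeError`, since the two sources of truth would conflict (see `_handle_local_files` in `overload.py` below).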
+ + Parameters + ---------- + base_url: Optional base URL for the API + environment: The environment to use (default: DEFAULT) + api_key: Your Humanloop API key (default: from HUMANLOOP_API_KEY env var) + timeout: Optional timeout for API requests + follow_redirects: Whether to follow redirects + httpx_client: Optional custom httpx client + opentelemetry_tracer_provider: Optional tracer provider for telemetry + opentelemetry_tracer: Optional tracer for telemetry + use_local_files: Whether to use local files for prompts and agents + local_files_directory: Base directory where local prompt and agent files are stored (default: "humanloop"). + This is relative to the current working directory. For example: + - "humanloop" will look for files in "./humanloop/" + - "data/humanloop" will look for files in "./data/humanloop/" + When using paths in the API, they must be relative to this directory. For example, + if local_files_directory="humanloop" and you have a file at "humanloop/samples/test.prompt", + you would reference it as "samples/test" in your code. + cache_size: Maximum number of files to cache when use_local_files is True (default: DEFAULT_CACHE_SIZE). + This parameter has no effect if use_local_files is False. """ super().__init__( base_url=base_url, @@ -121,6 +150,17 @@ def __init__( httpx_client=httpx_client, ) + self.use_local_files = use_local_files + + # Warn user if cache_size is non-default but use_local_files is False — has no effect and will therefore be ignored + if not self.use_local_files and cache_size != DEFAULT_CACHE_SIZE: + logger.warning( + f"The specified cache_size={cache_size} will have no effect because use_local_files=False. " + f"File caching is only active when local files are enabled." + ) + + # Check if cache_size is non-default but use_local_files is False + self._sync_client = SyncClient(client=self, base_dir=local_files_directory, cache_size=cache_size) eval_client = ExtendedEvalsClient(client_wrapper=self._client_wrapper) eval_client.client = self self.evaluations = eval_client @@ -128,10 +168,14 @@ def __init__( # Overload the .log method of the clients to be aware of Evaluation Context # and the @flow decorator providing the trace_id - self.prompts = overload_log(client=self.prompts) - self.prompts = overload_call(client=self.prompts) - self.flows = overload_log(client=self.flows) - self.tools = overload_log(client=self.tools) + self.prompts = overload_client( + client=self.prompts, sync_client=self._sync_client, use_local_files=self.use_local_files + ) + self.agents = overload_client( + client=self.agents, sync_client=self._sync_client, use_local_files=self.use_local_files + ) + self.flows = overload_client(client=self.flows) + self.tools = overload_client(client=self.tools) if opentelemetry_tracer_provider is not None: self._tracer_provider = opentelemetry_tracer_provider @@ -351,6 +395,53 @@ def agent(): attributes=attributes, ) + def pull(self, path: Optional[str] = None, environment: Optional[str] = None) -> Tuple[List[str], List[str]]: + """Pull Prompt and Agent files from Humanloop to local filesystem. + + This method will: + 1. Fetch Prompt and Agent files from your Humanloop workspace + 2. Save them to your local filesystem (directory specified by `local_files_directory`, default: "humanloop") + 3. Maintain the same directory structure as in Humanloop + 4. Add appropriate file extensions (`.prompt` or `.agent`) + + The path parameter can be used in two ways: + - If it points to a specific file (e.g. 
"path/to/file.prompt" or "path/to/file.agent"), only that file will be pulled + - If it points to a directory (e.g. "path/to/directory"), all Prompt and Agent files in that directory and its subdirectories will be pulled + - If no path is provided, all Prompt and Agent files will be pulled + + The operation will overwrite existing files with the latest version from Humanloop + but will not delete local files that don't exist in the remote workspace. + + Currently only supports syncing Prompt and Agent files. Other file types will be skipped. + + For example, with the default `local_files_directory="humanloop"`, files will be saved as: + ``` + ./humanloop/ + ├── my_project/ + │ ├── prompts/ + │ │ ├── my_prompt.prompt + │ │ └── nested/ + │ │ └── another_prompt.prompt + │ └── agents/ + │ └── my_agent.agent + └── another_project/ + └── prompts/ + └── other_prompt.prompt + ``` + + If you specify `local_files_directory="data/humanloop"`, files will be saved in ./data/humanloop/ instead. + + :param path: Optional path to either a specific file (e.g. "path/to/file.prompt") or a directory (e.g. "path/to/directory"). + If not provided, all Prompt and Agent files will be pulled. + :param environment: The environment to pull the files from. + :return: Tuple of two lists: + - First list contains paths of successfully synced files + - Second list contains paths of files that failed to sync (due to API errors, missing content, + or filesystem issues) + :raises HumanloopRuntimeError: If there's an error communicating with the API + """ + return self._sync_client.pull(environment=environment, path=path) + class AsyncHumanloop(AsyncBaseHumanloop): """ diff --git a/src/humanloop/overload.py b/src/humanloop/overload.py index b0c83215..92c83e6b 100644 --- a/src/humanloop/overload.py +++ b/src/humanloop/overload.py @@ -1,54 +1,69 @@ import inspect import logging import types -from typing import TypeVar, Union +from typing import Any, Dict, Optional, Union, Callable from humanloop.context import ( get_decorator_context, get_evaluation_context, get_trace_id, ) -from humanloop.evals.run import HumanloopRuntimeError - -from humanloop.evaluators.client import EvaluatorsClient -from humanloop.flows.client import FlowsClient +from humanloop.error import HumanloopRuntimeError +from humanloop.sync.sync_client import SyncClient from humanloop.prompts.client import PromptsClient +from humanloop.flows.client import FlowsClient +from humanloop.datasets.client import DatasetsClient +from humanloop.agents.client import AgentsClient from humanloop.tools.client import ToolsClient +from humanloop.evaluators.client import EvaluatorsClient +from humanloop.types import FileType from humanloop.types.create_evaluator_log_response import CreateEvaluatorLogResponse from humanloop.types.create_flow_log_response import CreateFlowLogResponse from humanloop.types.create_prompt_log_response import CreatePromptLogResponse from humanloop.types.create_tool_log_response import CreateToolLogResponse from humanloop.types.prompt_call_response import PromptCallResponse +from humanloop.types.agent_call_response import AgentCallResponse logger = logging.getLogger("humanloop.sdk") - -CLIENT_TYPE = TypeVar("CLIENT_TYPE", PromptsClient, FlowsClient, EvaluatorsClient, ToolsClient) - - -def overload_log(client: CLIENT_TYPE) -> CLIENT_TYPE: - """ - Wrap the `log` method of the provided Humanloop client to use EVALUATION_CONTEXT. - - This makes the overloaded log actions be aware of whether the created Log is - part of an Evaluation (e.g. 
one started by eval_utils.run_eval). - """ - # Copy the original log method in a hidden attribute - client._log = client.log # type: ignore [attr-defined] - - def _overload_log( - # It's safe to only consider kwargs since the original - # log method bans positional arguments - self, - **kwargs, - ) -> Union[ - CreatePromptLogResponse, - CreateToolLogResponse, - CreateFlowLogResponse, - CreateEvaluatorLogResponse, - ]: - trace_id = get_trace_id() - if trace_id is not None and type(client) is FlowsClient: +LogResponseType = Union[ + CreatePromptLogResponse, + CreateToolLogResponse, + CreateFlowLogResponse, + CreateEvaluatorLogResponse, +] + +CallResponseType = Union[ + PromptCallResponse, + AgentCallResponse, +] + + +def _get_file_type_from_client( + client: Union[PromptsClient, AgentsClient, ToolsClient, FlowsClient, DatasetsClient, EvaluatorsClient], +) -> FileType: + """Get the file type based on the client type.""" + if isinstance(client, PromptsClient): + return "prompt" + elif isinstance(client, AgentsClient): + return "agent" + elif isinstance(client, ToolsClient): + return "tool" + elif isinstance(client, FlowsClient): + return "flow" + elif isinstance(client, DatasetsClient): + return "dataset" + elif isinstance(client, EvaluatorsClient): + return "evaluator" + + raise ValueError(f"Unsupported client type: {type(client)}") + + +def _handle_tracing_context(kwargs: Dict[str, Any], client: Any) -> Dict[str, Any]: + """Handle tracing context for both log and call methods.""" + trace_id = get_trace_id() + if trace_id is not None: + if "flow" in str(type(client).__name__).lower(): context = get_decorator_context() if context is None: raise HumanloopRuntimeError("Internal error: trace_id context is set outside a decorator context.") @@ -56,69 +71,146 @@ def _overload_log( f"Using `flows.log()` is not allowed: Flow decorator " f"for File {context.path} manages the tracing and trace completion." 
) - if trace_id is not None: - if "trace_parent_id" in kwargs: - logger.warning( - "Ignoring trace_parent_id argument at line %d: the Flow decorator manages tracing.", - inspect.currentframe().f_lineno, # type: ignore [union-attr] - ) - kwargs = { - **kwargs, - "trace_parent_id": trace_id, - } - evaluation_context = get_evaluation_context() - if evaluation_context is not None: - kwargs_eval, eval_callback = evaluation_context.log_args_with_context( - path=kwargs.get("path"), log_args=kwargs - ) - try: - response = self._log(**kwargs_eval) - except Exception as e: - # Re-raising as HumanloopDecoratorError so the decorators don't catch it - raise HumanloopRuntimeError from e - if eval_callback is not None: - eval_callback(response.id) - else: - try: - response = self._log(**kwargs) - except Exception as e: - # Re-raising as HumanloopDecoratorError so the decorators don't catch it - raise HumanloopRuntimeError from e - - return response - # Replace the original log method with the overloaded one - client.log = types.MethodType(_overload_log, client) # type: ignore [assignment] - # Return the client with the overloaded log method - logger.debug("Overloaded the .call method of %s", client) - return client + if "trace_parent_id" in kwargs: + logger.warning( + "Ignoring trace_parent_id argument at line %d: the Flow decorator manages tracing.", + inspect.currentframe().f_lineno, # type: ignore[union-attr] + ) + kwargs = { + **kwargs, + "trace_parent_id": trace_id, + } + return kwargs + + +def _handle_local_files( + kwargs: Dict[str, Any], + client: Any, + sync_client: Optional[SyncClient], + use_local_files: bool, +) -> Dict[str, Any]: + """Handle local file loading if enabled.""" + if not use_local_files or "path" not in kwargs or sync_client is None: + return kwargs + + if "id" in kwargs: + raise HumanloopRuntimeError("Can only specify one of `id` or `path`") + + # Check if version_id or environment is specified + use_remote = any(["version_id" in kwargs, "environment" in kwargs]) + normalized_path = sync_client._normalize_path(kwargs["path"]) + + if use_remote: + raise HumanloopRuntimeError( + f"Cannot use local file for `{normalized_path}` as version_id or environment was specified. " + "Please either remove version_id/environment to use local files, or set use_local_files=False to use remote files." + ) + + file_type = _get_file_type_from_client(client) + if file_type not in SyncClient.SERIALIZABLE_FILE_TYPES: + raise HumanloopRuntimeError(f"Local files are not supported for `{file_type}` files.") + + # If file_type is already specified in kwargs, it means user provided a PromptKernelRequestParams object + if file_type in kwargs and not isinstance(kwargs[file_type], str): + logger.warning( + f"Ignoring local file for `{normalized_path}` as {file_type} parameters were directly provided. " + "Using provided parameters instead." 
+ ) + return kwargs + + try: + file_content = sync_client.get_file_content(normalized_path, file_type) # type: ignore[arg-type] # file_type was checked above + kwargs[file_type] = file_content + except HumanloopRuntimeError as e: + raise HumanloopRuntimeError(f"Failed to use local file for `{normalized_path}`: {str(e)}") + + return kwargs + + +def _handle_evaluation_context(kwargs: Dict[str, Any]) -> tuple[Dict[str, Any], Optional[Callable[[str], None]]]: + """Handle evaluation context for logging.""" + evaluation_context = get_evaluation_context() + if evaluation_context is not None: + return evaluation_context.log_args_with_context(path=kwargs.get("path"), log_args=kwargs) + return kwargs, None + + +def _overload_log(self: Any, sync_client: Optional[SyncClient], use_local_files: bool, **kwargs) -> LogResponseType: + try: + # Special handling for flows - prevent direct log usage + if type(self) is FlowsClient and get_trace_id() is not None: + context = get_decorator_context() + if context is None: + raise HumanloopRuntimeError("Internal error: trace_id context is set outside a decorator context.") + raise HumanloopRuntimeError( + f"Using `flows.log()` is not allowed: Flow decorator " + f"for File {context.path} manages the tracing and trace completion." + ) + kwargs = _handle_tracing_context(kwargs, self) -def overload_call(client: PromptsClient) -> PromptsClient: - client._call = client.call # type: ignore [attr-defined] - - def _overload_call(self, **kwargs) -> PromptCallResponse: - # None if not logging inside a decorator - trace_id = get_trace_id() - if trace_id is not None: - if "trace_parent_id" in kwargs: - logger.warning( - "Ignoring trace_parent_id argument at line %d: the Flow decorator manages tracing.", - inspect.currentframe().f_lineno, # type: ignore [union-attr] - ) - kwargs = { - **kwargs, - "trace_parent_id": trace_id, - } - - try: - response = self._call(**kwargs) - except Exception as e: - # Re-raising as HumanloopDecoratorError so the decorators don't catch it - raise HumanloopRuntimeError from e + # Handle local files for Prompts and Agents clients + if _get_file_type_from_client(self) in ["prompt", "agent"]: + if sync_client is None: + logger.error("sync_client is None but client has log method and use_local_files=%s", use_local_files) + raise HumanloopRuntimeError("sync_client is required for clients that support local file operations") + kwargs = _handle_local_files(kwargs, self, sync_client, use_local_files) + kwargs, eval_callback = _handle_evaluation_context(kwargs) + response = self._log(**kwargs) # Use stored original method + if eval_callback is not None: + eval_callback(response.id) return response + except HumanloopRuntimeError: + # Re-raise HumanloopRuntimeError without wrapping to preserve the message + raise + except Exception as e: + # Only wrap non-HumanloopRuntimeError exceptions + raise HumanloopRuntimeError from e + + +def _overload_call(self: Any, sync_client: Optional[SyncClient], use_local_files: bool, **kwargs) -> CallResponseType: + try: + kwargs = _handle_tracing_context(kwargs, self) + kwargs = _handle_local_files(kwargs, self, sync_client, use_local_files) + return self._call(**kwargs) # Use stored original method + except HumanloopRuntimeError: + # Re-raise HumanloopRuntimeError without wrapping to preserve the message + raise + except Exception as e: + # Only wrap non-HumanloopRuntimeError exceptions + raise HumanloopRuntimeError from e + + +def overload_client( + client: Any, + sync_client: Optional[SyncClient] = None, + use_local_files: 
bool = False, +) -> Any: + """Overloads client methods to add tracing, local file handling, and evaluation context.""" + # Store original log method as _log for all clients. Used in flow decorator + if hasattr(client, "log") and not hasattr(client, "_log"): + client._log = client.log # type: ignore[attr-defined] + + # Create a closure to capture sync_client and use_local_files + def log_wrapper(self: Any, **kwargs) -> LogResponseType: + return _overload_log(self, sync_client, use_local_files, **kwargs) + + client.log = types.MethodType(log_wrapper, client) + + # Overload call method for Prompt and Agent clients + if _get_file_type_from_client(client) in ["prompt", "agent"]: + if sync_client is None and use_local_files: + logger.error("sync_client is None but client has call method and use_local_files=%s", use_local_files) + raise HumanloopRuntimeError("sync_client is required for clients that support call operations") + if hasattr(client, "call") and not hasattr(client, "_call"): + client._call = client.call # type: ignore[attr-defined] + + # Create a closure to capture sync_client and use_local_files + def call_wrapper(self: Any, **kwargs) -> CallResponseType: + return _overload_call(self, sync_client, use_local_files, **kwargs) + + client.call = types.MethodType(call_wrapper, client) - # Replace the original log method with the overloaded one - client.call = types.MethodType(_overload_call, client) # type: ignore [assignment] return client diff --git a/src/humanloop/sync/__init__.py b/src/humanloop/sync/__init__.py new file mode 100644 index 00000000..007659df --- /dev/null +++ b/src/humanloop/sync/__init__.py @@ -0,0 +1,3 @@ +from humanloop.sync.sync_client import SyncClient + +__all__ = ["SyncClient"] diff --git a/src/humanloop/sync/sync_client.py b/src/humanloop/sync/sync_client.py new file mode 100644 index 00000000..b1cf091a --- /dev/null +++ b/src/humanloop/sync/sync_client.py @@ -0,0 +1,376 @@ +import logging +from pathlib import Path +from typing import List, Optional, Tuple, TYPE_CHECKING, Union +from functools import lru_cache +import typing +import time +from humanloop.error import HumanloopRuntimeError +import json + +if TYPE_CHECKING: + from humanloop.base_client import BaseHumanloop + +# Set up logging +logger = logging.getLogger("humanloop.sdk.sync") +logger.setLevel(logging.INFO) +console_handler = logging.StreamHandler() +formatter = logging.Formatter("%(message)s") +console_handler.setFormatter(formatter) +if not logger.hasHandlers(): + logger.addHandler(console_handler) + +# Default cache size for file content caching +DEFAULT_CACHE_SIZE = 100 + + +def format_api_error(error: Exception) -> str: + """Format API error messages to be more user-friendly.""" + error_msg = str(error) + if "status_code" not in error_msg or "body" not in error_msg: + return error_msg + + try: + # Extract the body part and parse as JSON + body_str = error_msg.split("body: ")[1] + # Convert Python dict string to valid JSON by: + # 1. Escaping double quotes + # 2. 
Replacing single quotes with double quotes + body_str = body_str.replace('"', '\\"').replace("'", '"') + body = json.loads(body_str) + + # Get the detail from the body + detail = body.get("detail", {}) + + # Handle both string and dictionary types for detail + if isinstance(detail, str): + return detail + elif isinstance(detail, dict): + return detail.get("description") or detail.get("msg") or error_msg + else: + return error_msg + except Exception as e: + logger.debug(f"Failed to parse error message: {str(e)}") + return error_msg + + +SerializableFileType = typing.Literal["prompt", "agent"] + + +class SyncClient: + """Client for managing synchronization between local filesystem and Humanloop. + + This client provides file synchronization between Humanloop and the local filesystem, + with built-in caching for improved performance. The cache uses Python's LRU (Least + Recently Used) cache to automatically manage memory usage by removing least recently + accessed files when the cache is full. + + The cache is automatically updated when files are pulled or saved, and can be + manually cleared using the clear_cache() method. + """ + + # File types that can be serialized to/from the filesystem + SERIALIZABLE_FILE_TYPES = frozenset(typing.get_args(SerializableFileType)) + + def __init__( + self, + client: "BaseHumanloop", + base_dir: str = "humanloop", + cache_size: int = DEFAULT_CACHE_SIZE, + log_level: int = logging.WARNING, + ): + """ + Parameters + ---------- + client: Humanloop client instance + base_dir: Base directory for synced files (default: "humanloop") + cache_size: Maximum number of files to cache (default: DEFAULT_CACHE_SIZE) + log_level: Log level for logging (default: WARNING) + """ + self.client = client + self.base_dir = Path(base_dir) + self._cache_size = cache_size + + logger.setLevel(log_level) + + # Create a new cached version of get_file_content with the specified cache size + self.get_file_content = lru_cache(maxsize=cache_size)( # type: ignore [assignment] + self._get_file_content_implementation, + ) + + def _get_file_content_implementation(self, path: str, file_type: SerializableFileType) -> str: + """Implementation of get_file_content without the cache decorator. + + This is the actual implementation that gets wrapped by lru_cache. + + Args: + path: The normalized path to the file (without extension) + file_type: The type of file to get the content of (SerializableFileType) + + Returns: + The raw file content + + Raises: + HumanloopRuntimeError: In two cases: + 1. If the file doesn't exist at the expected location + 2. If there's a filesystem error when trying to read the file + (e.g., permission denied, file is locked, etc.) + """ + # Construct path to local file + local_path = self.base_dir / path + # Add appropriate extension + local_path = local_path.parent / f"{local_path.stem}.{file_type}" + + if not local_path.exists(): + raise HumanloopRuntimeError(f"Local file not found: {local_path}") + + try: + # Read the raw file content + with open(local_path) as f: + file_content = f.read() + logger.debug(f"Using local file content from {local_path}") + return file_content + except Exception as e: + raise HumanloopRuntimeError(f"Error reading local file {local_path}: {str(e)}") + + def get_file_content(self, path: str, file_type: SerializableFileType) -> str: + """Get the raw file content of a file from cache or filesystem. + + This method uses an LRU cache to store file contents. 
When the cache is full, + the least recently accessed files are automatically removed to make space. + + Args: + path: The normalized path to the file (without extension) + file_type: The type of file (Prompt or Agent) + + Returns: + The raw file content + + Raises: + HumanloopRuntimeError: If the file doesn't exist or can't be read + """ + return self._get_file_content_implementation(path, file_type) + + def clear_cache(self) -> None: + """Clear the LRU cache.""" + self.get_file_content.cache_clear() # type: ignore [attr-defined] + + def _normalize_path(self, path: str) -> str: + """Normalize the path by: + 1. Converting to a Path object to handle platform-specific separators + 2. Removing any file extensions + 3. Converting to a string with forward slashes and no leading/trailing slashes + """ + # Convert to Path object to handle platform-specific separators + path_obj = Path(path) + + # Reject absolute paths to ensure all paths are relative to base_dir. + # This maintains consistency with the remote filesystem where paths are relative to project root. + if path_obj.is_absolute(): + raise HumanloopRuntimeError( + f"Absolute paths are not supported: `{path}`. " + f"Paths should be relative to the base directory (`{self.base_dir}`)." + ) + + # Remove extension, convert to string with forward slashes, and remove leading/trailing slashes + normalized = str(path_obj.with_suffix("")) + # Replace all backslashes and normalize multiple forward slashes + return "/".join(part for part in normalized.replace("\\", "/").split("/") if part) + + def is_file(self, path: str) -> bool: + """Check if the path is a file by checking for .{file_type} extension for serializable file types.""" + return path.endswith(tuple(f".{file_type}" for file_type in self.SERIALIZABLE_FILE_TYPES)) + + def _save_serialized_file( + self, + serialized_content: str, + file_path: str, + file_type: SerializableFileType, + ) -> None: + """Save serialized file to local filesystem.""" + try: + # Create full path including base_dir prefix + full_path = self.base_dir / file_path + # Create directory if it doesn't exist + full_path.parent.mkdir(parents=True, exist_ok=True) + + # Add file type extension + new_path = full_path.parent / f"{full_path.stem}.{file_type}" + + # Write raw file content to file + with open(new_path, "w") as f: + f.write(serialized_content) + except Exception as e: + logger.error(f"Failed to write {file_type} {file_path} to disk: {str(e)}") + raise + + def _pull_file(self, path: str, environment: Optional[str] = None) -> bool: + """Pull a specific file from Humanloop to local filesystem. 
+ + Returns: + True if the file was successfully pulled, False otherwise + """ + try: + file = self.client.files.retrieve_by_path( + path=path, + environment=environment, + include_raw_file_content=True, + ) + + if file.type not in self.SERIALIZABLE_FILE_TYPES: + logger.error(f"Unsupported file type: {file.type}") + return False + + if not file.raw_file_content: # type: ignore [union-attr] + logger.error(f"No content found for {file.type} {path}") + return False + + self._save_serialized_file( + serialized_content=file.raw_file_content, # type: ignore [union-attr] + file_path=file.path, + file_type=typing.cast(SerializableFileType, file.type), + ) + return True + except Exception as e: + logger.error(f"Failed to pull file {path}: {str(e)}") + return False + + def _pull_directory( + self, + path: Optional[str] = None, + environment: Optional[str] = None, + ) -> Tuple[List[str], List[str]]: + """Sync Prompt and Agent files from Humanloop to local filesystem. + + Returns: + Tuple of two lists: + - First list contains paths of successfully synced files + - Second list contains paths of files that failed to sync. + Failures can occur due to missing content in the response or errors during local file writing. + + Raises: + HumanloopRuntimeError: If there's an error communicating with the API + """ + successful_files = [] + failed_files = [] + page = 1 + + logger.debug(f"Fetching files from directory: {path or '(root)'} in environment: {environment or '(default)'}") + + while True: + try: + logger.debug(f"`{path}`: Requesting page {page} of files") + response = self.client.files.list_files( + type=list(self.SERIALIZABLE_FILE_TYPES), + page=page, + size=100, + include_raw_file_content=True, + environment=environment, + path=path, + ) + + if len(response.records) == 0: + logger.debug(f"Finished reading files for path `{path}`") + break + + logger.debug(f"`{path}`: Read page {page} containing {len(response.records)} files") + + # Process each file + for file in response.records: + # Skip if not a serializable file type + if file.type not in self.SERIALIZABLE_FILE_TYPES: + logger.warning(f"Skipping unsupported file type: {file.type}") + continue + + file_type: SerializableFileType = typing.cast( + SerializableFileType, + file.type, + ) + + # Skip if no raw file content + if not getattr(file, "raw_file_content", None) or not file.raw_file_content: # type: ignore [union-attr] + logger.warning(f"No content found for {file.type} {file.path}") + failed_files.append(file.path) + continue + + try: + logger.debug(f"Writing {file.type} {file.path} to disk") + self._save_serialized_file( + serialized_content=file.raw_file_content, # type: ignore [union-attr] + file_path=file.path, + file_type=file_type, + ) + successful_files.append(file.path) + except Exception as e: + failed_files.append(file.path) + logger.error(f"Failed to save {file.path}: {str(e)}") + + page += 1 + except Exception as e: + formatted_error = format_api_error(e) + raise HumanloopRuntimeError(f"Failed to fetch page {page}: {formatted_error}") + + if successful_files: + logger.info(f"Successfully pulled {len(successful_files)} files") + if failed_files: + logger.warning(f"Failed to pull {len(failed_files)} files") + + return successful_files, failed_files + + def pull(self, path: Optional[str] = None, environment: Optional[str] = None) -> Tuple[List[str], List[str]]: + """Pull files from Humanloop to local filesystem. + + If the path ends with .prompt or .agent, pulls that specific file. + Otherwise, pulls all files under the specified path. 
+ If no path is provided, pulls all files from the root. + + Args: + path: The path to pull from (either a specific file or directory) + environment: The environment to pull from + + Returns: + Tuple of two lists: + - First list contains paths of successfully synced files + - Second list contains paths of files that failed to sync (e.g. failed to write to disk or missing raw content) + + Raises: + HumanloopRuntimeError: If there's an error communicating with the API + """ + start_time = time.time() + normalized_path = self._normalize_path(path) if path else None + + logger.info( + f"Starting pull operation: path={normalized_path or '(root)'}, environment={environment or '(default)'}" + ) + + try: + if ( + normalized_path is None or path is None + ): # path being None means normalized_path is None, but we check both for improved type safety + # Pull all files from the root + logger.debug("Pulling all files from root") + successful_files, failed_files = self._pull_directory( + path=None, + environment=environment, + ) + else: + if self.is_file(path.strip()): + logger.debug(f"Pulling file: {normalized_path}") + if self._pull_file(path=normalized_path, environment=environment): + successful_files = [path] + failed_files = [] + else: + successful_files = [] + failed_files = [path] + else: + logger.debug(f"Pulling directory: {normalized_path}") + successful_files, failed_files = self._pull_directory(normalized_path, environment) + + # Clear the cache at the end of each pull operation + self.clear_cache() + + duration_ms = int((time.time() - start_time) * 1000) + logger.info(f"Pull completed in {duration_ms}ms: {len(successful_files)} files succeeded") + + return successful_files, failed_files + except Exception as e: + raise HumanloopRuntimeError(f"Pull operation failed: {str(e)}") diff --git a/tests/custom/README.md b/tests/custom/README.md new file mode 100644 index 00000000..14ff7ed4 --- /dev/null +++ b/tests/custom/README.md @@ -0,0 +1,19 @@ +# Custom Tests Directory + +This directory contains custom tests for the Humanloop Python SDK. While the main SDK is auto-generated using [Fern](https://buildwithfern.com/), this directory allows us to add our own test implementations that won't be overwritten during regeneration. + +## Why Custom Tests? 
+ +- **Preservation**: Tests in this directory won't be overwritten when regenerating the SDK +- **Custom Implementation**: Allows testing of our own implementations beyond the auto-generated code +- **Integration**: Enables testing of how our custom code works with the auto-generated SDK + +## Running Tests + +```bash +# Run all custom tests +pytest tests/custom/ + +# Run specific test file +pytest tests/custom/sync/test_sync_client.py +``` diff --git a/tests/otel/__init__.py b/tests/custom/__init__.py similarity index 100% rename from tests/otel/__init__.py rename to tests/custom/__init__.py diff --git a/tests/assets/exact_match.py b/tests/custom/assets/exact_match.py similarity index 100% rename from tests/assets/exact_match.py rename to tests/custom/assets/exact_match.py diff --git a/tests/assets/levenshtein.py b/tests/custom/assets/levenshtein.py similarity index 100% rename from tests/assets/levenshtein.py rename to tests/custom/assets/levenshtein.py diff --git a/tests/conftest.py b/tests/custom/conftest.py similarity index 58% rename from tests/conftest.py rename to tests/custom/conftest.py index 80e3b336..8e400483 100644 --- a/tests/conftest.py +++ b/tests/custom/conftest.py @@ -1,18 +1,9 @@ -from dataclasses import asdict, dataclass import os -import random -import string -import time -from typing import Callable, Generator -import typing +from typing import Generator from unittest.mock import MagicMock -from dotenv import load_dotenv import pytest -from humanloop.base_client import BaseHumanloop -from humanloop.client import Humanloop -from humanloop.otel.exporter import HumanloopSpanExporter -from humanloop.otel.processor import HumanloopSpanProcessor +from dotenv import load_dotenv from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam from opentelemetry.instrumentation.anthropic import AnthropicInstrumentor from opentelemetry.instrumentation.cohere import CohereInstrumentor @@ -26,8 +17,10 @@ from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter from opentelemetry.trace import Tracer -if typing.TYPE_CHECKING: - from humanloop.client import BaseHumanloop +from humanloop.client import Humanloop +from humanloop.otel.exporter import HumanloopSpanExporter +from humanloop.otel.processor import HumanloopSpanProcessor +from tests.custom.types import GetHumanloopClientFn @pytest.fixture(scope="function") @@ -87,10 +80,24 @@ def opentelemetry_test_configuration( instrumentor.uninstrument() +@pytest.fixture(scope="session") +def get_humanloop_client() -> GetHumanloopClientFn: + load_dotenv() + if not os.getenv("HUMANLOOP_API_KEY"): + pytest.fail("HUMANLOOP_API_KEY is not set for integration tests") + + def _get_humanloop_client(use_local_files: bool = False) -> Humanloop: + return Humanloop( + api_key=os.getenv("HUMANLOOP_API_KEY"), + use_local_files=use_local_files, + ) + + return _get_humanloop_client + + @pytest.fixture(scope="function") def opentelemetry_hl_test_configuration( opentelemetry_test_provider: TracerProvider, - humanloop_client: BaseHumanloop, ) -> Generator[tuple[Tracer, InMemorySpanExporter], None, None]: """Configure OTel backend with HumanloopSpanProcessor. 
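A hypothetical example of how a test might consume the session-scoped `get_humanloop_client` fixture introduced above (the test name and assertion are illustrative only, not part of this change set):

```python
# Sketch only: request the fixture, build a client with local files enabled,
# and check that a pull reports no failed files.
from tests.custom.types import GetHumanloopClientFn


def test_pull_reports_no_failures(get_humanloop_client: GetHumanloopClientFn) -> None:
    client = get_humanloop_client(use_local_files=True)
    successful_files, failed_files = client.pull()
    assert failed_files == []
```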
@@ -162,117 +169,3 @@ def call_llm_messages() -> list[ChatCompletionMessageParam]: "content": "Bonjour!", }, ] - - -@dataclass -class APIKeys: - openai: str - humanloop: str - - -@pytest.fixture(scope="session") -def api_keys() -> APIKeys: - openai_key = os.getenv("OPENAI_API_KEY") - humanloop_key = os.getenv("HUMANLOOP_API_KEY") - for key_name, key_value in [ - ("OPENAI_API_KEY", openai_key), - ("HUMANLOOP_API_KEY", humanloop_key), - ]: - if key_value is None: - raise ValueError(f"{key_name} is not set in .env file") - api_keys = APIKeys( - openai=openai_key, # type: ignore [arg-type] - humanloop=humanloop_key, # type: ignore [arg-type] - ) - for key, value in asdict(api_keys).items(): - if value is None: - raise ValueError(f"{key.upper()} key is not set in .env file") - return api_keys - - -@pytest.fixture(scope="session") -def humanloop_client(api_keys: APIKeys) -> Humanloop: - return Humanloop(api_key=api_keys.humanloop) - - -@pytest.fixture(scope="session", autouse=True) -def load_env(): - load_dotenv() - - -def directory_cleanup(directory_id: str, humanloop_client: Humanloop): - response = humanloop_client.directories.get(directory_id) - for file in response.files: - file_id = file.id - if file.type == "prompt": - client = humanloop_client.prompts # type: ignore [assignment] - elif file.type == "tool": - client = humanloop_client.tools # type: ignore [assignment] - elif file.type == "dataset": - client = humanloop_client.datasets # type: ignore [assignment] - elif file.type == "evaluator": - client = humanloop_client.evaluators # type: ignore [assignment] - elif file.type == "flow": - client = humanloop_client.flows # type: ignore [assignment] - else: - raise NotImplementedError(f"Unknown HL file type {file.type}") - client.delete(file_id) - - for subdirectory in response.subdirectories: - directory_cleanup( - directory_id=subdirectory.id, - humanloop_client=humanloop_client, - ) - - humanloop_client.directories.delete(id=response.id) - - -@dataclass -class DirectoryIdentifiers: - path: str - id: str - - -@pytest.fixture() -def test_directory( - humanloop_client: Humanloop, -) -> Generator[DirectoryIdentifiers, None, None]: - # Generate a random alphanumeric directory name to avoid conflicts - def get_random_string(length: int = 16) -> str: - return "".join([random.choice(string.ascii_letters + "0123456789") for _ in range(length)]) - - directory_path = "SDK_integ_test_" + get_random_string() - response = humanloop_client.directories.create(path=directory_path) - assert response.path == directory_path - try: - yield DirectoryIdentifiers( - path=response.path, - id=response.id, - ) - finally: - time.sleep(1) - directory_cleanup(response.id, humanloop_client) - - -@pytest.fixture() -def get_test_path(test_directory: DirectoryIdentifiers) -> Callable[[str], str]: - def generate_path(name: str) -> str: - return f"{test_directory.path}/{name}" - - return generate_path - - -# @pytest.fixture(scope="session", autouse=True) -# def cleanup_test_dirs(humanloop_client: Humanloop): -# def _cleanup_all_test_dirs(): -# dirs = humanloop_client.directories.list() -# for dir in dirs: -# if dir.path.startswith("SDK_integ_test_"): -# directory_cleanup( -# directory_id=dir.id, -# humanloop_client=humanloop_client, -# ) - -# _cleanup_all_test_dirs() -# yield -# _cleanup_all_test_dirs() diff --git a/tests/custom/integration/__init__.py b/tests/custom/integration/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/custom/integration/conftest.py 
b/tests/custom/integration/conftest.py new file mode 100644 index 00000000..039b0f1c --- /dev/null +++ b/tests/custom/integration/conftest.py @@ -0,0 +1,272 @@ +import os +import time +import typing +import uuid +from collections.abc import Generator +from dataclasses import dataclass +from typing import Union + +import dotenv +import pytest +from click.testing import CliRunner + +from humanloop import AgentResponse, PromptResponse +from humanloop.requests.prompt_kernel_request import PromptKernelRequestParams +from tests.custom.types import GetHumanloopClientFn, SyncableFile + + +@dataclass +class ResourceIdentifiers: + file_id: str + file_path: str + + +@pytest.fixture(scope="session") +def openai_key() -> str: + dotenv.load_dotenv() + if not os.getenv("OPENAI_API_KEY"): + pytest.fail("OPENAI_API_KEY is not set for integration tests") + return os.getenv("OPENAI_API_KEY") # type: ignore [return-value] + + +@pytest.fixture(scope="function") +def sdk_test_dir(get_humanloop_client: GetHumanloopClientFn) -> Generator[str, None, None]: + humanloop_client = get_humanloop_client() + + def _get_subclient(file_type: str): + try: + return { + "agent": humanloop_client.agents, + "prompt": humanloop_client.prompts, + "dataset": humanloop_client.datasets, + "evaluator": humanloop_client.evaluators, + "flow": humanloop_client.flows, + "tool": humanloop_client.tools, + }[file_type] + except KeyError: + raise NotImplementedError(f"Unknown file type: {file_type}") + + def cleanup_directory(directory_id: str): + directory_response = humanloop_client.directories.get(id=directory_id) + for subdirectory in directory_response.subdirectories: + cleanup_directory(subdirectory.id) + for file in directory_response.files: + subclient = _get_subclient(typing.cast(str, file.type)) + subclient.delete(id=file.id) + humanloop_client.directories.delete(id=directory_response.id) + + path = f"SDK_INTEGRATION_TEST_{uuid.uuid4()}" + response = None + try: + response = humanloop_client.directories.create(path=path) + yield response.path + except Exception as e: + pytest.fail(f"Failed to create directory {path}: {e}") + finally: + if response: + time.sleep(5) + cleanup_directory(response.id) + + +@pytest.fixture(scope="function") +def test_prompt_config() -> PromptKernelRequestParams: + return { + "provider": "openai", + "model": "gpt-4o-mini", + "temperature": 0.5, + "template": [ + { + "role": "system", + "content": "You are a helpful assistant. 
You must answer the user's question truthfully and at the level of a 5th grader.", + }, + { + "role": "user", + "content": "{{question}}", + }, + ], + } + + +@pytest.fixture(scope="function") +def prompt( + get_humanloop_client: GetHumanloopClientFn, + sdk_test_dir: str, + test_prompt_config: PromptKernelRequestParams, +) -> Generator[ResourceIdentifiers, None, None]: + humanloop_client = get_humanloop_client() + prompt_path = f"{sdk_test_dir}/prompt" + try: + response = humanloop_client.prompts.upsert( + path=prompt_path, + **test_prompt_config, + ) + yield ResourceIdentifiers(file_id=response.id, file_path=response.path) + humanloop_client.prompts.delete(id=response.id) + except Exception as e: + pytest.fail(f"Failed to create prompt {prompt_path}: {e}") + + +@pytest.fixture(scope="function") +def eval_dataset( + get_humanloop_client: GetHumanloopClientFn, sdk_test_dir: str +) -> Generator[ResourceIdentifiers, None, None]: + humanloop_client = get_humanloop_client() + dataset_path = f"{sdk_test_dir}/eval_dataset" + try: + response = humanloop_client.datasets.upsert( + path=dataset_path, + datapoints=[ + { + "inputs": { + "question": "What is the capital of the France?", + }, + }, + { + "inputs": { + "question": "What is the capital of the Germany?", + }, + }, + { + "inputs": { + "question": "What is 2+2?", + }, + }, + ], + ) + yield ResourceIdentifiers(file_id=response.id, file_path=response.path) + humanloop_client.datasets.delete(id=response.id) + except Exception as e: + pytest.fail(f"Failed to create dataset {dataset_path}: {e}") + + +@pytest.fixture(scope="function") +def eval_prompt( + get_humanloop_client: GetHumanloopClientFn, + sdk_test_dir: str, + openai_key: str, + test_prompt_config: PromptKernelRequestParams, +) -> Generator[ResourceIdentifiers, None, None]: + humanloop_client = get_humanloop_client() + prompt_path = f"{sdk_test_dir}/eval_prompt" + try: + response = humanloop_client.prompts.upsert( + path=prompt_path, + **test_prompt_config, + ) + yield ResourceIdentifiers(file_id=response.id, file_path=response.path) + humanloop_client.prompts.delete(id=response.id) + except Exception as e: + pytest.fail(f"Failed to create prompt {prompt_path}: {e}") + + +@pytest.fixture(scope="function") +def output_not_null_evaluator( + get_humanloop_client: GetHumanloopClientFn, sdk_test_dir: str +) -> Generator[ResourceIdentifiers, None, None]: + humanloop_client = get_humanloop_client() + evaluator_path = f"{sdk_test_dir}/output_not_null_evaluator" + try: + response = humanloop_client.evaluators.upsert( + path=evaluator_path, + spec={ + "arguments_type": "target_required", + "return_type": "boolean", + "code": """ +def output_not_null(log: dict) -> bool: + return log["output"] is not None + """, + "evaluator_type": "python", + }, + ) + yield ResourceIdentifiers(file_id=response.id, file_path=response.path) + humanloop_client.evaluators.delete(id=response.id) + except Exception as e: + pytest.fail(f"Failed to create evaluator {evaluator_path}: {e}") + + +@pytest.fixture(scope="function") +def id_for_staging_environment(get_humanloop_client: GetHumanloopClientFn, eval_prompt: ResourceIdentifiers) -> str: + humanloop_client = get_humanloop_client() + response = humanloop_client.prompts.list_environments(id=eval_prompt.file_id) + for environment in response: + if environment.name == "staging": + return environment.id + pytest.fail("Staging environment not found") + + +@pytest.fixture +def syncable_files_fixture( + get_humanloop_client: GetHumanloopClientFn, + sdk_test_dir: str, +) -> 
Generator[list[SyncableFile], None, None]: + """Creates a predefined structure of files in Humanloop for testing sync.""" + files: list[SyncableFile] = [ + SyncableFile( + path="prompts/gpt-4", + type="prompt", + model="gpt-4", + ), + SyncableFile( + path="prompts/gpt-4o", + type="prompt", + model="gpt-4o", + ), + SyncableFile( + path="prompts/nested/complex/gpt-4o", + type="prompt", + model="gpt-4o", + ), + SyncableFile( + path="agents/gpt-4", + type="agent", + model="gpt-4", + ), + SyncableFile( + path="agents/gpt-4o", + type="agent", + model="gpt-4o", + ), + ] + + humanloop_client = get_humanloop_client() + created_files = [] + for file in files: + full_path = f"{sdk_test_dir}/{file.path}" + response: Union[AgentResponse, PromptResponse] + if file.type == "prompt": + response = humanloop_client.prompts.upsert( + path=full_path, + model=file.model, + ) + elif file.type == "agent": + response = humanloop_client.agents.upsert( + path=full_path, + model=file.model, + ) + created_files.append( + SyncableFile( + path=full_path, type=file.type, model=file.model, id=response.id, version_id=response.version_id + ) + ) + + yield created_files + + +@pytest.fixture +def cli_runner() -> CliRunner: + """GIVEN a CLI runner + THEN it should be configured to catch exceptions + """ + return CliRunner(mix_stderr=False) + + +@pytest.fixture +def no_humanloop_api_key_in_env(monkeypatch): + """Fixture that removes HUMANLOOP_API_KEY from environment variables. + + Use this fixture in tests that verify behavior when no API key is available + in the environment (but could still be loaded from .env files). + """ + # Remove API key from environment + monkeypatch.delenv("HUMANLOOP_API_KEY", raising=False) + yield diff --git a/tests/integration/test_decorators.py b/tests/custom/integration/test_decorators.py similarity index 56% rename from tests/integration/test_decorators.py rename to tests/custom/integration/test_decorators.py index 218453a6..59638896 100644 --- a/tests/integration/test_decorators.py +++ b/tests/custom/integration/test_decorators.py @@ -2,27 +2,28 @@ from typing import Any from openai import OpenAI -from humanloop.client import Humanloop -from humanloop.types.chat_message import ChatMessage + +from tests.custom.integration.conftest import GetHumanloopClientFn def test_prompt_decorator( - humanloop_test_client: Humanloop, + get_humanloop_client: GetHumanloopClientFn, sdk_test_dir: str, test_prompt_config: dict[str, Any], openai_key: str, ): try: + humanloop_client = get_humanloop_client() prompt_path = f"{sdk_test_dir}/test_prompt" - prompt_response = humanloop_test_client.prompts.upsert( + prompt_response = humanloop_client.prompts.upsert( path=prompt_path, **test_prompt_config, ) - prompt_versions_response = humanloop_test_client.prompts.list_versions(id=prompt_response.id) + prompt_versions_response = humanloop_client.prompts.list_versions(id=prompt_response.id) assert len(prompt_versions_response.records) == 1 - @humanloop_test_client.prompt(path=prompt_path) + @humanloop_client.prompt(path=prompt_path) def my_prompt(question: str) -> str: openai_client = OpenAI(api_key=openai_key) @@ -37,26 +38,27 @@ def my_prompt(question: str) -> str: assert "paris" in my_prompt("What is the capital of the France?").lower() time.sleep(5) - prompt_versions_response = humanloop_test_client.prompts.list_versions(id=prompt_response.id) + prompt_versions_response = humanloop_client.prompts.list_versions(id=prompt_response.id) assert len(prompt_versions_response.records) == 2 - logs_response = 
humanloop_test_client.logs.list(file_id=prompt_response.id, page=1, size=50) + logs_response = humanloop_client.logs.list(file_id=prompt_response.id, page=1, size=50) assert logs_response.items is not None and len(logs_response.items) == 1 finally: - humanloop_test_client.prompts.delete(id=prompt_response.id) + humanloop_client.prompts.delete(id=prompt_response.id) def test_call_prompt_in_flow_decorator( - humanloop_test_client: Humanloop, + get_humanloop_client: GetHumanloopClientFn, sdk_test_dir: str, openai_key: str, ): try: + humanloop_client = get_humanloop_client() - @humanloop_test_client.flow(path=f"{sdk_test_dir}/test_flow") + @humanloop_client.flow(path=f"{sdk_test_dir}/test_flow") def my_flow(question: str) -> str: - response = humanloop_test_client.prompts.call( + response = humanloop_client.prompts.call( path=f"{sdk_test_dir}/test_prompt", prompt={ "provider": "openai", @@ -72,34 +74,35 @@ def my_flow(question: str) -> str: assert "paris" in my_flow("What is the capital of the France?").lower() time.sleep(5) - prompt_response = humanloop_test_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_prompt") + prompt_response = humanloop_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_prompt") assert prompt_response is not None - prompt_logs_response = humanloop_test_client.logs.list(file_id=prompt_response.id, page=1, size=50) + prompt_logs_response = humanloop_client.logs.list(file_id=prompt_response.id, page=1, size=50) assert prompt_logs_response.items is not None and len(prompt_logs_response.items) == 1 prompt_log = prompt_logs_response.items[0] - flow_response = humanloop_test_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_flow") + flow_response = humanloop_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_flow") assert flow_response is not None - flow_logs_response = humanloop_test_client.logs.list(file_id=flow_response.id, page=1, size=50) + flow_logs_response = humanloop_client.logs.list(file_id=flow_response.id, page=1, size=50) assert flow_logs_response.items is not None and len(flow_logs_response.items) == 1 flow_log = flow_logs_response.items[0] assert prompt_log.trace_parent_id == flow_log.id finally: - flow_response = humanloop_test_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_flow") + flow_response = humanloop_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_flow") if flow_response is not None: - humanloop_test_client.flows.delete(id=flow_response.id) - prompt_response = humanloop_test_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_prompt") + humanloop_client.flows.delete(id=flow_response.id) + prompt_response = humanloop_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_prompt") if prompt_response is not None: - humanloop_test_client.prompts.delete(id=prompt_response.id) + humanloop_client.prompts.delete(id=prompt_response.id) def test_flow_decorator_logs_exceptions( - humanloop_test_client: Humanloop, + get_humanloop_client: GetHumanloopClientFn, sdk_test_dir: str, ): try: + humanloop_client = get_humanloop_client() - @humanloop_test_client.flow(path=f"{sdk_test_dir}/test_flow_log_error") + @humanloop_client.flow(path=f"{sdk_test_dir}/test_flow_log_error") def my_flow(question: str) -> str: raise ValueError("This is a test exception") @@ -107,27 +110,28 @@ def my_flow(question: str) -> str: time.sleep(5) - flow_response = humanloop_test_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_flow_log_error") + flow_response = 
humanloop_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_flow_log_error") assert flow_response is not None - flow_logs_response = humanloop_test_client.logs.list(file_id=flow_response.id, page=1, size=50) + flow_logs_response = humanloop_client.logs.list(file_id=flow_response.id, page=1, size=50) assert flow_logs_response.items is not None and len(flow_logs_response.items) == 1 flow_log = flow_logs_response.items[0] assert flow_log.error is not None assert flow_log.output is None finally: - flow_response = humanloop_test_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_flow_log_error") + flow_response = humanloop_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_flow_log_error") if flow_response is not None: - humanloop_test_client.flows.delete(id=flow_response.id) + humanloop_client.flows.delete(id=flow_response.id) def test_flow_decorator_populates_output_message( - humanloop_test_client: Humanloop, + get_humanloop_client: GetHumanloopClientFn, sdk_test_dir: str, ): try: + humanloop_client = get_humanloop_client() - @humanloop_test_client.flow(path=f"{sdk_test_dir}/test_flow_log_output_message") + @humanloop_client.flow(path=f"{sdk_test_dir}/test_flow_log_output_message") def my_flow(question: str) -> dict[str, Any]: return {"role": "user", "content": question} @@ -135,11 +139,9 @@ def my_flow(question: str) -> dict[str, Any]: time.sleep(5) - flow_response = humanloop_test_client.files.retrieve_by_path( - path=f"{sdk_test_dir}/test_flow_log_output_message" - ) + flow_response = humanloop_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_flow_log_output_message") assert flow_response is not None - flow_logs_response = humanloop_test_client.logs.list(file_id=flow_response.id, page=1, size=50) + flow_logs_response = humanloop_client.logs.list(file_id=flow_response.id, page=1, size=50) assert flow_logs_response.items is not None and len(flow_logs_response.items) == 1 flow_log = flow_logs_response.items[0] assert flow_log.output_message is not None @@ -147,8 +149,6 @@ def my_flow(question: str) -> dict[str, Any]: assert flow_log.error is None finally: - flow_response = humanloop_test_client.files.retrieve_by_path( - path=f"{sdk_test_dir}/test_flow_log_output_message" - ) + flow_response = humanloop_client.files.retrieve_by_path(path=f"{sdk_test_dir}/test_flow_log_output_message") if flow_response is not None: - humanloop_test_client.flows.delete(id=flow_response.id) + humanloop_client.flows.delete(id=flow_response.id) diff --git a/tests/integration/test_evals.py b/tests/custom/integration/test_evals.py similarity index 66% rename from tests/integration/test_evals.py rename to tests/custom/integration/test_evals.py index 49bbb6dc..d8ba8996 100644 --- a/tests/integration/test_evals.py +++ b/tests/custom/integration/test_evals.py @@ -2,18 +2,20 @@ from typing import Any import pytest -from humanloop.client import Humanloop + from humanloop.error import HumanloopRuntimeError -from tests.integration.conftest import TestIdentifiers +from tests.custom.integration.conftest import ResourceIdentifiers +from tests.custom.types import GetHumanloopClientFn def test_eval_run_works_on_online_files( - humanloop_test_client: Humanloop, - output_not_null_evaluator: TestIdentifiers, - eval_dataset: TestIdentifiers, - eval_prompt: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + output_not_null_evaluator: ResourceIdentifiers, + eval_dataset: ResourceIdentifiers, + eval_prompt: ResourceIdentifiers, ) -> None: - humanloop_test_client.evaluations.run( # type: ignore 
[attr-defined] + humanloop_client = get_humanloop_client() + humanloop_client.evaluations.run( # type: ignore [attr-defined] name="test_eval_run", file={ "path": eval_prompt.file_path, @@ -29,29 +31,30 @@ def test_eval_run_works_on_online_files( ], ) time.sleep(5) - response = humanloop_test_client.evaluations.list(file_id=eval_prompt.file_id) + response = humanloop_client.evaluations.list(file_id=eval_prompt.file_id) assert response.items and len(response.items) == 1 evaluation_id = response.items[0].id - run_evaluation_response = humanloop_test_client.evaluations.list_runs_for_evaluation(id=evaluation_id) # type: ignore [attr-defined] + run_evaluation_response = humanloop_client.evaluations.list_runs_for_evaluation(id=evaluation_id) # type: ignore [attr-defined] assert run_evaluation_response.runs[0].status == "completed" def test_eval_run_version_id( - humanloop_test_client: Humanloop, - output_not_null_evaluator: TestIdentifiers, - eval_dataset: TestIdentifiers, - eval_prompt: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + output_not_null_evaluator: ResourceIdentifiers, + eval_dataset: ResourceIdentifiers, + eval_prompt: ResourceIdentifiers, test_prompt_config: dict[str, Any], ) -> None: + humanloop_client = get_humanloop_client() # GIVEN a prompt where a non-default version is created new_test_prompt_config = test_prompt_config.copy() new_test_prompt_config["temperature"] = 1 - new_prompt_version_response = humanloop_test_client.prompts.upsert( + new_prompt_version_response = humanloop_client.prompts.upsert( path=eval_prompt.file_path, **new_test_prompt_config, ) # WHEN creating an evaluation using version_id - humanloop_test_client.evaluations.run( # type: ignore [attr-defined] + humanloop_client.evaluations.run( # type: ignore [attr-defined] name="test_eval_run", file={ "id": new_prompt_version_response.id, @@ -68,44 +71,45 @@ def test_eval_run_version_id( ], ) # THEN we evaluate the version created in the test - evaluations_response = humanloop_test_client.evaluations.list(file_id=new_prompt_version_response.id) + evaluations_response = humanloop_client.evaluations.list(file_id=new_prompt_version_response.id) assert evaluations_response.items and len(evaluations_response.items) == 1 evaluation_id = evaluations_response.items[0].id - runs_response = humanloop_test_client.evaluations.list_runs_for_evaluation(id=evaluation_id) + runs_response = humanloop_client.evaluations.list_runs_for_evaluation(id=evaluation_id) assert runs_response.runs[0].status == "completed" assert ( runs_response.runs[0].version and runs_response.runs[0].version.version_id == new_prompt_version_response.version_id ) - list_versions_response = humanloop_test_client.prompts.list_versions(id=new_prompt_version_response.id) + list_versions_response = humanloop_client.prompts.list_versions(id=new_prompt_version_response.id) assert list_versions_response.records and len(list_versions_response.records) == 2 # THEN the version used in evaluation is not the default version - response = humanloop_test_client.prompts.get(id=new_prompt_version_response.id) + response = humanloop_client.prompts.get(id=new_prompt_version_response.id) assert response.version_id != new_prompt_version_response.version_id def test_eval_run_environment( - humanloop_test_client: Humanloop, - output_not_null_evaluator: TestIdentifiers, - eval_dataset: TestIdentifiers, - eval_prompt: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + output_not_null_evaluator: ResourceIdentifiers, + eval_dataset: ResourceIdentifiers, 
+ eval_prompt: ResourceIdentifiers, test_prompt_config: dict[str, Any], id_for_staging_environment: str, ) -> None: + humanloop_client = get_humanloop_client() # GIVEN a prompt deployed to staging environment new_test_prompt_config = test_prompt_config.copy() new_test_prompt_config["temperature"] = 1 - new_prompt_version_response = humanloop_test_client.prompts.upsert( + new_prompt_version_response = humanloop_client.prompts.upsert( path=eval_prompt.file_path, **new_test_prompt_config, ) - humanloop_test_client.prompts.set_deployment( + humanloop_client.prompts.set_deployment( id=new_prompt_version_response.id, environment_id=id_for_staging_environment, version_id=new_prompt_version_response.version_id, ) # WHEN creating an evaluation using environment - humanloop_test_client.evaluations.run( # type: ignore [attr-defined] + humanloop_client.evaluations.run( # type: ignore [attr-defined] name="test_eval_run", file={ "id": new_prompt_version_response.id, @@ -122,30 +126,31 @@ def test_eval_run_environment( ], ) # THEN evaluation is done with the version deployed to staging environment - evaluations_response = humanloop_test_client.evaluations.list(file_id=new_prompt_version_response.id) + evaluations_response = humanloop_client.evaluations.list(file_id=new_prompt_version_response.id) assert evaluations_response.items and len(evaluations_response.items) == 1 evaluation_id = evaluations_response.items[0].id - runs_response = humanloop_test_client.evaluations.list_runs_for_evaluation(id=evaluation_id) + runs_response = humanloop_client.evaluations.list_runs_for_evaluation(id=evaluation_id) assert runs_response.runs[0].status == "completed" assert ( runs_response.runs[0].version and runs_response.runs[0].version.version_id == new_prompt_version_response.version_id ) - default_prompt_version_response = humanloop_test_client.prompts.get(id=new_prompt_version_response.id) + default_prompt_version_response = humanloop_client.prompts.get(id=new_prompt_version_response.id) assert default_prompt_version_response.version_id != new_prompt_version_response.version_id @pytest.mark.parametrize("version_lookup", ["version_id", "environment"]) def test_eval_run_version_lookup_fails_with_path( - humanloop_test_client: Humanloop, - eval_prompt: TestIdentifiers, - eval_dataset: TestIdentifiers, - output_not_null_evaluator: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + eval_prompt: ResourceIdentifiers, + eval_dataset: ResourceIdentifiers, + output_not_null_evaluator: ResourceIdentifiers, version_lookup: str, ): # GIVEN an eval run where we try to evaluate a non-default version with pytest.raises(HumanloopRuntimeError) as e: - humanloop_test_client.evaluations.run( # type: ignore [attr-defined] + humanloop_client = get_humanloop_client() + humanloop_client.evaluations.run( # type: ignore [attr-defined] name="test_eval_run", file={ "path": eval_prompt.file_path, @@ -167,13 +172,14 @@ def test_eval_run_version_lookup_fails_with_path( def test_eval_run_with_version_upsert( - humanloop_test_client: Humanloop, - eval_prompt: TestIdentifiers, - eval_dataset: TestIdentifiers, - output_not_null_evaluator: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + eval_prompt: ResourceIdentifiers, + eval_dataset: ResourceIdentifiers, + output_not_null_evaluator: ResourceIdentifiers, test_prompt_config: dict[str, Any], ): - humanloop_test_client.evaluations.run( # type: ignore [attr-defined] + humanloop_client = get_humanloop_client() + humanloop_client.evaluations.run( # type: ignore [attr-defined] 
name="test_eval_run", file={ "path": eval_prompt.file_path, @@ -193,23 +199,24 @@ def test_eval_run_with_version_upsert( ], ) # THEN the version is upserted and evaluation finishes successfully - evaluations_response = humanloop_test_client.evaluations.list(file_id=eval_prompt.file_id) + evaluations_response = humanloop_client.evaluations.list(file_id=eval_prompt.file_id) assert evaluations_response.items and len(evaluations_response.items) == 1 evaluation_id = evaluations_response.items[0].id - runs_response = humanloop_test_client.evaluations.list_runs_for_evaluation(id=evaluation_id) + runs_response = humanloop_client.evaluations.list_runs_for_evaluation(id=evaluation_id) assert runs_response.runs[0].status == "completed" # THEN a version was upserted based on file.version - list_prompt_versions_response = humanloop_test_client.prompts.list_versions(id=eval_prompt.file_id) + list_prompt_versions_response = humanloop_client.prompts.list_versions(id=eval_prompt.file_id) assert list_prompt_versions_response.records and len(list_prompt_versions_response.records) == 2 def test_flow_eval_does_not_work_without_callable( - humanloop_test_client: Humanloop, - eval_dataset: TestIdentifiers, - output_not_null_evaluator: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + eval_dataset: ResourceIdentifiers, + output_not_null_evaluator: ResourceIdentifiers, ): with pytest.raises(HumanloopRuntimeError) as e: - humanloop_test_client.evaluations.run( # type: ignore [attr-defined] + humanloop_client = get_humanloop_client() + humanloop_client.evaluations.run( # type: ignore [attr-defined] name="test_eval_run", file={ "path": "Test Flow", @@ -234,28 +241,29 @@ def test_flow_eval_does_not_work_without_callable( def test_flow_eval_works_with_callable( - humanloop_test_client: Humanloop, - eval_dataset: TestIdentifiers, - output_not_null_evaluator: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + eval_dataset: ResourceIdentifiers, + output_not_null_evaluator: ResourceIdentifiers, sdk_test_dir: str, ): + humanloop_client = get_humanloop_client() flow_path = f"{sdk_test_dir}/Test Flow" # GIVEN a flow with a callable - flow_response = humanloop_test_client.flows.upsert( + flow_response = humanloop_client.flows.upsert( path=flow_path, attributes={ "foo": "bar", }, ) try: - flow = humanloop_test_client.flows.upsert( + flow = humanloop_client.flows.upsert( path=flow_path, attributes={ "foo": "bar", }, ) # WHEN we run an evaluation with the flow - humanloop_test_client.evaluations.run( # type: ignore [attr-defined] + humanloop_client.evaluations.run( # type: ignore [attr-defined] name="test_eval_run", file={ "id": flow.id, @@ -272,22 +280,23 @@ def test_flow_eval_works_with_callable( ], ) # THEN the evaluation finishes successfully - evaluations_response = humanloop_test_client.evaluations.list(file_id=flow.id) + evaluations_response = humanloop_client.evaluations.list(file_id=flow.id) assert evaluations_response.items and len(evaluations_response.items) == 1 evaluation_id = evaluations_response.items[0].id - runs_response = humanloop_test_client.evaluations.list_runs_for_evaluation(id=evaluation_id) + runs_response = humanloop_client.evaluations.list_runs_for_evaluation(id=evaluation_id) assert runs_response.runs[0].status == "completed" finally: - humanloop_test_client.flows.delete(id=flow_response.id) + humanloop_client.flows.delete(id=flow_response.id) def test_cannot_evaluate_agent_with_callable( - humanloop_test_client: Humanloop, - eval_dataset: TestIdentifiers, - 
output_not_null_evaluator: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + eval_dataset: ResourceIdentifiers, + output_not_null_evaluator: ResourceIdentifiers, ): with pytest.raises(ValueError) as e: - humanloop_test_client.evaluations.run( # type: ignore [attr-defined] + humanloop_client = get_humanloop_client() + humanloop_client.evaluations.run( # type: ignore [attr-defined] name="test_eval_run", file={ "path": "Test Agent", @@ -307,14 +316,15 @@ def test_cannot_evaluate_agent_with_callable( def test_flow_eval_resolves_to_default_with_callable( - humanloop_test_client: Humanloop, - output_not_null_evaluator: TestIdentifiers, - eval_dataset: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + output_not_null_evaluator: ResourceIdentifiers, + eval_dataset: ResourceIdentifiers, sdk_test_dir: str, ) -> None: + humanloop_client = get_humanloop_client() # GIVEN a flow with some attributes flow_path = f"{sdk_test_dir}/Test Flow" - flow_response = humanloop_test_client.flows.upsert( + flow_response = humanloop_client.flows.upsert( path=flow_path, attributes={ "foo": "bar", @@ -322,7 +332,7 @@ def test_flow_eval_resolves_to_default_with_callable( ) try: # WHEN running an evaluation with the flow's callable but no version - humanloop_test_client.evaluations.run( # type: ignore [attr-defined] + humanloop_client.evaluations.run( # type: ignore [attr-defined] name="test_eval_run", file={ "id": flow_response.id, @@ -339,24 +349,24 @@ def test_flow_eval_resolves_to_default_with_callable( ], ) # THEN the evaluation finishes successfully - evaluations_response = humanloop_test_client.evaluations.list(file_id=flow_response.id) + evaluations_response = humanloop_client.evaluations.list(file_id=flow_response.id) assert evaluations_response.items and len(evaluations_response.items) == 1 evaluation_id = evaluations_response.items and evaluations_response.items[0].id - runs_response = humanloop_test_client.evaluations.list_runs_for_evaluation(id=evaluation_id) # type: ignore [attr-defined, arg-type] + runs_response = humanloop_client.evaluations.list_runs_for_evaluation(id=evaluation_id) # type: ignore [attr-defined, arg-type] assert runs_response.runs[0].status == "completed" finally: # Clean up test resources - humanloop_test_client.flows.delete(id=flow_response.id) + humanloop_client.flows.delete(id=flow_response.id) -@pytest.mark.skip(reason="Skip until agents are in prod") def test_agent_eval_works_upserting( - humanloop_test_client: Humanloop, - eval_dataset: TestIdentifiers, - output_not_null_evaluator: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + eval_dataset: ResourceIdentifiers, + output_not_null_evaluator: ResourceIdentifiers, sdk_test_dir: str, ): - humanloop_test_client.evaluations.run( # type: ignore [attr-defined] + humanloop_client = get_humanloop_client() + humanloop_client.evaluations.run( # type: ignore [attr-defined] name="test_eval_run", file={ "path": f"{sdk_test_dir}/Test Agent", @@ -387,7 +397,7 @@ def test_agent_eval_works_upserting( } ], ) - files_response = humanloop_test_client.files.list_files(page=1, size=100) + files_response = humanloop_client.files.list_files(page=1, size=100) eval_agent = None for file in files_response.records: if file.path == f"{sdk_test_dir}/Test Agent": @@ -395,8 +405,8 @@ def test_agent_eval_works_upserting( break assert eval_agent and eval_agent.type == "agent" # THEN the evaluation finishes successfully - evaluations_response = humanloop_test_client.evaluations.list(file_id=eval_agent.id) + 
evaluations_response = humanloop_client.evaluations.list(file_id=eval_agent.id) assert evaluations_response.items and len(evaluations_response.items) == 1 evaluation_id = evaluations_response.items[0].id - runs_response = humanloop_test_client.evaluations.list_runs_for_evaluation(id=evaluation_id) # type: ignore [attr-defined, arg-type] + runs_response = humanloop_client.evaluations.list_runs_for_evaluation(id=evaluation_id) # type: ignore [attr-defined, arg-type] assert runs_response.runs[0].status == "completed" diff --git a/tests/integration/test_prompts.py b/tests/custom/integration/test_prompts.py similarity index 60% rename from tests/integration/test_prompts.py rename to tests/custom/integration/test_prompts.py index 13ca80eb..f6021b7e 100644 --- a/tests/integration/test_prompts.py +++ b/tests/custom/integration/test_prompts.py @@ -1,14 +1,15 @@ -from humanloop.client import Humanloop - -from tests.integration.conftest import TestIdentifiers +from humanloop.requests.prompt_kernel_request import PromptKernelRequestParams +from tests.custom.integration.conftest import ResourceIdentifiers +from tests.custom.types import GetHumanloopClientFn def test_prompts_call( - humanloop_test_client: Humanloop, - prompt: TestIdentifiers, - test_prompt_config: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + prompt: ResourceIdentifiers, + test_prompt_config: PromptKernelRequestParams, ) -> None: - response = humanloop_test_client.prompts.call( # type: ignore [attr-defined] + humanloop_client = get_humanloop_client() + response = humanloop_client.prompts.call( # type: ignore [attr-defined] path=prompt.file_path, prompt={**test_prompt_config}, # type: ignore [misc, arg-type, typeddict-item, dict-item, list-item] inputs={"question": "What is the capital of the France?"}, @@ -24,11 +25,12 @@ def test_prompts_call( def test_prompts_call_stream( - humanloop_test_client: Humanloop, - prompt: TestIdentifiers, - test_prompt_config: TestIdentifiers, + get_humanloop_client: GetHumanloopClientFn, + prompt: ResourceIdentifiers, + test_prompt_config: PromptKernelRequestParams, ) -> None: - response = humanloop_test_client.prompts.call_stream( # type: ignore [attr-defined] + humanloop_client = get_humanloop_client() + response = humanloop_client.prompts.call_stream( # type: ignore [attr-defined] path=prompt.file_path, prompt={**test_prompt_config}, # type: ignore [misc, arg-type, typeddict-item, dict-item, list-item] inputs={"question": "What is the capital of the France?"}, diff --git a/tests/custom/integration/test_sync.py b/tests/custom/integration/test_sync.py new file mode 100644 index 00000000..8b33f7a4 --- /dev/null +++ b/tests/custom/integration/test_sync.py @@ -0,0 +1,211 @@ +import typing +from pathlib import Path +from typing import List, Union + +import pytest + +from humanloop import AgentResponse, PromptResponse +from humanloop.agents.client import AgentsClient +from humanloop.error import HumanloopRuntimeError +from humanloop.prompts.client import PromptsClient +from tests.custom.types import GetHumanloopClientFn, SyncableFile + + +@pytest.fixture +def cleanup_local_files(): + """Cleanup any locally synced files after tests""" + yield + local_dir = Path("humanloop") + if local_dir.exists(): + import shutil + + shutil.rmtree(local_dir) + + +def test_pull_basic( + syncable_files_fixture: List[SyncableFile], + get_humanloop_client: GetHumanloopClientFn, +): + """Test that humanloop.sync() correctly syncs remote files to local filesystem""" + # GIVEN a set of files in the remote system 
(from syncable_files_fixture) + humanloop_client = get_humanloop_client() + + # WHEN running the sync + humanloop_client.pull() + + # THEN our local filesystem should mirror the remote filesystem in the HL Workspace + for file in syncable_files_fixture: + extension = f".{file.type}" + local_path = Path("humanloop") / f"{file.path}{extension}" + + # THEN the file and its directory should exist + assert local_path.exists(), f"Expected synced file at {local_path}" + assert local_path.parent.exists(), f"Expected directory at {local_path.parent}" + + # THEN the file should not be empty + content = local_path.read_text() + assert content, f"File at {local_path} should not be empty" + + +def test_overload_with_local_files( + get_humanloop_client: GetHumanloopClientFn, + syncable_files_fixture: List[SyncableFile], +): + """Test that overload_with_local_files correctly handles local files.""" + # GIVEN a client with use_local_files=True and pulled files + humanloop_client = get_humanloop_client(use_local_files=True) + humanloop_client.pull() + + # GIVEN a test file from the structure + test_file = syncable_files_fixture[0] + extension = f".{test_file.type}" + local_path = Path("humanloop") / f"{test_file.path}{extension}" + + # THEN the file should exist locally + assert local_path.exists(), f"Expected pulled file at {local_path}" + assert local_path.parent.exists(), f"Expected directory at {local_path.parent}" + + # WHEN calling the file + response: Union[AgentResponse, PromptResponse] + if test_file.type == "prompt": + response = humanloop_client.prompts.call( # type: ignore [assignment] + path=test_file.path, messages=[{"role": "user", "content": "Testing"}] + ) + elif test_file.type == "agent": + response = humanloop_client.agents.call( # type: ignore [assignment] + path=test_file.path, messages=[{"role": "user", "content": "Testing"}] + ) + # THEN the response should not be None + assert response is not None + + # WHEN calling with an invalid path + # THEN it should raise HumanloopRuntimeError + with pytest.raises(HumanloopRuntimeError): + try: + sub_client: Union[PromptsClient, AgentsClient] = typing.cast( + Union[PromptsClient, AgentsClient], + { + "prompt": humanloop_client.prompts, + "agent": humanloop_client.agents, + }[test_file.type], + ) + sub_client.call(path="invalid/path") + except KeyError: + raise NotImplementedError(f"Unknown file type: {test_file.type}") + + +def test_overload_log_with_local_files( + get_humanloop_client: GetHumanloopClientFn, + syncable_files_fixture: List[SyncableFile], + sdk_test_dir: str, +): + """Test that overload_with_local_files correctly handles local files for log operations.""" + # GIVEN a client with use_local_files=True and pulled files + humanloop_client = get_humanloop_client(use_local_files=True) + humanloop_client.pull() + + # GIVEN a test file from the structure + test_file = syncable_files_fixture[0] + extension = f".{test_file.type}" + local_path = Path("humanloop") / f"{test_file.path}{extension}" + + # THEN the file should exist locally + assert local_path.exists(), f"Expected pulled file at {local_path}" + assert local_path.parent.exists(), f"Expected directory at {local_path.parent}" + + # WHEN logging with the pulled file + if test_file.type == "prompt": + response = humanloop_client.prompts.log( # type: ignore [assignment] + path=test_file.path, messages=[{"role": "user", "content": "Testing"}], output="Test response" + ) + elif test_file.type == "agent": + response = humanloop_client.agents.log( # type: ignore [assignment] + 
path=test_file.path, messages=[{"role": "user", "content": "Testing"}], output="Test response" + ) + # THEN the response should not be None + assert response is not None + + # WHEN logging with an invalid path + # THEN it should raise HumanloopRuntimeError + with pytest.raises(HumanloopRuntimeError): + if test_file.type == "prompt": + humanloop_client.prompts.log( + path=f"{sdk_test_dir}/invalid/path", + messages=[{"role": "user", "content": "Testing"}], + output="Test response", + ) + elif test_file.type == "agent": + humanloop_client.agents.log( + path=f"{sdk_test_dir}/invalid/path", + messages=[{"role": "user", "content": "Testing"}], + output="Test response", + ) + + +def test_overload_version_environment_handling( + get_humanloop_client: GetHumanloopClientFn, + syncable_files_fixture: List[SyncableFile], +): + """Test that overload_with_local_files correctly handles version_id and environment parameters.""" + # GIVEN a client with use_local_files=True and pulled files + humanloop_client = get_humanloop_client(use_local_files=True) + humanloop_client.pull() + + # GIVEN a test file from the structure + test_file = syncable_files_fixture[0] + extension = f".{test_file.type}" + local_path = Path("humanloop") / f"{test_file.path}{extension}" + + # THEN the file should exist locally + assert local_path.exists(), f"Expected pulled file at {local_path}" + assert local_path.parent.exists(), f"Expected directory at {local_path.parent}" + + # WHEN calling with version_id + # THEN it should raise HumanloopRuntimeError + with pytest.raises(HumanloopRuntimeError, match="Cannot use local file.*version_id or environment was specified"): + if test_file.type == "prompt": + humanloop_client.prompts.call( + path=test_file.path, + version_id=test_file.version_id, + messages=[{"role": "user", "content": "Testing"}], + ) + elif test_file.type == "agent": + humanloop_client.agents.call( + path=test_file.path, + version_id=test_file.version_id, + messages=[{"role": "user", "content": "Testing"}], + ) + + # WHEN calling with environment + # THEN it should raise HumanloopRuntimeError + with pytest.raises(HumanloopRuntimeError, match="Cannot use local file.*version_id or environment was specified"): + if test_file.type == "prompt": + humanloop_client.prompts.call( + path=test_file.path, + environment="production", + messages=[{"role": "user", "content": "Testing"}], + ) + elif test_file.type == "agent": + humanloop_client.agents.call( + path=test_file.path, + environment="production", + messages=[{"role": "user", "content": "Testing"}], + ) + + # WHEN calling with both version_id and environment + # THEN it should raise HumanloopRuntimeError + with pytest.raises(HumanloopRuntimeError, match="Cannot use local file.*version_id or environment was specified"): + if test_file.type == "prompt": + humanloop_client.prompts.call( + path=test_file.path, + version_id=test_file.version_id, + environment="staging", + messages=[{"role": "user", "content": "Testing"}], + ) + elif test_file.type == "agent": + humanloop_client.agents.call( + path=test_file.path, + version_id=test_file.version_id, + environment="staging", + messages=[{"role": "user", "content": "Testing"}], + ) diff --git a/tests/custom/integration/test_sync_cli.py b/tests/custom/integration/test_sync_cli.py new file mode 100644 index 00000000..5631d5f0 --- /dev/null +++ b/tests/custom/integration/test_sync_cli.py @@ -0,0 +1,181 @@ +from pathlib import Path +from unittest import mock + +import pytest +from click.testing import CliRunner + +from 
humanloop.cli.__main__ import cli +from tests.custom.types import SyncableFile + + +@pytest.fixture +def no_env_file_loading(): + """Fixture that prevents loading API keys from any .env files. + + Use this fixture in tests that verify behavior when no .env files should + be processed, regardless of whether they exist or not. + """ + # Prevent any .env file from being loaded + with mock.patch("humanloop.cli.__main__.load_dotenv", lambda *args, **kwargs: None): + yield + + +def test_pull_without_api_key(cli_runner: CliRunner, no_humanloop_api_key_in_env, no_env_file_loading): + """GIVEN no API key in environment + WHEN running pull command + THEN it should fail with appropriate error message + """ + # WHEN running pull command + result = cli_runner.invoke(cli, ["pull", "--local-files-directory", "humanloop"]) + + # THEN it should fail with appropriate error message + assert result.exit_code == 1 # Our custom error code for API key issues + assert "No API key found" in result.output + assert "Set HUMANLOOP_API_KEY in .env file or environment" in result.output + + +def test_pull_basic( + cli_runner: CliRunner, + syncable_files_fixture: list[SyncableFile], + tmp_path: Path, # this path is used as a temporary store for files locally +): + # GIVEN a base directory for pulled files + base_dir = str(tmp_path / "humanloop") + + # WHEN running pull command + result = cli_runner.invoke(cli, ["pull", "--local-files-directory", base_dir, "--verbose"]) + + # THEN it should succeed + assert result.exit_code == 0 + assert "Pulling files from Humanloop..." in result.output + assert "Pull completed" in result.output + + # THEN the files should exist locally + for file in syncable_files_fixture: + extension = f".{file.type}" + local_path = Path(base_dir) / f"{file.path}{extension}" + assert local_path.exists(), f"Expected synced file at {local_path}" + assert local_path.parent.exists(), f"Expected directory at {local_path.parent}" + assert local_path.read_text(), f"File at {local_path} should not be empty" + + +def test_pull_with_specific_path( + cli_runner: CliRunner, + syncable_files_fixture: list[SyncableFile], + tmp_path: Path, +): + """GIVEN a specific path to pull + WHEN running pull command with path + THEN it should pull only files from that path + """ + # GIVEN a base directory and specific path + base_dir = str(tmp_path / "humanloop") + test_path = syncable_files_fixture[ + 0 + ].path.split( + "/" + )[ + 0 + ] # Retrieve the prefix of the first file's path which corresponds to the sdk_test_dir used within syncable_files_fixture + + # WHEN running pull command with path + result = cli_runner.invoke(cli, ["pull", "--local-files-directory", base_dir, "--path", test_path, "--verbose"]) + + # THEN it should succeed and show the path + assert result.exit_code == 0 + assert f"Path: {test_path}" in result.output + + # THEN only files from that path should exist locally + for file in syncable_files_fixture: + extension = f".{file.type}" + local_path = Path(base_dir) / f"{file.path}{extension}" + if file.path.startswith(test_path): + assert local_path.exists(), f"Expected synced file at {local_path}" + else: + assert not local_path.exists(), f"Unexpected file at {local_path}" + + +def test_pull_with_environment( + cli_runner: CliRunner, + syncable_files_fixture: list[SyncableFile], + tmp_path: Path, +): + # GIVEN a base directory and environment + base_dir = str(tmp_path / "humanloop") + environment = "staging" + + # WHEN running pull command with environment + result = cli_runner.invoke( + cli, + [ + "pull", 
+ "--local-files-directory", + base_dir, + "--environment", + environment, + "--verbose", + ], + ) + + # THEN it should succeed and show the environment + assert result.exit_code == 0 + assert f"Environment: {environment}" in result.output + + +def test_pull_with_quiet_mode( + cli_runner: CliRunner, + syncable_files_fixture: list[SyncableFile], + tmp_path: Path, +): + # GIVEN a base directory and quiet mode + base_dir = str(tmp_path / "humanloop") + + # WHEN running pull command with quiet mode + result = cli_runner.invoke(cli, ["pull", "--local-files-directory", base_dir, "--quiet"]) + + # THEN it should succeed but not show file list + assert result.exit_code == 0 + assert "Successfully pulled" not in result.output + + # THEN files should still be pulled + for file in syncable_files_fixture: + extension = f".{file.type}" + local_path = Path(base_dir) / f"{file.path}{extension}" + assert local_path.exists(), f"Expected synced file at {local_path}" + + +def test_pull_with_invalid_path( + cli_runner: CliRunner, +): + # GIVEN an invalid base directory + path = "nonexistent/path" + + # WHEN running pull command + result = cli_runner.invoke(cli, ["pull", "--path", path]) + + # THEN it should fail + assert result.exit_code == 1 + assert "Error" in result.output + + +def test_pull_with_invalid_environment(cli_runner: CliRunner, tmp_path: Path): + # GIVEN an invalid environment + environment = "nonexistent" + base_dir = str(tmp_path / "humanloop") + + # WHEN running pull command + result = cli_runner.invoke( + cli, + [ + "pull", + "--local-files-directory", + base_dir, + "--environment", + environment, + "--verbose", + ], + ) + + # THEN it should fail + assert result.exit_code == 1 + assert "Error" in result.output diff --git a/tests/custom/otel/__init__.py b/tests/custom/otel/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/otel/test_helpers.py b/tests/custom/otel/test_helpers.py similarity index 99% rename from tests/otel/test_helpers.py rename to tests/custom/otel/test_helpers.py index 3bd5ce45..f7ff6555 100644 --- a/tests/otel/test_helpers.py +++ b/tests/custom/otel/test_helpers.py @@ -1,7 +1,8 @@ import pytest -from humanloop.otel.helpers import read_from_opentelemetry_span, write_to_opentelemetry_span from opentelemetry.sdk.trace import Span +from humanloop.otel.helpers import read_from_opentelemetry_span, write_to_opentelemetry_span + def test_read_empty(test_span: Span): with pytest.raises(TypeError): diff --git a/tests/custom/sync/__init__.py b/tests/custom/sync/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/custom/sync/test_client.py b/tests/custom/sync/test_client.py new file mode 100644 index 00000000..ac83d259 --- /dev/null +++ b/tests/custom/sync/test_client.py @@ -0,0 +1,128 @@ +import logging +from pathlib import Path +from typing import Literal +from unittest.mock import Mock, patch + +import pytest + +from humanloop.error import HumanloopRuntimeError +from humanloop.sync.sync_client import SerializableFileType, SyncClient + + +@pytest.fixture +def mock_client() -> Mock: + return Mock() + + +@pytest.fixture +def sync_client(mock_client: Mock, tmp_path: Path) -> SyncClient: + return SyncClient( + client=mock_client, + base_dir=str(tmp_path), + cache_size=10, + log_level=logging.DEBUG, # DEBUG level for testing # noqa: F821 + ) + + +def test_init(sync_client: SyncClient, tmp_path: Path): + """Test basic initialization of SyncClient.""" + # GIVEN a SyncClient instance + # THEN it should be initialized with correct base 
directory, cache size and file types + assert sync_client.base_dir == tmp_path + assert sync_client._cache_size == 10 + assert sync_client.SERIALIZABLE_FILE_TYPES == frozenset(["prompt", "agent"]) + + +def test_normalize_path(sync_client: SyncClient): + """Test path normalization functionality.""" + # GIVEN various file paths with different formats + test_cases = [ + ("path/to/file.prompt", "path/to/file"), + ("path\\to\\file.agent", "path/to/file"), + ("trailing/slashes/file.agent/", "trailing/slashes/file"), + ("multiple//slashes//file.prompt", "multiple/slashes/file"), + ] + + for input_path, expected in test_cases: + # WHEN they are normalized + normalized = sync_client._normalize_path(input_path) + # THEN they should be converted to the expected format + assert normalized == expected + + # Test absolute path raises error + with pytest.raises(HumanloopRuntimeError, match="Absolute paths are not supported"): + sync_client._normalize_path("/leading/slashes/file.prompt") + + +def test_is_file(sync_client: SyncClient): + """Test file type detection.""" + # GIVEN various file paths + # WHEN checking if they are valid file types + # THEN only .prompt and .agent files should return True + assert sync_client.is_file("test.prompt") + assert sync_client.is_file("test.agent") + assert not sync_client.is_file("test.txt") + assert not sync_client.is_file("test") + + +def test_save_and_read_file(sync_client: SyncClient): + """Test saving and reading files.""" + # GIVEN a file content and path + content = "test content" + path = "test/path" + file_type: SerializableFileType = "prompt" + + # WHEN saving the file + sync_client._save_serialized_file(content, path, "prompt") + saved_path = sync_client.base_dir / path + saved_path = saved_path.parent / f"{saved_path.stem}.{file_type}" + + # THEN the file should exist on disk + assert saved_path.exists() + + # WHEN reading the file + read_content = sync_client.get_file_content(path, file_type) + + # THEN the content should match + assert read_content == content + + +def test_error_handling(sync_client: SyncClient): + """Test error handling in various scenarios.""" + # GIVEN a nonexistent file + # WHEN trying to read it + # THEN a HumanloopRuntimeError should be raised + with pytest.raises(HumanloopRuntimeError, match="Local file not found"): + sync_client.get_file_content("nonexistent", "prompt") + + # GIVEN an API error + # WHEN trying to pull a file + # THEN it should return False + with patch.object(sync_client.client.files, "retrieve_by_path", side_effect=Exception("API Error")): + assert not sync_client._pull_file("test.prompt") + + +def test_cache_functionality(sync_client: SyncClient): + """Test LRU cache functionality.""" + # GIVEN a test file + content = "test content" + path = "test/path" + file_type: Literal["prompt", "agent"] = "prompt" + sync_client._save_serialized_file(content, path, file_type) + + # WHEN reading the file for the first time + sync_client.get_file_content(path, file_type) + # THEN it should hit disk (implicitly verified by no cache hit) + + # WHEN modifying the file on disk + saved_path = sync_client.base_dir / f"{path}.{file_type}" + saved_path.write_text("modified content") + + # THEN subsequent reads should use cache + assert sync_client.get_file_content(path, file_type) == content + + # WHEN clearing the cache + sync_client.clear_cache() + + # THEN new content should be read from disk + assert sync_client.get_file_content(path, file_type) == "modified content" diff --git a/tests/custom/types.py b/tests/custom/types.py new 
file mode 100644 index 00000000..b270d9fa --- /dev/null +++ b/tests/custom/types.py @@ -0,0 +1,16 @@ +from typing import NamedTuple, Protocol + +from humanloop import FileType +from humanloop.client import Humanloop + + +class GetHumanloopClientFn(Protocol): + def __call__(self, use_local_files: bool = False) -> Humanloop: ... + + +class SyncableFile(NamedTuple): + path: str + type: FileType + model: str + id: str = "" + version_id: str = "" diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py deleted file mode 100644 index d14042a3..00000000 --- a/tests/integration/conftest.py +++ /dev/null @@ -1,169 +0,0 @@ -import io -import os -import uuid -from contextlib import contextmanager, redirect_stdout -from dataclasses import dataclass -from typing import Any, ContextManager, Generator, TextIO - -import dotenv -import pytest -from humanloop.client import Humanloop -from humanloop.requests.prompt_kernel_request import PromptKernelRequestParams - - -@dataclass -class TestIdentifiers: - file_id: str - file_path: str - - -@pytest.fixture() -def capture_stdout() -> ContextManager[TextIO]: - @contextmanager - def _context_manager(): - f = io.StringIO() - with redirect_stdout(f): - yield f - - return _context_manager # type: ignore [return-value] - - -@pytest.fixture(scope="session") -def openai_key() -> str: - dotenv.load_dotenv() - if not os.getenv("OPENAI_API_KEY"): - pytest.fail("OPENAI_API_KEY is not set for integration tests") - return os.getenv("OPENAI_API_KEY") # type: ignore [return-value] - - -@pytest.fixture(scope="session") -def humanloop_test_client() -> Humanloop: - dotenv.load_dotenv() - if not os.getenv("HUMANLOOP_API_KEY"): - pytest.fail("HUMANLOOP_API_KEY is not set for integration tests") - return Humanloop(api_key=os.getenv("HUMANLOOP_API_KEY")) # type: ignore [return-value] - - -@pytest.fixture(scope="function") -def sdk_test_dir(humanloop_test_client: Humanloop) -> Generator[str, None, None]: - path = f"SDK_INTEGRATION_TEST_{uuid.uuid4()}" - try: - response = humanloop_test_client.directories.create(path=path) - yield response.path - humanloop_test_client.directories.delete(id=response.id) - except Exception as e: - pytest.fail(f"Failed to create directory {path}: {e}") - - -@pytest.fixture(scope="function") -def test_prompt_config() -> PromptKernelRequestParams: - return { - "provider": "openai", - "model": "gpt-4o-mini", - "temperature": 0.5, - "template": [ - { - "role": "system", - "content": "You are a helpful assistant. 
You must answer the user's question truthfully and at the level of a 5th grader.", - }, - { - "role": "user", - "content": "{{question}}", - }, - ], - } - - -@pytest.fixture(scope="function") -def eval_dataset(humanloop_test_client: Humanloop, sdk_test_dir: str) -> Generator[TestIdentifiers, None, None]: - dataset_path = f"{sdk_test_dir}/eval_dataset" - try: - response = humanloop_test_client.datasets.upsert( - path=dataset_path, - datapoints=[ - { - "inputs": { - "question": "What is the capital of the France?", - }, - }, - { - "inputs": { - "question": "What is the capital of the Germany?", - }, - }, - { - "inputs": { - "question": "What is 2+2?", - }, - }, - ], - ) - yield TestIdentifiers(file_id=response.id, file_path=response.path) - humanloop_test_client.datasets.delete(id=response.id) - except Exception as e: - pytest.fail(f"Failed to create dataset {dataset_path}: {e}") - - -@pytest.fixture(scope="function") -def eval_prompt( - humanloop_test_client: Humanloop, sdk_test_dir: str, openai_key: str, test_prompt_config: dict[str, Any] -) -> Generator[TestIdentifiers, None, None]: - prompt_path = f"{sdk_test_dir}/eval_prompt" - try: - response = humanloop_test_client.prompts.upsert( - path=prompt_path, - **test_prompt_config, - ) - yield TestIdentifiers(file_id=response.id, file_path=response.path) - humanloop_test_client.prompts.delete(id=response.id) - except Exception as e: - pytest.fail(f"Failed to create prompt {prompt_path}: {e}") - - -@pytest.fixture(scope="function") -def prompt( - humanloop_test_client: Humanloop, sdk_test_dir: str, openai_key: str, test_prompt_config: dict[str, Any] -) -> Generator[TestIdentifiers, None, None]: - prompt_path = f"{sdk_test_dir}/prompt" - try: - response = humanloop_test_client.prompts.upsert( - path=prompt_path, - **test_prompt_config, - ) - yield TestIdentifiers(file_id=response.id, file_path=response.path) - humanloop_test_client.prompts.delete(id=response.id) - except Exception as e: - pytest.fail(f"Failed to create prompt {prompt_path}: {e}") - - -@pytest.fixture(scope="function") -def output_not_null_evaluator( - humanloop_test_client: Humanloop, sdk_test_dir: str -) -> Generator[TestIdentifiers, None, None]: - evaluator_path = f"{sdk_test_dir}/output_not_null_evaluator" - try: - response = humanloop_test_client.evaluators.upsert( - path=evaluator_path, - spec={ - "arguments_type": "target_required", - "return_type": "boolean", - "code": """ -def output_not_null(log: dict) -> bool: - return log["output"] is not None - """, - "evaluator_type": "python", - }, - ) - yield TestIdentifiers(file_id=response.id, file_path=response.path) - humanloop_test_client.evaluators.delete(id=response.id) - except Exception as e: - pytest.fail(f"Failed to create evaluator {evaluator_path}: {e}") - - -@pytest.fixture(scope="function") -def id_for_staging_environment(humanloop_test_client: Humanloop, eval_prompt: TestIdentifiers) -> str: - response = humanloop_test_client.prompts.list_environments(id=eval_prompt.file_id) - for environment in response: - if environment.name == "staging": - return environment.id - pytest.fail("Staging environment not found") diff --git a/tests/utils/assets/models/__init__.py b/tests/utils/assets/models/__init__.py index 3a1c852e..2cf01263 100644 --- a/tests/utils/assets/models/__init__.py +++ b/tests/utils/assets/models/__init__.py @@ -5,7 +5,7 @@ from .circle import CircleParams from .object_with_defaults import ObjectWithDefaultsParams from .object_with_optional_field import ObjectWithOptionalFieldParams -from .shape import 
ShapeParams, Shape_CircleParams, Shape_SquareParams +from .shape import Shape_CircleParams, Shape_SquareParams, ShapeParams from .square import SquareParams from .undiscriminated_shape import UndiscriminatedShapeParams diff --git a/tests/utils/assets/models/circle.py b/tests/utils/assets/models/circle.py index 759fe3eb..6125ca54 100644 --- a/tests/utils/assets/models/circle.py +++ b/tests/utils/assets/models/circle.py @@ -3,6 +3,7 @@ # This file was auto-generated by Fern from our API Definition. import typing_extensions + from humanloop.core.serialization import FieldMetadata diff --git a/tests/utils/assets/models/object_with_defaults.py b/tests/utils/assets/models/object_with_defaults.py index ef14f7b2..a977b1d2 100644 --- a/tests/utils/assets/models/object_with_defaults.py +++ b/tests/utils/assets/models/object_with_defaults.py @@ -3,7 +3,6 @@ # This file was auto-generated by Fern from our API Definition. import typing_extensions -import typing_extensions class ObjectWithDefaultsParams(typing_extensions.TypedDict): diff --git a/tests/utils/assets/models/object_with_optional_field.py b/tests/utils/assets/models/object_with_optional_field.py index dc3e3eb7..e4ffe724 100644 --- a/tests/utils/assets/models/object_with_optional_field.py +++ b/tests/utils/assets/models/object_with_optional_field.py @@ -2,15 +2,17 @@ # This file was auto-generated by Fern from our API Definition. -import typing_extensions -import typing -from humanloop.core.serialization import FieldMetadata import datetime as dt +import typing import uuid + +import typing_extensions from .color import Color from .shape import ShapeParams from .undiscriminated_shape import UndiscriminatedShapeParams +from humanloop.core.serialization import FieldMetadata + class ObjectWithOptionalFieldParams(typing_extensions.TypedDict): literal: typing.Literal["lit_one"] diff --git a/tests/utils/assets/models/shape.py b/tests/utils/assets/models/shape.py index 540ccabd..56394d93 100644 --- a/tests/utils/assets/models/shape.py +++ b/tests/utils/assets/models/shape.py @@ -3,8 +3,11 @@ # This file was auto-generated by Fern from our API Definition. from __future__ import annotations -import typing_extensions + import typing + +import typing_extensions + from humanloop.core.serialization import FieldMetadata diff --git a/tests/utils/assets/models/square.py b/tests/utils/assets/models/square.py index da4a2111..3f25005d 100644 --- a/tests/utils/assets/models/square.py +++ b/tests/utils/assets/models/square.py @@ -3,6 +3,7 @@ # This file was auto-generated by Fern from our API Definition. import typing_extensions + from humanloop.core.serialization import FieldMetadata diff --git a/tests/utils/assets/models/undiscriminated_shape.py b/tests/utils/assets/models/undiscriminated_shape.py index 68876a23..99f12b30 100644 --- a/tests/utils/assets/models/undiscriminated_shape.py +++ b/tests/utils/assets/models/undiscriminated_shape.py @@ -3,6 +3,7 @@ # This file was auto-generated by Fern from our API Definition. 
import typing + from .circle import CircleParams from .square import SquareParams diff --git a/tests/utils/test_serialization.py b/tests/utils/test_serialization.py index 2ad8e1b5..40cc847b 100644 --- a/tests/utils/test_serialization.py +++ b/tests/utils/test_serialization.py @@ -2,10 +2,10 @@ from typing import Any, List -from humanloop.core.serialization import convert_and_respect_annotation_metadata - from .assets.models import ObjectWithOptionalFieldParams, ShapeParams +from humanloop.core.serialization import convert_and_respect_annotation_metadata + UNION_TEST: ShapeParams = {"radius_measurement": 1.0, "shape_type": "circle", "id": "1"} UNION_TEST_CONVERTED = {"shapeType": "circle", "radiusMeasurement": 1.0, "id": "1"}
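
The integration tests above exercise the new pull/local-files workflow end to end. As a usage reference, here is a minimal sketch of that workflow. It assumes the Humanloop constructor accepts the same use_local_files flag that the GetHumanloopClientFn test factory forwards; pull(), prompts.call() and HumanloopRuntimeError are taken directly from the diff, while the prompt path shown is a hypothetical placeholder.

import os

from humanloop import Humanloop
from humanloop.error import HumanloopRuntimeError

# Assumption: use_local_files is forwarded to the client the same way the test factory does.
client = Humanloop(api_key=os.environ["HUMANLOOP_API_KEY"], use_local_files=True)

# Mirror remote Prompt/Agent files into ./humanloop/ as *.prompt / *.agent files.
client.pull()

# Calls by path now resolve against the pulled local files.
client.prompts.call(
    path="my_project/prompts/my_prompt",  # hypothetical path, for illustration only
    messages=[{"role": "user", "content": "Testing"}],
)

# Pinning a version or environment is incompatible with local files and raises.
try:
    client.prompts.call(
        path="my_project/prompts/my_prompt",
        environment="production",
        messages=[{"role": "user", "content": "Testing"}],
    )
except HumanloopRuntimeError:
    pass  # expected: "Cannot use local file ... version_id or environment was specified"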
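
For the CLI path, the integration tests drive the new pull command in-process with click's CliRunner rather than a subprocess. A condensed sketch of that pattern, using only the command, flags and messages asserted in tests/custom/integration/test_sync_cli.py (a valid HUMANLOOP_API_KEY must be available in the environment or a .env file):

from click.testing import CliRunner

from humanloop.cli.__main__ import cli

runner = CliRunner(mix_stderr=False)
result = runner.invoke(cli, ["pull", "--local-files-directory", "humanloop", "--verbose"])

assert result.exit_code == 0
assert "Pulling files from Humanloop..." in result.output
assert "Pull completed" in result.output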
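
The sync unit tests construct SyncClient directly against a mock API client. Condensed here as a reference for the constructor arguments and cache behaviour those tests cover; the Mock stands in for a real Humanloop client, and _save_serialized_file is the same private helper the tests call, so treat this as a test-style sketch rather than a public API guarantee.

import logging
from unittest.mock import Mock

from humanloop.sync.sync_client import SyncClient

sync_client = SyncClient(client=Mock(), base_dir="humanloop", cache_size=10, log_level=logging.DEBUG)

assert sync_client.is_file("greeting.prompt")   # only .prompt / .agent files are syncable
assert not sync_client.is_file("notes.txt")

sync_client._save_serialized_file("raw content", "greeting", "prompt")
assert sync_client.get_file_content("greeting", "prompt") == "raw content"  # served from the LRU cache on repeat reads
sync_client.clear_cache()                       # the next read goes back to disk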