From 2b912e04eb473a8168b657b20d1fda18c5cbde29 Mon Sep 17 00:00:00 2001 From: "Edgar Y. Walker" Date: Wed, 21 May 2025 16:58:37 +0000 Subject: [PATCH] wip: initialize with auto-generated code --- .python-version | 1 + README.md | 148 ++++++++++ pyproject.toml | 24 ++ requirements.txt | 1 + src/fsbridge/__init__.py | 23 ++ src/fsbridge/atomic.py | 141 ++++++++++ src/fsbridge/core.py | 362 ++++++++++++++++++++++++ src/fsbridge/mapping.py | 568 ++++++++++++++++++++++++++++++++++++++ src/fsbridge/utils.py | 65 +++++ tests/__init__.py | 3 + tests/conftest.py | 58 ++++ tests/test_atomic.py | 62 +++++ tests/test_core.py | 156 +++++++++++ tests/test_integration.py | 109 ++++++++ tests/test_mapping.py | 110 ++++++++ uv.lock | 22 ++ 16 files changed, 1853 insertions(+) create mode 100644 .python-version create mode 100644 pyproject.toml create mode 100644 requirements.txt create mode 100644 src/fsbridge/__init__.py create mode 100644 src/fsbridge/atomic.py create mode 100644 src/fsbridge/core.py create mode 100644 src/fsbridge/mapping.py create mode 100644 src/fsbridge/utils.py create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/test_atomic.py create mode 100644 tests/test_core.py create mode 100644 tests/test_integration.py create mode 100644 tests/test_mapping.py create mode 100644 uv.lock diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..c8cfe39 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.10 diff --git a/README.md b/README.md index 054a67b..47c6532 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,150 @@ # fsbridge + A pragmatic monkey-patching library that transparently redirects Python's standard file operations to FSSpec backends, allowing existing code to work with any filesystem without modification. + +[![PyPI version](https://badge.fury.io/py/fsbridge.svg)](https://badge.fury.io/py/fsbridge) +[![Python Versions](https://img.shields.io/pypi/pyversions/fsbridge.svg)](https://pypi.org/project/fsbridge/) +[![License](https://img.shields.io/github/license/yourusername/fsbridge.svg)](https://github.com/yourusername/fsbridge/blob/main/LICENSE) + +## Why fsbridge? + +When working with libraries that use Python's standard file operations (`open()`, `os.path`, `pathlib.Path`, etc.), you often need to: + +- Redirect file outputs to a different location +- Support cloud storage or other non-local filesystems +- Guarantee atomic file operations +- Handle virtual file paths + +Normally, this would require rewriting code to use [FSSpec](https://filesystem-spec.readthedocs.io/) directly. **fsbridge** eliminates this need by transparently redirecting standard Python file operations to FSSpec backends through monkey-patching. + +## Installation + +```bash +pip install fsbridge +``` + +## Quick Start + +```python +from fsbridge import fsbridge_context + +# Redirect all operations on paths beginning with '/outputs/' to '/tmp/actual_outputs/' +with fsbridge_context('/tmp/actual_outputs', path_prefix='/outputs/'): + # Standard file operations work as expected, but are redirected + with open('/outputs/results.csv', 'w') as f: + f.write('data,value\n1,2\n') + + # Works with pathlib too + from pathlib import Path + output_dir = Path('/outputs/subdir') + output_dir.mkdir(exist_ok=True) + + # Even works with os and shutil operations + import os + import shutil + os.makedirs('/outputs/another/nested/dir', exist_ok=True) + shutil.copy('source.txt', '/outputs/destination.txt') +``` + +## Features + +- **Transparent redirection**: No changes needed to existing code +- **Atomic file operations**: All write operations are atomic by default +- **Comprehensive coverage**: Supports `open()`, `pathlib.Path`, `os.*`, `shutil.*`, and more +- **FSSpec compatibility**: Works with any FSSpec backend (local, S3, GCS, HTTP, etc.) +- **Context-based**: Changes are only applied within the context manager +- **Customizable**: Configure path prefixes, atomic behavior, and more + +## How It Works + +fsbridge uses monkey-patching to intercept standard Python file operations and redirect them to FSSpec. While monkey-patching is generally considered a "dirty" approach, it's used here pragmatically to solve a real problem: allowing existing code to work with any filesystem without modification. + +Inside the `fsbridge_context`: + +1. Standard Python file operations are monkey-patched +2. Operations matching the path prefix are redirected to the specified filesystem +3. Write operations are performed atomically through temporary files +4. When the context exits, original functions are restored + +## Advanced Usage + +### Using with S3 Storage + +```python +import s3fs +from fsbridge import fsbridge_context, create_fsspec_fs + +# Create an S3 filesystem +s3 = s3fs.S3FileSystem(anon=False) + +# Redirect '/outputs/' paths to an S3 bucket +with fsbridge_context('my-bucket/outputs', fs=s3, path_prefix='/outputs/'): + # This will write to S3 atomically + with open('/outputs/results.csv', 'w') as f: + f.write('data,value\n1,2\n') +``` + +### Customizing Atomic Behavior + +```python +from fsbridge import fsbridge_context + +with fsbridge_context('/tmp/outputs', path_prefix='/outputs/', + atomic_writes=True, atomic_suffix='.temp', + atomic_prefix='.tmp.'): + # Customize how atomic operations work + with open('/outputs/file.txt', 'w') as f: + f.write('data') +``` + +### Selective Patching + +```python +from fsbridge import fsbridge_context + +with fsbridge_context('/tmp/outputs', path_prefix='/outputs/', + patch_open=True, patch_os=True, + patch_pathlib=True, patch_shutil=False): + # Only selected modules will be patched + pass +``` + +### Explicit Usage (No Monkey-Patching) + +```python +from fsbridge import create_fsspec_mapping + +# Get a mapped filesystem without monkey-patching +fs = create_fsspec_mapping('/tmp/outputs', path_prefix='/outputs/') + +# Use the filesystem explicitly +with fs.open('/outputs/file.txt', 'w') as f: + f.write('data') +``` + +## Supported Operations + +fsbridge redirects the following operations: + +- **builtins**: `open()` +- **pathlib**: `Path()`, `Path.open()`, `Path.mkdir()`, etc. +- **os**: `os.path.exists()`, `os.makedirs()`, `os.mkdir()`, `os.rename()`, etc. +- **os.path**: `exists()`, `isdir()`, `isfile()`, etc. +- **shutil**: `copyfile()`, `copy()`, `copy2()`, `move()`, etc. + +## Limitations + +fsbridge has some limitations you should be aware of: + +1. **Performance overhead**: The monkey-patching and redirection add some overhead +2. **Not all operations supported**: Some specialized file operations might not work as expected +3. **Thread safety**: Be cautious when using in multi-threaded environments +4. **C extensions**: Python modules implemented in C might bypass the Python-level monkey patching + +## Contributing + +Contributions are welcome! Please feel free to submit a Pull Request. + +## License + +This project is licensed under the MIT License - see the LICENSE file for details. \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..25f16bf --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,24 @@ +[build-system] +requires = ["setuptools>=45", "wheel"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +where = ["src"] + + +[project] +name = "fsbridge" +dynamic = ["version", "dependencies"] +description = "A pragmatic library that transparently redirects Python's standard file operations to FSSpec backends through monkey-patching, enabling existing code to work with any filesystem without modification." +readme = "README.md" +requires-python = ">=3.10" +license = {text = "MIT"} +authors = [ + {name = "Walker Lab", email = "eywalker@uw.edu"} +] + +[tool.setuptools_scm] +write_to = "src/fsbridge/_version.py" + +[tool.setuptools.dynamic] +dependencies = {file = ["requirements.txt"]} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..5b094e8 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +fsspec>=2023.0.0 \ No newline at end of file diff --git a/src/fsbridge/__init__.py b/src/fsbridge/__init__.py new file mode 100644 index 0000000..0a41307 --- /dev/null +++ b/src/fsbridge/__init__.py @@ -0,0 +1,23 @@ +""" +fsbridge: Transparent redirection of Python file operations to fsspec backends +============================================================================= + +A pragmatic monkey-patching library that transparently redirects Python's standard +file operations to FSSpec backends, allowing existing code to work with any filesystem +without modification. +""" + +from .core import fsbridge_context, FSBridgeContext +from .mapping import FSSpecMapping, create_fs_mapping +from .utils import create_fsspec_fs, create_path_mapper_from_dict + +__version__ = "0.1.0" + +__all__ = [ + "fsbridge_context", + "FSBridgeContext", + "FSSpecMapping", + "create_fs_mapping", + "create_fsspec_fs", + "create_path_mapper_from_dict", +] diff --git a/src/fsbridge/atomic.py b/src/fsbridge/atomic.py new file mode 100644 index 0000000..d6c4013 --- /dev/null +++ b/src/fsbridge/atomic.py @@ -0,0 +1,141 @@ +""" +Atomic file operations support. + +This module provides classes to perform atomic write operations +with fsspec filesystems. +""" + +import os +import io + + +class AtomicFileWrapper: + """ + Wrapper for binary files that provides atomic write operations. + + This class buffers writes to a temporary file and only replaces the + target file when the operation is complete. + """ + + def __init__(self, fs, path, mode, suffix=".tmp", prefix=".", **kwargs): + self.fs = fs + self.path = path + self.mode = mode + self.suffix = suffix + self.prefix = prefix + self.kwargs = kwargs + + # Create temporary file path + dirname = os.path.dirname(self.path) + basename = os.path.basename(self.path) + self.temp_path = f"{dirname}/{self.prefix}{basename}{self.suffix}" + + # Open temporary file + self.file = fs.open(self.temp_path, mode, **kwargs) + self.closed = False + + def write(self, data): + """Write data to the temporary file.""" + return self.file.write(data) + + def read(self, size=-1): + """Read from the file.""" + return self.file.read(size) + + def close(self): + """Close the file and move it to the final destination.""" + if self.closed: + return + + self.file.close() + + # Only move the file if we were writing + if "w" in self.mode or "a" in self.mode or "+" in self.mode: + try: + # Remove destination if it exists + if self.fs.exists(self.path): + self.fs.rm(self.path) + # Move temp file to destination + self.fs.mv(self.temp_path, self.path) + except Exception as e: + # If moving fails, try to clean up the temp file + try: + self.fs.rm(self.temp_path) + except: + pass + raise e + + self.closed = True + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + if exc_type is not None: + # An exception occurred, discard the temp file + self.file.close() + try: + self.fs.rm(self.temp_path) + except: + pass + self.closed = True + else: + # No exception, close normally (which moves the file) + self.close() + + def __getattr__(self, name): + """Delegate all other attributes to the underlying file object.""" + return getattr(self.file, name) + + +class AtomicTextIOWrapper: + """ + Wrapper for text files that provides atomic write operations. + + This is a text-mode version of AtomicFileWrapper that properly + handles encoding and newlines. + """ + + def __init__( + self, + fs, + path, + mode, + suffix=".tmp", + prefix=".", + encoding=None, + errors=None, + newline=None, + **kwargs, + ): + # Create binary wrapper + self.binary_wrapper = AtomicFileWrapper( + fs, path, mode.replace("t", "") + "b", suffix, prefix, **kwargs + ) + + # Wrap with TextIOWrapper + self.wrapper = io.TextIOWrapper( + self.binary_wrapper, encoding=encoding, errors=errors, newline=newline + ) + + def write(self, data): + """Write data to the wrapped text file.""" + return self.wrapper.write(data) + + def read(self, size=-1): + """Read from the wrapped text file.""" + return self.wrapper.read(size) + + def close(self): + """Close the wrapped text file.""" + self.wrapper.close() + + def __enter__(self): + return self.wrapper + + def __exit__(self, exc_type, exc_val, exc_tb): + return self.wrapper.__exit__(exc_type, exc_val, exc_tb) + + def __getattr__(self, name): + """Delegate all other attributes to the underlying text file object.""" + return getattr(self.wrapper, name) diff --git a/src/fsbridge/core.py b/src/fsbridge/core.py new file mode 100644 index 0000000..7a80adb --- /dev/null +++ b/src/fsbridge/core.py @@ -0,0 +1,362 @@ +""" +Core functionality for fsbridge package. + +This module provides the context manager that handles patching +of Python's file-related operations. +""" + +import builtins +import os +import pathlib +import shutil +import functools +from contextlib import contextmanager + +from .mapping import FSSpecMapping, create_fs_mapping + + +class FSBridgeContext: + """ + Context manager for patching file system operations to redirect them to fsspec backends. + + This class temporarily replaces standard Python file operations with versions that + redirect to a specified fsspec filesystem when paths match certain criteria. + """ + + def __init__( + self, + fs_mapping, + patch_open=True, + patch_os=True, + patch_pathlib=True, + patch_shutil=True, + ): + """ + Initialize an FSBridge context. + + Parameters + ---------- + fs_mapping : FSSpecMapping + The mapping object that implements file operations + patch_open : bool, default True + Whether to patch the built-in open function + patch_os : bool, default True + Whether to patch os module functions + patch_pathlib : bool, default True + Whether to patch pathlib.Path class + patch_shutil : bool, default True + Whether to patch shutil module functions + """ + self.fs_mapping = fs_mapping + + # Configure which operations to patch + self.patch_open = patch_open + self.patch_os = patch_os + self.patch_pathlib = patch_pathlib + self.patch_shutil = patch_shutil + + # Keep track of original functions + self._originals = {} + + def __enter__(self): + """Enter the context manager, patching file operations.""" + self._patch_operations() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Exit the context manager, restoring original file operations.""" + self._restore_operations() + + def _patch_operations(self): + """Patch the file operations based on configuration.""" + if self.patch_open: + self._patch_open() + + if self.patch_os: + self._patch_os_functions() + + if self.patch_pathlib: + self._patch_pathlib() + + if self.patch_shutil: + self._patch_shutil() + + def _restore_operations(self): + """Restore all original functions.""" + for target, attr_name, original in self._originals.values(): + setattr(target, attr_name, original) + self._originals = {} + + def _patch_function(self, target, attr_name, replacement): + """ + Patch a function with its replacement. + + Parameters + ---------- + target : module or class + The object that contains the function to patch + attr_name : str + The name of the attribute to patch + replacement : callable + The replacement function + """ + original = getattr(target, attr_name) + self._originals[(id(target), attr_name)] = (target, attr_name, original) + setattr(target, attr_name, replacement) + + def _patch_open(self): + """Patch the built-in open function.""" + original_open = builtins.open + fs_mapping = self.fs_mapping + + @functools.wraps(original_open) + def patched_open(file, mode="r", *args, **kwargs): + # Check if we should redirect this open call + if isinstance(file, str): + should_patch, _ = fs_mapping.map_path(file, "open") + if should_patch: + return fs_mapping.open(file, mode, *args, **kwargs) + return original_open(file, mode, *args, **kwargs) + + self._patch_function(builtins, "open", patched_open) + + def _patch_os_functions(self): + """Patch relevant functions in the os module.""" + fs_mapping = self.fs_mapping + + # os.path functions + for func_name in [ + "exists", + "isdir", + "isfile", + "getsize", + "getmtime", + "getctime", + ]: + original_func = getattr(os.path, func_name) + + @functools.wraps(original_func) + def patched_func(path, *args, func_name=func_name, **kwargs): + should_patch, _ = fs_mapping.map_path(path, f"os.path.{func_name}") + if should_patch: + return getattr(fs_mapping, f"os_path_{func_name}")( + path, *args, **kwargs + ) + return original_func(path, *args, **kwargs) + + self._patch_function(os.path, func_name, patched_func) + + # os functions + for func_name in [ + "mkdir", + "makedirs", + "listdir", + "remove", + "unlink", + "rmdir", + "rename", + ]: + if hasattr(os, func_name): + original_func = getattr(os, func_name) + + if func_name == "rename": + # Special handling for rename which takes two paths + @functools.wraps(original_func) + def patched_rename(src, dst, *args, **kwargs): + should_patch_src, _ = fs_mapping.map_path(src, "os.rename.src") + should_patch_dst, _ = fs_mapping.map_path(dst, "os.rename.dst") + + if should_patch_src and should_patch_dst: + return fs_mapping.os_rename(src, dst, *args, **kwargs) + return original_func(src, dst, *args, **kwargs) + + self._patch_function(os, func_name, patched_rename) + else: + + @functools.wraps(original_func) + def patched_func(path, *args, func_name=func_name, **kwargs): + should_patch, _ = fs_mapping.map_path(path, f"os.{func_name}") + if should_patch: + return getattr(fs_mapping, f"os_{func_name}")( + path, *args, **kwargs + ) + return original_func(path, *args, **kwargs) + + self._patch_function(os, func_name, patched_func) + + def _patch_pathlib(self): + """Patch relevant methods in pathlib.Path.""" + fs_mapping = self.fs_mapping + + # Methods that take no arguments beyond self + for method_name in ["exists", "is_dir", "is_file", "stat", "mkdir"]: + original_method = getattr(pathlib.Path, method_name) + + @functools.wraps(original_method) + def patched_method(self_path, *args, method_name=method_name, **kwargs): + path_str = str(self_path) + should_patch, _ = fs_mapping.map_path( + path_str, f"pathlib.{method_name}" + ) + if should_patch: + return getattr(fs_mapping, f"pathlib_{method_name}")( + path_str, *args, **kwargs + ) + return original_method(self_path, *args, **kwargs) + + self._patch_function(pathlib.Path, method_name, patched_method) + + # Path.open method needs special handling + original_path_open = pathlib.Path.open + + @functools.wraps(original_path_open) + def patched_path_open(self_path, mode="r", *args, **kwargs): + path_str = str(self_path) + should_patch, _ = fs_mapping.map_path(path_str, "pathlib.open") + if should_patch: + return fs_mapping.open(path_str, mode, *args, **kwargs) + return original_path_open(self_path, mode, *args, **kwargs) + + self._patch_function(pathlib.Path, "open", patched_path_open) + + def _patch_shutil(self): + """Patch relevant functions in the shutil module.""" + fs_mapping = self.fs_mapping + + for func_name in ["copy", "copy2", "copyfile", "copytree", "move", "rmtree"]: + original_func = getattr(shutil, func_name) + + if func_name == "rmtree": + # rmtree takes only one path + @functools.wraps(original_func) + def patched_rmtree(path, *args, **kwargs): + should_patch, _ = fs_mapping.map_path(path, f"shutil.{func_name}") + if should_patch: + return fs_mapping.shutil_rmtree( + path, should_patch, False, *args, **kwargs + ) + return original_func(path, *args, **kwargs) + + self._patch_function(shutil, func_name, patched_rmtree) + else: + # These functions take two paths + @functools.wraps(original_func) + def patched_func(src, dst, *args, func_name=func_name, **kwargs): + should_patch_src, _ = fs_mapping.map_path( + src, f"shutil.{func_name}.src" + ) + should_patch_dst, _ = fs_mapping.map_path( + dst, f"shutil.{func_name}.dst" + ) + + if should_patch_src or should_patch_dst: + return getattr(fs_mapping, f"shutil_{func_name}")( + src, + dst, + should_patch_src, + should_patch_dst, + *args, + **kwargs, + ) + return original_func(src, dst, *args, **kwargs) + + self._patch_function(shutil, func_name, patched_func) + + +@contextmanager +def fsbridge_context( + fs=None, + path_mapping=None, + atomic_writes=True, + atomic_suffix=".tmp", + atomic_prefix=".", + patch_open=True, + patch_os=True, + patch_pathlib=True, + patch_shutil=True, + **fs_kwargs, +): + """ + Context manager that patches file operations to redirect them to fsspec. + + Parameters + ---------- + fs : fsspec.AbstractFileSystem, FSSpecMapping, or str, optional + The filesystem to use, a FSSpecMapping instance, or a string with the filesystem type. + If None, a local filesystem is created. + path_mapping : callable, dict, or None + Function that takes (path, operation) and returns (should_patch, mapped_path). + If a dict, maps local paths to remote paths. + If None, all operations are redirected. + Ignored if fs is an FSSpecMapping instance. + atomic_writes : bool, default True + Whether to perform atomic write operations using temporary files. + Ignored if fs is an FSSpecMapping instance. + atomic_suffix : str, default ".tmp" + Suffix to use for temporary files during atomic writes. + Ignored if fs is an FSSpecMapping instance. + atomic_prefix : str, default "." + Prefix to use for temporary files during atomic writes. + Ignored if fs is an FSSpecMapping instance. + patch_open : bool, default True + Whether to patch the built-in open function. + patch_os : bool, default True + Whether to patch os module functions. + patch_pathlib : bool, default True + Whether to patch pathlib.Path class. + patch_shutil : bool, default True + Whether to patch shutil module functions. + **fs_kwargs : dict + Additional keyword arguments to pass to the filesystem constructor if fs is a string. + Ignored if fs is an FSSpecMapping instance. + + Yields + ------ + FSBridgeContext + The context manager instance + + Examples + -------- + >>> # Map paths with a dictionary + >>> with fsbridge_context(path_mapping={'/outputs/': '/tmp/actual_outputs/'}): + ... with open('/outputs/results.csv', 'w') as f: + ... f.write('data,value\\n1,2\\n') + + >>> # Custom path mapper function + >>> def my_path_mapper(path, operation): + ... if path.endswith('.csv'): + ... return True, f"/special/csv/{os.path.basename(path)}" + ... return False, path + >>> + >>> with fsbridge_context(path_mapping=my_path_mapper): + ... with open('data.csv', 'w') as f: # This will be patched + ... f.write('data,value\\n1,2\\n') + """ + # Create fs_mapping if needed + if isinstance(fs, FSSpecMapping): + fs_mapping = fs + else: + fs_mapping = create_fs_mapping( + fs=fs, + path_mapping=path_mapping, + atomic_writes=atomic_writes, + atomic_suffix=atomic_suffix, + atomic_prefix=atomic_prefix, + **fs_kwargs, + ) + + # Create the context manager + context = FSBridgeContext( + fs_mapping, + patch_open=patch_open, + patch_os=patch_os, + patch_pathlib=patch_pathlib, + patch_shutil=patch_shutil, + ) + + # Enter the context + try: + context.__enter__() + yield context + finally: + context.__exit__(None, None, None) diff --git a/src/fsbridge/mapping.py b/src/fsbridge/mapping.py new file mode 100644 index 0000000..7336111 --- /dev/null +++ b/src/fsbridge/mapping.py @@ -0,0 +1,568 @@ +""" +Mapping between file operations and fsspec. + +This module provides the core mapping functionality that redirects standard +file operations to fsspec operations. +""" + +import os +import pathlib +from typing import Callable, Dict, Optional, Tuple, Union, Any + +from .atomic import AtomicFileWrapper, AtomicTextIOWrapper +from .utils import create_fsspec_fs, create_path_mapper_from_dict + + +class FSSpecMapping: + """ + Maps standard file operations to fsspec operations. + + This class provides the implementation of file operations that + are redirected to an fsspec filesystem. + """ + + def __init__( + self, + fs, + path_mapping=None, + atomic_writes=True, + atomic_suffix=".tmp", + atomic_prefix=".", + ): + """ + Initialize the mapping between file operations and fsspec. + + Parameters + ---------- + fs : fsspec.AbstractFileSystem + The filesystem to use for operations + path_mapping : callable, dict, or None + Function that takes (path, operation_name) and returns (bool, mapped_path) + Where the bool indicates if the operation should be patched and mapped_path + is the new path to use if patched. + If a dict, maps local paths to remote paths. + If None, all paths are used as-is. + atomic_writes : bool + Whether to use atomic write operations + atomic_suffix : str + Suffix for temporary files in atomic writes + atomic_prefix : str + Prefix for temporary files in atomic writes + """ + self.fs = fs + self.atomic_writes = atomic_writes + self.atomic_suffix = atomic_suffix + self.atomic_prefix = atomic_prefix + + # Set up path mapping function + if path_mapping is None: + # No path mapping - operations on all paths as-is + self._path_mapper = lambda path, operation: ( + (True, path) if path else (False, path) + ) + elif isinstance(path_mapping, dict): + # Dictionary mapping - use helper to create mapper + self._path_mapper = create_path_mapper_from_dict(path_mapping) + elif callable(path_mapping): + # Callable - use directly + self._path_mapper = path_mapping + else: + raise TypeError("path_mapping must be None, a dict, or a callable") + + # Extensions dictionary for custom implementations + self._extensions = {} + + def map_path(self, path, operation="default"): + """ + Check if a path should be patched and return the mapped path. + + Parameters + ---------- + path : str or pathlib.Path + The path being operated on + operation : str + The name of the operation being performed + + Returns + ------- + tuple + (should_patch, mapped_path) where: + - should_patch (bool): True if the operation should be patched + - mapped_path (str): The path to use if patched + """ + # Convert path to string if it's a Path object + if isinstance(path, pathlib.Path): + path = str(path) + + # Ensure path is a string + if not isinstance(path, str): + return False, path + + return self._path_mapper(path, operation) + + def register_extension(self, operation_name, function): + """ + Register a custom function to handle a specific operation. + + Parameters + ---------- + operation_name : str + The name of the operation to extend (e.g., 'open', 'os_mkdir') + function : callable + The function to call instead of the default implementation + + Returns + ------- + None + """ + self._extensions[operation_name] = function + + def __getattr__(self, name): + """ + Get an attribute, checking for extensions first. + + Parameters + ---------- + name : str + The name of the attribute to get + + Returns + ------- + The attribute value, or the extension function if registered + + Raises + ------ + AttributeError + If the attribute is not found + """ + # Check for extension + if name in self._extensions: + return self._extensions[name] + + # If not found, raise AttributeError + raise AttributeError( + f"'{self.__class__.__name__}' object has no attribute '{name}'" + ) + + def context( + self, patch_open=True, patch_os=True, patch_pathlib=True, patch_shutil=True + ): + """ + Create a context manager that patches file operations. + + Parameters + ---------- + patch_open : bool, default True + Whether to patch the built-in open function + patch_os : bool, default True + Whether to patch os module functions + patch_pathlib : bool, default True + Whether to patch pathlib.Path class + patch_shutil : bool, default True + Whether to patch shutil module functions + + Returns + ------- + FSBridgeContext + A context manager that patches file operations + """ + from .core import FSBridgeContext + + return FSBridgeContext( + self, + patch_open=patch_open, + patch_os=patch_os, + patch_pathlib=patch_pathlib, + patch_shutil=patch_shutil, + ) + + # File operations implementations + + def open(self, file, mode="r", *args, **kwargs): + """ + Open a file on the fsspec filesystem. + + Parameters + ---------- + file : str + The file path to open + mode : str + The mode to open the file with + *args, **kwargs + Additional arguments to pass to fs.open + + Returns + ------- + file-like object + A file-like object for the requested file + """ + should_patch, mapped_path = self.map_path(file, "open") + if not should_patch: + raise ValueError(f"Path {file} should not be patched") + + # Handle atomic writes if enabled + if self.atomic_writes and "w" in mode and "b" not in mode: + # For text mode with atomic writes, we need special handling + return AtomicTextIOWrapper( + self.fs, + mapped_path, + mode, + suffix=self.atomic_suffix, + prefix=self.atomic_prefix, + *args, + **kwargs, + ) + elif self.atomic_writes and "w" in mode: + # For binary mode with atomic writes + return AtomicFileWrapper( + self.fs, + mapped_path, + mode, + suffix=self.atomic_suffix, + prefix=self.atomic_prefix, + *args, + **kwargs, + ) + + # Normal open without atomic writes + return self.fs.open(mapped_path, mode, *args, **kwargs) + + # os.path mappings + def os_path_exists(self, path, *args, **kwargs): + """Check if a path exists on the fsspec filesystem.""" + should_patch, mapped_path = self.map_path(path, "os.path.exists") + if not should_patch: + raise ValueError(f"Path {path} should not be patched") + return self.fs.exists(mapped_path) + + def os_path_isdir(self, path, *args, **kwargs): + """Check if a path is a directory on the fsspec filesystem.""" + should_patch, mapped_path = self.map_path(path, "os.path.isdir") + if not should_patch: + raise ValueError(f"Path {path} should not be patched") + return self.fs.isdir(mapped_path) + + def os_path_isfile(self, path, *args, **kwargs): + """Check if a path is a file on the fsspec filesystem.""" + should_patch, mapped_path = self.map_path(path, "os.path.isfile") + if not should_patch: + raise ValueError(f"Path {path} should not be patched") + return self.fs.isfile(mapped_path) + + def os_path_getsize(self, path, *args, **kwargs): + """Get the size of a file on the fsspec filesystem.""" + should_patch, mapped_path = self.map_path(path, "os.path.getsize") + if not should_patch: + raise ValueError(f"Path {path} should not be patched") + return self.fs.info(mapped_path)["size"] + + def os_path_getmtime(self, path, *args, **kwargs): + """Get the modification time of a file on the fsspec filesystem.""" + should_patch, mapped_path = self.map_path(path, "os.path.getmtime") + if not should_patch: + raise ValueError(f"Path {path} should not be patched") + + # Some fsspec filesystems don't provide mtime, handle that case + info = self.fs.info(mapped_path) + return info.get("mtime", info.get("modified", 0)) + + def os_path_getctime(self, path, *args, **kwargs): + """Get the creation time of a file on the fsspec filesystem.""" + should_patch, mapped_path = self.map_path(path, "os.path.getctime") + if not should_patch: + raise ValueError(f"Path {path} should not be patched") + + # Some fsspec filesystems don't provide ctime, handle that case + info = self.fs.info(mapped_path) + return info.get("ctime", info.get("created", 0)) + + # os function mappings + def os_mkdir(self, path, mode=0o777, *args, **kwargs): + """Create a directory on the fsspec filesystem.""" + should_patch, mapped_path = self.map_path(path, "os.mkdir") + if not should_patch: + raise ValueError(f"Path {path} should not be patched") + self.fs.mkdir(mapped_path) + + def os_makedirs(self, path, mode=0o777, exist_ok=False, *args, **kwargs): + """Create directories recursively on the fsspec filesystem.""" + should_patch, mapped_path = self.map_path(path, "os.makedirs") + if not should_patch: + raise ValueError(f"Path {path} should not be patched") + self.fs.makedirs(mapped_path, exist_ok=exist_ok) + + def os_listdir(self, path, *args, **kwargs): + """List contents of a directory on the fsspec filesystem.""" + should_patch, mapped_path = self.map_path(path, "os.listdir") + if not should_patch: + raise ValueError(f"Path {path} should not be patched") + + files = self.fs.ls(mapped_path, detail=False) + + # Extract just the filenames (not full paths) + return [os.path.basename(f) for f in files] + + def os_remove(self, path, *args, **kwargs): + """Remove a file on the fsspec filesystem.""" + should_patch, mapped_path = self.map_path(path, "os.remove") + if not should_patch: + raise ValueError(f"Path {path} should not be patched") + self.fs.rm(mapped_path) + + def os_unlink(self, path, *args, **kwargs): + """Remove a file on the fsspec filesystem (alias for remove).""" + should_patch, mapped_path = self.map_path(path, "os.unlink") + if not should_patch: + raise ValueError(f"Path {path} should not be patched") + self.fs.rm(mapped_path) + + def os_rmdir(self, path, *args, **kwargs): + """Remove an empty directory on the fsspec filesystem.""" + should_patch, mapped_path = self.map_path(path, "os.rmdir") + if not should_patch: + raise ValueError(f"Path {path} should not be patched") + self.fs.rmdir(mapped_path) + + def os_rename(self, src, dst, *args, **kwargs): + """Rename a file or directory on the fsspec filesystem.""" + should_patch_src, mapped_src = self.map_path(src, "os.rename.src") + should_patch_dst, mapped_dst = self.map_path(dst, "os.rename.dst") + + if not should_patch_src or not should_patch_dst: + raise ValueError(f"Paths should both be patched: {src} -> {dst}") + + self.fs.mv(mapped_src, mapped_dst) + + # pathlib mappings + def pathlib_exists(self, path, *args, **kwargs): + """Check if a path exists (pathlib method).""" + should_patch, mapped_path = self.map_path(path, "pathlib.exists") + if not should_patch: + raise ValueError(f"Path {path} should not be patched") + return self.fs.exists(mapped_path) + + def pathlib_is_dir(self, path, *args, **kwargs): + """Check if a path is a directory (pathlib method).""" + should_patch, mapped_path = self.map_path(path, "pathlib.is_dir") + if not should_patch: + raise ValueError(f"Path {path} should not be patched") + return self.fs.isdir(mapped_path) + + def pathlib_is_file(self, path, *args, **kwargs): + """Check if a path is a file (pathlib method).""" + should_patch, mapped_path = self.map_path(path, "pathlib.is_file") + if not should_patch: + raise ValueError(f"Path {path} should not be patched") + return self.fs.isfile(mapped_path) + + def pathlib_stat(self, path, *args, **kwargs): + """Get stat information (pathlib method).""" + should_patch, mapped_path = self.map_path(path, "pathlib.stat") + if not should_patch: + raise ValueError(f"Path {path} should not be patched") + + # Create a simple stat_result-like object with values from info + info = self.fs.info(mapped_path) + + class StatResult: + def __init__(self, info): + self.info = info + self.st_size = info.get("size", 0) + self.st_mtime = info.get("mtime", info.get("modified", 0)) + self.st_ctime = info.get("ctime", info.get("created", 0)) + self.st_mode = 0o777 if info.get("type") == "directory" else 0o666 + + return StatResult(info) + + def pathlib_mkdir( + self, path, mode=0o777, parents=False, exist_ok=False, *args, **kwargs + ): + """Create a directory (pathlib method).""" + should_patch, mapped_path = self.map_path(path, "pathlib.mkdir") + if not should_patch: + raise ValueError(f"Path {path} should not be patched") + + if parents: + return self.fs.makedirs(mapped_path, exist_ok=exist_ok) + else: + try: + return self.fs.mkdir(mapped_path) + except FileExistsError: + if exist_ok: + return + raise + + # shutil mappings + def shutil_copy(self, src, dst, src_patch, dst_patch, *args, **kwargs): + """Copy a file.""" + # Map paths if they should be patched + src_path = self.map_path(src, "shutil.copy.src")[1] if src_patch else src + dst_path = self.map_path(dst, "shutil.copy.dst")[1] if dst_patch else dst + + if src_patch and dst_patch: + # Both paths are on the fsspec filesystem + self.fs.copy(src_path, dst_path) + elif src_patch: + # Source is on fsspec, destination is on local filesystem + with self.fs.open(src_path, "rb") as fsrc: + with open(dst, "wb") as fdst: + while True: + chunk = fsrc.read(1024 * 1024) # 1MB chunks + if not chunk: + break + fdst.write(chunk) + elif dst_patch: + # Source is on local filesystem, destination is on fsspec + with open(src, "rb") as fsrc: + with self.fs.open(dst_path, "wb") as fdst: + while True: + chunk = fsrc.read(1024 * 1024) # 1MB chunks + if not chunk: + break + fdst.write(chunk) + + def shutil_copy2(self, src, dst, src_patch, dst_patch, *args, **kwargs): + """Copy a file with metadata.""" + # In fsspec, we may not be able to preserve all metadata, but we do our best + self.shutil_copy(src, dst, src_patch, dst_patch, *args, **kwargs) + + def shutil_copyfile(self, src, dst, src_patch, dst_patch, *args, **kwargs): + """Copy file contents.""" + # This is essentially the same as copy for us + self.shutil_copy(src, dst, src_patch, dst_patch, *args, **kwargs) + + def shutil_copytree(self, src, dst, src_patch, dst_patch, *args, **kwargs): + """Copy a directory tree.""" + symlinks = kwargs.get("symlinks", False) + ignore = kwargs.get("ignore", None) + dirs_exist_ok = kwargs.get("dirs_exist_ok", False) + + # Map paths if they should be patched + src_path = self.map_path(src, "shutil.copytree.src")[1] if src_patch else src + dst_path = self.map_path(dst, "shutil.copytree.dst")[1] if dst_patch else dst + + # Create the destination directory + if dst_patch: + self.fs.makedirs(dst_path, exist_ok=dirs_exist_ok) + else: + os.makedirs(dst, exist_ok=dirs_exist_ok) + + # Get the list of files and directories to copy + if src_patch: + entries = self.fs.find(src_path, detail=True) + files = [entry for entry in entries.values() if entry["type"] == "file"] + relative_files = [os.path.relpath(file["name"], src_path) for file in files] + else: + relative_files = [] + for dirpath, dirnames, filenames in os.walk(src): + for filename in filenames: + full_path = os.path.join(dirpath, filename) + relative_files.append(os.path.relpath(full_path, src)) + + # Copy each file + for relative_file in relative_files: + src_file = ( + os.path.join(src, relative_file) + if not src_patch + else os.path.join(src_path, relative_file) + ) + dst_file = ( + os.path.join(dst, relative_file) + if not dst_patch + else os.path.join(dst_path, relative_file) + ) + + # Create parent directories if needed + dst_dir = os.path.dirname(dst_file) + if dst_patch: + self.fs.makedirs(dst_dir, exist_ok=True) + else: + os.makedirs(dst_dir, exist_ok=True) + + # Copy the file + self.shutil_copy2(src_file, dst_file, src_patch, dst_patch) + + return dst + + def shutil_move(self, src, dst, src_patch, dst_patch, *args, **kwargs): + """Move a file or directory.""" + if src_patch and dst_patch: + # Both paths are on the fsspec filesystem + # Map paths if they should be patched + src_path = self.map_path(src, "shutil.move.src")[1] + dst_path = self.map_path(dst, "shutil.move.dst")[1] + self.fs.mv(src_path, dst_path) + else: + # Copy and then delete + self.shutil_copy2(src, dst, src_patch, dst_patch, *args, **kwargs) + if src_patch: + src_path = self.map_path(src, "shutil.move.src")[1] + self.fs.rm(src_path) + else: + if os.path.isdir(src): + import shutil + + shutil.rmtree(src) + else: + os.remove(src) + + def shutil_rmtree(self, path, src_patch, dst_patch, *args, **kwargs): + """Remove a directory tree.""" + # We only care about the first path for rmtree + if src_patch: + path_mapped = self.map_path(path, "shutil.rmtree")[1] + self.fs.rm(path_mapped, recursive=True) + else: + import shutil + + shutil.rmtree(path, *args, **kwargs) + + +def create_fs_mapping( + fs=None, + path_mapping=None, + atomic_writes=True, + atomic_suffix=".tmp", + atomic_prefix=".", + **fs_kwargs, +): + """ + Create a mapping between standard file operations and an fsspec filesystem. + + Parameters + ---------- + fs : fsspec.AbstractFileSystem or str, optional + The filesystem to use or a string representing the filesystem type. + If None, a local filesystem is created. + path_mapping : callable, dict, or None + Function that takes (path, operation) and returns (should_patch, mapped_path). + If a dict, maps local paths to remote paths. + If None, all operations are redirected. + atomic_writes : bool, default True + Whether to use atomic write operations + atomic_suffix : str, default ".tmp" + Suffix for temporary files in atomic writes + atomic_prefix : str, default "." + Prefix for temporary files in atomic writes + **fs_kwargs : dict + Additional keyword arguments to pass to the filesystem constructor if fs is a string + + Returns + ------- + FSSpecMapping + The mapping object + """ + # Create filesystem if needed + if fs is None: + fs = create_fsspec_fs("file") + elif isinstance(fs, str): + fs = create_fsspec_fs(fs, **fs_kwargs) + + # Create the mapping + return FSSpecMapping( + fs, + path_mapping=path_mapping, + atomic_writes=atomic_writes, + atomic_suffix=atomic_suffix, + atomic_prefix=atomic_prefix, + ) diff --git a/src/fsbridge/utils.py b/src/fsbridge/utils.py new file mode 100644 index 0000000..34ca257 --- /dev/null +++ b/src/fsbridge/utils.py @@ -0,0 +1,65 @@ +""" +Utility functions for fsbridge. + +This module provides helper functions for creating filesystems +and path mappers. +""" + +import os +from typing import Dict, Callable, Tuple + + +def create_fsspec_fs(fs_type="file", **fs_kwargs): + """ + Create an fsspec filesystem. + + Parameters + ---------- + fs_type : str, default "file" + The fsspec filesystem type to create + **fs_kwargs : dict + Additional keyword arguments to pass to the filesystem constructor + + Returns + ------- + fsspec.AbstractFileSystem + The created filesystem + """ + import fsspec + + return fsspec.filesystem(fs_type, **fs_kwargs) + + +def create_path_mapper_from_dict( + path_mapping: Dict[str, str], +) -> Callable[[str, str], Tuple[bool, str]]: + """ + Create a path mapper function from a dictionary. + + Parameters + ---------- + path_mapping : dict + Dictionary mapping local paths to remote paths + + Returns + ------- + callable + A function that takes (path, operation) and returns (should_patch, mapped_path) + """ + + def mapper(path, operation): + if not path: + return False, path + + for local_path, remote_path in path_mapping.items(): + if path.startswith(local_path): + # Replace local path prefix with remote path + rel_path = path[len(local_path) :].lstrip("/") + if remote_path: + mapped_path = f"{remote_path.rstrip('/')}/{rel_path}" + else: + mapped_path = rel_path + return True, mapped_path + return False, path + + return mapper diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..448b45e --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,3 @@ +""" +Tests for the fsbridge package. +""" diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..c535ede --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,58 @@ +""" +Pytest fixtures for fsbridge tests. +""" + +import os +import shutil +import tempfile +import pytest + +from fsbridge.mapping import create_fs_mapping +from fsbridge.utils import create_fsspec_fs + + +@pytest.fixture +def temp_dir(): + """Create a temporary directory for tests.""" + temp_dir = tempfile.mkdtemp() + yield temp_dir + shutil.rmtree(temp_dir, ignore_errors=True) + + +@pytest.fixture +def temp_dir2(): + """Create a second temporary directory for tests.""" + temp_dir = tempfile.mkdtemp() + yield temp_dir + shutil.rmtree(temp_dir, ignore_errors=True) + + +@pytest.fixture +def fs(): + """Create a filesystem for tests.""" + return create_fsspec_fs("file") + + +@pytest.fixture +def path_mapper(temp_dir): + """Create a path mapper function for tests.""" + + def mapper(path, operation): + if path and path.startswith("/test/"): + mapped_path = os.path.join(temp_dir, path[6:]) + return True, mapped_path + return False, path + + return mapper + + +@pytest.fixture +def fs_mapping(fs, path_mapper): + """Create a mapping for tests.""" + return create_fs_mapping(fs=fs, path_mapping=path_mapper) + + +@pytest.fixture +def dict_mapping(temp_dir): + """Create a mapping with dictionary path mapping.""" + return create_fs_mapping(path_mapping={"/test/": temp_dir}) diff --git a/tests/test_atomic.py b/tests/test_atomic.py new file mode 100644 index 0000000..ffd97eb --- /dev/null +++ b/tests/test_atomic.py @@ -0,0 +1,62 @@ +""" +Tests for atomic file operations. +""" + +import os +import pytest + +from fsbridge.atomic import AtomicFileWrapper, AtomicTextIOWrapper + + +def test_atomic_binary(fs, temp_dir): + """Test atomic binary file operations.""" + test_path = os.path.join(temp_dir, "test.bin") + test_data = b"Hello, binary world!" + + # Write data + with AtomicFileWrapper(fs, test_path, "wb") as f: + f.write(test_data) + + # Check the file exists and has the right content + assert os.path.exists(test_path) + + with open(test_path, "rb") as f: + content = f.read() + + assert content == test_data + + +def test_atomic_text(fs, temp_dir): + """Test atomic text file operations.""" + test_path = os.path.join(temp_dir, "test.txt") + test_data = "Hello, text world!" + + # Write data + with AtomicTextIOWrapper(fs, test_path, "w") as f: + f.write(test_data) + + # Check the file exists and has the right content + assert os.path.exists(test_path) + + with open(test_path, "r") as f: + content = f.read() + + assert content == test_data + + +def test_atomic_exception(fs, temp_dir): + """Test atomic file operations with an exception.""" + test_path = os.path.join(temp_dir, "test_exception.txt") + + # Try to write data but raise an exception + with pytest.raises(RuntimeError): + with AtomicTextIOWrapper(fs, test_path, "w") as f: + f.write("This should not be written") + raise RuntimeError("Test exception") + + # Check the file does not exist + assert not os.path.exists(test_path) + + # Check that the temporary file was cleaned up + temp_files = [f for f in os.listdir(temp_dir) if f.startswith(".")] + assert len(temp_files) == 0 diff --git a/tests/test_core.py b/tests/test_core.py new file mode 100644 index 0000000..1f88ed6 --- /dev/null +++ b/tests/test_core.py @@ -0,0 +1,156 @@ +""" +Tests for the FSBridgeContext class. +""" + +import os +from pathlib import Path +import pytest + +from fsbridge.core import FSBridgeContext + + +def test_context_open(fs_mapping, temp_dir): + """Test the context manager with open.""" + test_file = "/test/test.txt" + test_content = "Hello, world!" + + with FSBridgeContext(fs_mapping): + # Write to the file + with open(test_file, "w") as f: + f.write(test_content) + + # Read from the file + with open(test_file, "r") as f: + content = f.read() + + assert content == test_content + + # Verify the file was created in the right place + real_path = os.path.join(temp_dir, "test.txt") + assert os.path.exists(real_path) + + with open(real_path, "r") as f: + content = f.read() + + assert content == test_content + + +def test_context_pathlib(fs_mapping, temp_dir): + """Test the context manager with pathlib.""" + test_dir = Path("/test/subdir") + test_file = test_dir / "test.txt" + test_content = "Hello, pathlib!" + + with FSBridgeContext(fs_mapping): + # Create directory + test_dir.mkdir(exist_ok=True) + + # Write to the file + test_file.write_text(test_content) + + # Read from the file + content = test_file.read_text() + + assert content == test_content + + # Verify the file was created in the right place + real_path = os.path.join(temp_dir, "subdir", "test.txt") + assert os.path.exists(real_path) + + with open(real_path, "r") as f: + content = f.read() + + assert content == test_content + + +def test_context_os(fs_mapping, temp_dir): + """Test the context manager with os.""" + test_dir = "/test/osdir" + test_file = os.path.join(test_dir, "test.txt") + test_content = "Hello, os!" + + with FSBridgeContext(fs_mapping): + # Create directory + os.makedirs(test_dir, exist_ok=True) + + # Write to the file + with open(test_file, "w") as f: + f.write(test_content) + + # List directory + files = os.listdir(test_dir) + assert "test.txt" in files + + # Check file exists + assert os.path.exists(test_file) + assert os.path.isfile(test_file) + + # Get file size + size = os.path.getsize(test_file) + assert size == len(test_content) + + # Verify the file was created in the right place + real_path = os.path.join(temp_dir, "osdir", "test.txt") + assert os.path.exists(real_path) + + +def test_context_shutil(fs_mapping, temp_dir): + """Test the context manager with shutil.""" + import shutil + + test_dir = "/test/shutil" + test_file = os.path.join(test_dir, "test.txt") + test_file2 = os.path.join(test_dir, "test2.txt") + test_content = "Hello, shutil!" + + with FSBridgeContext(fs_mapping): + # Create directory + os.makedirs(test_dir, exist_ok=True) + + # Write to the file + with open(test_file, "w") as f: + f.write(test_content) + + # Copy file + shutil.copy(test_file, test_file2) + + # Check file exists + assert os.path.exists(test_file2) + + # Read the copied file + with open(test_file2, "r") as f: + content = f.read() + + assert content == test_content + + # Verify the files were created in the right place + real_path = os.path.join(temp_dir, "shutil", "test.txt") + real_path2 = os.path.join(temp_dir, "shutil", "test2.txt") + assert os.path.exists(real_path) + assert os.path.exists(real_path2) + + +def test_selective_patching(fs_mapping, temp_dir): + """Test selective patching of functions.""" + test_file = "/test/selective.txt" + test_content = "Hello, selective!" + + # Test with only open patched + with FSBridgeContext( + fs_mapping, + patch_open=True, + patch_os=False, + patch_pathlib=False, + patch_shutil=False, + ): + # Write to the file (should be redirected) + with open(test_file, "w") as f: + f.write(test_content) + + # os.path.exists should not be redirected + # This will return False because we're checking the local filesystem + assert not os.path.exists(test_file) + + # Verify the file was created in the right place + real_path = os.path.join(temp_dir, "selective.txt") + assert os.path.exists(real_path) diff --git a/tests/test_integration.py b/tests/test_integration.py new file mode 100644 index 0000000..088f586 --- /dev/null +++ b/tests/test_integration.py @@ -0,0 +1,109 @@ +""" +Integration tests for fsbridge. +""" + +import os +from pathlib import Path +import pytest + +from fsbridge import fsbridge_context + + +def test_basic_usage(temp_dir): + """Test basic usage with dictionary path mapping.""" + test_file = "/outputs/results.csv" + test_content = "data,value\n1,2\n" + + with fsbridge_context(path_mapping={"/outputs/": temp_dir}): + # Write to the file + with open(test_file, "w") as f: + f.write(test_content) + + # Create a directory + Path("/outputs/subdir").mkdir(exist_ok=True) + + # Copy a file + with open("/outputs/source.txt", "w") as f: + f.write("Source content") + + import shutil + + shutil.copy("/outputs/source.txt", "/outputs/destination.txt") + + # Verify the files were created in the right place + csv_path = os.path.join(temp_dir, "results.csv") + subdir_path = os.path.join(temp_dir, "subdir") + dest_path = os.path.join(temp_dir, "destination.txt") + + assert os.path.exists(csv_path) + assert os.path.isdir(subdir_path) + assert os.path.exists(dest_path) + + with open(csv_path, "r") as f: + content = f.read() + + assert content == test_content + + +def test_custom_mapper(temp_dir): + """Test using a custom path mapper function.""" + + # Custom path mapper that redirects only CSV files + def csv_mapper(path, operation): + if isinstance(path, str) and path.endswith(".csv"): + return True, os.path.join(temp_dir, os.path.basename(path)) + return False, path + + with fsbridge_context(path_mapping=csv_mapper): + # This will be redirected + with open("data.csv", "w") as f: + f.write("data,value\n1,2\n") + + # This will not be redirected + local_txt = os.path.join(temp_dir, "local.txt") + with open(local_txt, "w") as f: + f.write("Hello, world!\n") + + # Verify the CSV file was created in the tempdir + csv_path = os.path.join(temp_dir, "data.csv") + assert os.path.exists(csv_path) + + # Verify the text file was written directly + with open(local_txt, "r") as f: + content = f.read() + + assert content == "Hello, world!\n" + + +def test_multiple_filesystems(temp_dir, temp_dir2): + """Test using multiple target paths.""" + # Map multiple paths to different targets + path_mapping = { + "/data/": temp_dir, + "/logs/": temp_dir2, + } + + with fsbridge_context(path_mapping=path_mapping): + # Each path will be redirected to its corresponding target + with open("/data/file.csv", "w") as f: + f.write("data\n") + + with open("/logs/app.log", "w") as f: + f.write("log entry\n") + + # Verify the files were created in the right places + data_path = os.path.join(temp_dir, "file.csv") + log_path = os.path.join(temp_dir2, "app.log") + + assert os.path.exists(data_path) + assert os.path.exists(log_path) + + with open(data_path, "r") as f: + content = f.read() + + assert content == "data\n" + + with open(log_path, "r") as f: + content = f.read() + + assert content == "log entry\n" diff --git a/tests/test_mapping.py b/tests/test_mapping.py new file mode 100644 index 0000000..35441b2 --- /dev/null +++ b/tests/test_mapping.py @@ -0,0 +1,110 @@ +""" +Tests for the FSSpecMapping class. +""" + +import os +import pytest + +from fsbridge.mapping import FSSpecMapping, create_fs_mapping +from fsbridge.utils import create_path_mapper_from_dict + + +def test_map_path(fs_mapping, temp_dir): + """Test the map_path method.""" + # Test a path that should be mapped + should_patch, mapped_path = fs_mapping.map_path("/test/file.txt", "open") + assert should_patch + assert mapped_path == os.path.join(temp_dir, "file.txt") + + # Test a path that should not be mapped + should_patch, mapped_path = fs_mapping.map_path("/other/file.txt", "open") + assert not should_patch + assert mapped_path == "/other/file.txt" + + +def test_register_extension(fs_mapping, temp_dir): + """Test registering an extension.""" + + # Define a custom implementation + def custom_open(self, file, mode="r", *args, **kwargs): + should_patch, mapped_path = self.map_path(file, "custom_open") + if not should_patch: + raise ValueError(f"Path {file} should not be patched") + return f"Custom open: {mapped_path} (mode: {mode})" + + # Register the extension + fs_mapping.register_extension("custom_open", custom_open) + + # Use the extension + result = fs_mapping.custom_open("/test/file.txt", "w") + assert result == f"Custom open: {os.path.join(temp_dir, 'file.txt')} (mode: w)" + + +def test_create_fs_mapping_with_dict(fs, temp_dir): + """Test creating a mapping with a dictionary.""" + # Create a mapping with a dictionary + mapping = create_fs_mapping(fs=fs, path_mapping={"/test/": temp_dir}) + + # Test mapping a path + should_patch, mapped_path = mapping.map_path("/test/file.txt", "open") + assert should_patch + assert mapped_path == os.path.join(temp_dir, "file.txt") + + +def test_create_fs_mapping_with_callable(fs, path_mapper, temp_dir): + """Test creating a mapping with a callable.""" + # Create a mapping with a callable + mapping = create_fs_mapping(fs=fs, path_mapping=path_mapper) + + # Test mapping a path + should_patch, mapped_path = mapping.map_path("/test/file.txt", "open") + assert should_patch + assert mapped_path == os.path.join(temp_dir, "file.txt") + + +def test_path_mapper_from_dict(temp_dir): + """Test creating a path mapper from a dictionary.""" + # Create a path mapper + mapper = create_path_mapper_from_dict({"/test/": temp_dir}) + + # Test mapping a path + should_patch, mapped_path = mapper("/test/file.txt", "open") + assert should_patch + assert mapped_path == os.path.join(temp_dir, "file.txt") + + # Test mapping a path that doesn't match + should_patch, mapped_path = mapper("/other/file.txt", "open") + assert not should_patch + assert mapped_path == "/other/file.txt" + + +def test_operation_aware_mapping(fs, temp_dir): + """Test operation-aware mapping.""" + + # Create a path mapper that handles different operations + def operation_mapper(path, operation): + if not isinstance(path, str): + return False, path + + if path.startswith("/test/") and "write" in operation: + # Map write operations to 'writes' subdirectory + mapped_path = os.path.join(temp_dir, "writes", path[6:]) + return True, mapped_path + elif path.startswith("/test/") and "read" in operation: + # Map read operations to 'reads' subdirectory + mapped_path = os.path.join(temp_dir, "reads", path[6:]) + return True, mapped_path + + return False, path + + mapping = create_fs_mapping(fs=fs, path_mapping=operation_mapper) + + # Test write operation + should_patch, mapped_path = mapping.map_path("/test/file.txt", "write") + assert should_patch + assert mapped_path == os.path.join(temp_dir, "writes", "file.txt") + + # Test read operation + should_patch, mapped_path = mapping.map_path("/test/file.txt", "read") + assert should_patch + assert mapped_path == os.path.join(temp_dir, "reads", "file.txt") diff --git a/uv.lock b/uv.lock new file mode 100644 index 0000000..e7d0f1d --- /dev/null +++ b/uv.lock @@ -0,0 +1,22 @@ +version = 1 +revision = 2 +requires-python = ">=3.10" + +[[package]] +name = "fsbridge" +source = { editable = "." } +dependencies = [ + { name = "fsspec" }, +] + +[package.metadata] +requires-dist = [{ name = "fsspec", specifier = ">=2023.0.0" }] + +[[package]] +name = "fsspec" +version = "2025.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f2/77/deb99b97981e2e191913454da82d406702405178631c31cd623caebaf1b1/fsspec-2025.5.0.tar.gz", hash = "sha256:e4f4623bb6221f7407fd695cc535d1f857a077eb247580f4ada34f5dc25fd5c8", size = 300989, upload-time = "2025-05-20T15:46:22.484Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/a9/a7022f58e081149ec0184c31ea81dcee605e1d46380b48122e1ef94ac24e/fsspec-2025.5.0-py3-none-any.whl", hash = "sha256:0ca253eca6b5333d8a2b8bd98c7326fe821f1f0fdbd34e1b445bddde8e804c95", size = 196164, upload-time = "2025-05-20T15:46:20.89Z" }, +]