Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ccflow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from .context import *
from .enums import Enum
from .global_state import *
from .local_persistence import *
from .models import *
from .object_config import *
from .publisher import *
Expand Down
28 changes: 26 additions & 2 deletions ccflow/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from typing_extensions import Self

from .exttypes.pyobjectpath import PyObjectPath
from .local_persistence import register_ccflow_import_path, sync_to_module

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -156,6 +157,19 @@ class BaseModel(PydanticBaseModel, _RegistryMixin, metaclass=_SerializeAsAnyMeta
- Registration by name, and coercion from string name to allow for object re-use from the configs
"""

@classmethod
def __pydantic_init_subclass__(cls, **kwargs):
    """Register subclasses whose standard import path would not resolve.

    Two kinds of class are handed to ``register_ccflow_import_path`` so that
    PyObjectPath can still import them:
    - function-local classes ("<locals>" appears in the qualname), which are not
      reachable through their qualname path;
    - classes defined in ``__main__``, which are not importable cross-process
      (cloudpickle recreates them but does not add them to ``sys.modules["__main__"]``).

    The cross-process unpickle case (``__ccflow_import_path__`` already set) is NOT
    handled here: cloudpickle sets class attributes only AFTER this hook has run, so
    that sync happens lazily via ``sync_to_module``.
    """
    super().__pydantic_init_subclass__(**kwargs)
    defined_in_function = "<locals>" in cls.__qualname__
    defined_in_main = cls.__module__ == "__main__"
    if defined_in_function or defined_in_main:
        register_ccflow_import_path(cls)

@computed_field(
alias="_target_",
repr=False,
Expand All @@ -165,8 +179,18 @@ class BaseModel(PydanticBaseModel, _RegistryMixin, metaclass=_SerializeAsAnyMeta
)
@property
def type_(self) -> PyObjectPath:
"""The path to the object type"""
return PyObjectPath.validate(type(self))
"""The path to the object type.

For local classes (defined in functions), this returns the __ccflow_import_path__.
For cross-process unpickle scenarios, this also ensures the class is synced to
the ccflow.local_persistence module so the import path resolves correctly.
"""
cls = type(self)
# Handle cross-process unpickle: cloudpickle sets __ccflow_import_path__ but
# the class may not be on ccflow.local_persistence yet in this process
if "__ccflow_import_path__" in cls.__dict__:
sync_to_module(cls)
return PyObjectPath.validate(cls)

# We want to track under what names a model has been registered
_registrations: List[Tuple["ModelRegistry", str]] = PrivateAttr(default_factory=list)
Expand Down
127 changes: 93 additions & 34 deletions ccflow/exttypes/pyobjectpath.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,90 @@
"""This module contains extension types for pydantic."""

import importlib
from functools import cached_property, lru_cache
from types import FunctionType, MethodType, ModuleType
from typing import Any, Type, get_origin

from pydantic import ImportString, TypeAdapter
from pydantic import TypeAdapter
from pydantic_core import core_schema
from typing_extensions import Self

_import_string_adapter = TypeAdapter(ImportString)


@lru_cache(maxsize=None)
def import_string(input_string: str):
return _import_string_adapter.validate_python(input_string)
def import_string(dotted_path: str) -> Any:
    """Import an object from a dotted path string.

    Handles nested attribute paths like 'module.OuterClass.InnerClass' by trying
    progressively shorter module prefixes and resolving the remaining parts with
    getattr. This is more flexible than pydantic's ImportString, which can fail on
    nested classes.

    Args:
        dotted_path: Absolute dotted path, e.g. 'package.module.Class.attr'.

    Returns:
        The object found at the given path.

    Raises:
        ImportError: If the path is empty, malformed (contains an empty component,
            e.g. a leading dot), or no module prefix leads to the requested object.
            The most recent underlying failure is chained as ``__cause__``.
    """
    if not dotted_path:
        raise ImportError("Empty path")

    parts = dotted_path.split(".")
    # An empty component (".foo", "a..b", "foo.") can never resolve. Rejecting it here
    # also stops importlib from raising TypeError for what looks like a relative
    # import, which callers that only catch ImportError would not handle.
    if not all(parts):
        raise ImportError(f"No module named '{dotted_path}'")

    last_error = None
    # Try progressively shorter module paths,
    # e.g. for 'a.b.C.D': 'a.b.C.D', then 'a.b.C', then 'a.b', then 'a'.
    for split_at in range(len(parts), 0, -1):
        try:
            obj = importlib.import_module(".".join(parts[:split_at]))
            # Module imported; walk the remaining components as attributes.
            for attr_name in parts[split_at:]:
                obj = getattr(obj, attr_name)
        except (ImportError, AttributeError) as exc:
            # Either the prefix is not a module or an attribute is missing:
            # remember why and fall back to a shorter prefix.
            last_error = exc
            continue
        return obj

    # Keep the underlying reason (e.g. "module 'x' has no attribute 'y'") attached.
    raise ImportError(f"No module named '{dotted_path}'") from last_error


def _build_standard_import_path(obj: Any) -> str:
    """Return the 'module.qualname' path for an object with __module__ and __qualname__.

    Two normalizations are applied:
    - the Python 2 module name ``__builtin__`` is mapped to ``builtins``;
    - generic type parameters are dropped from the qualname
      ("MyClass[int]" -> "MyClass"). This happens with Generic types in pydantic;
      the type info is lost but the base class stays importable.
      TODO: Find a way of capturing the underlying type info

    If the module name is empty, the bare qualname is returned.
    """
    module_name = "builtins" if obj.__module__ == "__builtin__" else obj.__module__
    # partition() yields everything before the first "[", or the whole string if none.
    base_qualname = obj.__qualname__.partition("[")[0]
    if not module_name:
        return base_qualname
    return f"{module_name}.{base_qualname}"


class PyObjectPath(str):
"""Similar to pydantic's ImportString (formerly PyObject in v1), this class represents the path to the object as a string.
"""A string representing an importable Python object path (e.g., "module.ClassName").

Similar to pydantic's ImportString, but with consistent serialization behavior:
- ImportString deserializes to the actual object
- PyObjectPath deserializes back to the string path

In pydantic v1, PyObject could not be serialized to json, whereas in v2, ImportString can.
However, the round trip is not always consistent, i.e.
Example:
>>> ta = TypeAdapter(ImportString)
>>> ta.validate_json(ta.dump_json("math.pi"))
3.141592653589793
>>> ta = TypeAdapter(PyObjectPath)
>>> ta.validate_json(ta.dump_json("math.pi"))
'math.pi'

Other differences are that ImportString can contain other arbitrary python values, whereas PyObjectPath is always a string
PyObjectPath also only accepts importable objects, not arbitrary values:
>>> TypeAdapter(ImportString).validate_python(0)
0
>>> TypeAdapter(PyObjectPath).validate_python(0)
raises
"""

# TODO: It would be nice to make this also derive from Generic[T],
# where T could then by used for type checking in validate.
# where T could then be used for type checking in validate.
# However, this doesn't work: https://github.com/python/typing/issues/629

@cached_property
Expand All @@ -50,34 +98,43 @@ def __get_pydantic_core_schema__(cls, source_type, handler):

@classmethod
def _validate(cls, value: Any):
"""Convert value (string path or object) to PyObjectPath, verifying it's importable."""
if isinstance(value, str):
value = cls(value)
else: # Try to construct a string from the object that can then be used to import the object
path = cls(value)
else:
# Unwrap generic types (e.g., List[int] -> list)
origin = get_origin(value)
if origin:
value = origin
if hasattr(value, "__module__") and hasattr(value, "__qualname__"):
if value.__module__ == "__builtin__":
module = "builtins"
else:
module = value.__module__
qualname = value.__qualname__
if "[" in qualname:
# This happens with Generic types in pydantic. We strip out the info for now.
# TODO: Find a way of capturing the underlying type info
qualname = qualname.split("[", 1)[0]
if not module:
value = cls(qualname)
else:
value = cls(module + "." + qualname)
else:
raise ValueError(f"ensure this value contains valid import path or importable object: unable to import path for {value}")
path = cls._path_from_object(value)

# Verify the path is actually importable
try:
value.object
path.object
except ImportError as e:
raise ValueError(f"ensure this value contains valid import path or importable object: {str(e)}")

return value
return path

@classmethod
def _path_from_object(cls, value: Any) -> "PyObjectPath":
    """Derive the import path for an object (as opposed to a string path).

    Classes that carry their own ``__ccflow_import_path__`` (e.g. local classes)
    use that path. Every other class, and any non-class object exposing both
    ``__module__`` and ``__qualname__``, uses the standard module.qualname path.

    Raises:
        ValueError: If no import path can be built for ``value``.
    """
    is_class = isinstance(value, type)

    # Look in the class's own __dict__ so a path registered for a parent class is not
    # inherited. This is safe because every class object has __dict__; only
    # *instances* of __slots__ classes lack it.
    if is_class and "__ccflow_import_path__" in value.__dict__:
        return cls(value.__ccflow_import_path__)

    if is_class or (hasattr(value, "__module__") and hasattr(value, "__qualname__")):
        return cls(_build_standard_import_path(value))

    raise ValueError(f"ensure this value contains valid import path or importable object: unable to import path for {value}")

@classmethod
@lru_cache(maxsize=None)
Expand All @@ -86,10 +143,12 @@ def _validate_cached(cls, value: str) -> Self:

@classmethod
def validate(cls, value) -> Self:
"""Try to convert/validate an arbitrary value to a PyObjectPath."""
if isinstance(
value, (str, type, FunctionType, ModuleType, MethodType)
): # If the value is trivial, we cache it here to avoid the overhead of validation
"""Try to convert/validate an arbitrary value to a PyObjectPath.

Uses caching for common value types to improve performance.
"""
# Cache validation for common immutable types to avoid repeated work
if isinstance(value, (str, type, FunctionType, ModuleType, MethodType)):
return cls._validate_cached(value)
return _TYPE_ADAPTER.validate_python(value)

Expand Down
95 changes: 95 additions & 0 deletions ccflow/local_persistence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
"""Register local-scope classes on a module so PyObjectPath can import them.

Classes defined in functions (with '<locals>' in __qualname__) aren't normally importable,
and classes defined in __main__ aren't importable from another process.
We give such classes a unique name and register them on this module (ccflow.local_persistence).
We keep __module__ and __qualname__ unchanged so cloudpickle can still serialize the
class definition.

This module provides:
- register_ccflow_import_path(cls): Register a local class with a unique import path
- sync_to_module(cls): Ensure a class with __ccflow_import_path__ is on the module
(used for cross-process unpickle scenarios)
- create_ccflow_model: Wrapper around pydantic.create_model that registers the created model
"""

import re
import sys
import uuid
from typing import Any, Type

# Names exported by ``from ccflow.local_persistence import *``.
__all__ = ("LOCAL_ARTIFACTS_MODULE_NAME", "create_ccflow_model")

# Fully-qualified name of this module; registered classes are set as attributes on it.
LOCAL_ARTIFACTS_MODULE_NAME = "ccflow.local_persistence"


def _register_on_module(cls: Type[Any], module_name: str) -> None:
    """Attach cls to an already-imported module under a freshly generated unique name.

    The resulting dotted path is stored on the class as ``__ccflow_import_path__``.
    ``__module__`` and ``__qualname__`` are left untouched so cloudpickle can still
    serialize the class definition.

    Args:
        cls: The class to register.
        module_name: Fully-qualified name of the target module (must be in sys.modules).
    """
    # Turn the class name into a valid Python identifier fragment.
    raw_name = cls.__name__ or "Model"
    safe_name = re.sub(r"[^0-9A-Za-z_]", "_", raw_name).strip("_")
    if not safe_name:
        safe_name = "Model"
    elif safe_name[0].isdigit():
        safe_name = f"_{safe_name}"

    # A random suffix keeps same-named classes from colliding on the module.
    suffix = uuid.uuid4().hex[:12]
    attr_name = f"_Local_{safe_name}_{suffix}"

    target_module = sys.modules[module_name]
    setattr(target_module, attr_name, cls)
    cls.__ccflow_import_path__ = f"{module_name}.{attr_name}"


def register_ccflow_import_path(cls: Type[Any]) -> None:
    """Register cls on ccflow.local_persistence under a unique generated name.

    Delegates to ``_register_on_module``, which stores the resulting path on the class
    as ``__ccflow_import_path__`` and leaves ``__module__`` and ``__qualname__`` as they
    are, so cloudpickle can still serialize the class definition.
    """
    target_module_name = LOCAL_ARTIFACTS_MODULE_NAME
    _register_on_module(cls, target_module_name)


def sync_to_module(cls: Type[Any]) -> None:
    """Ensure cls is registered on the artifacts module in this process.

    This handles cross-process unpickle scenarios where cloudpickle recreates the class
    with __ccflow_import_path__ already set (from the original process), but the class
    isn't yet registered on ccflow.local_persistence in the new process.

    Only a path stored on cls itself is honoured. A path inherited from a parent class
    names the parent's slot on the module, and writing cls there would make the
    parent's import path resolve to the subclass. Classes without their own path, or
    whose path points outside the artifacts module, are left alone.

    Args:
        cls: The class to sync.
    """
    # Read the class's own namespace rather than using getattr, which follows the MRO.
    path = cls.__dict__.get("__ccflow_import_path__")
    if not isinstance(path, str):
        return
    if not path.startswith(LOCAL_ARTIFACTS_MODULE_NAME + "."):
        return

    name = path.rsplit(".", 1)[-1]
    artifacts_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME]
    # Skip the write when the module already maps this name to cls.
    if getattr(artifacts_module, name, None) is not cls:
        setattr(artifacts_module, name, cls)


def create_ccflow_model(__model_name: str, *, __base__: Any = None, **field_definitions: Any) -> Type[Any]:
    """Build a model with pydantic's create_model and register it for PyObjectPath.

    The model is created by forwarding all arguments to ``pydantic.create_model``.
    If the result is a ccflow BaseModel subclass, it is registered so it can be
    serialized via PyObjectPath, including across processes (e.g., with Ray).

    Args:
        __model_name: Name passed to ``pydantic.create_model``.
        __base__: Base class passed to ``pydantic.create_model``.
        **field_definitions: Remaining keyword arguments, forwarded unchanged.

    Returns:
        The newly created model class.

    Example:
        >>> from ccflow import ContextBase, create_ccflow_model
        >>> MyContext = create_ccflow_model(
        ...     "MyContext",
        ...     __base__=ContextBase,
        ...     name=(str, ...),
        ...     value=(int, 0),
        ... )
        >>> ctx = MyContext(name="test", value=42)
    """
    from pydantic import create_model as pydantic_create_model

    created = pydantic_create_model(__model_name, __base__=__base__, **field_definitions)

    # Imported at call time: ccflow.base itself imports from this module.
    from ccflow.base import BaseModel

    # Only ccflow BaseModel subclasses get an import path registered.
    is_ccflow_model = isinstance(created, type) and issubclass(created, BaseModel)
    if is_ccflow_model:
        register_ccflow_import_path(created)

    return created
9 changes: 8 additions & 1 deletion ccflow/tests/evaluators/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,14 @@
import pandas as pd
import pyarrow as pa

from ccflow import DateContext, DateRangeContext, Evaluator, FlowOptionsOverride, ModelEvaluationContext, NullContext
from ccflow import (
DateContext,
DateRangeContext,
Evaluator,
FlowOptionsOverride,
ModelEvaluationContext,
NullContext,
)
from ccflow.evaluators import (
FallbackEvaluator,
GraphEvaluator,
Expand Down
17 changes: 17 additions & 0 deletions ccflow/tests/exttypes/test_pyobjectpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,20 @@ def test_pickle(self):
self.assertIsNotNone(p.object)
self.assertEqual(p, pickle.loads(pickle.dumps(p)))
self.assertEqual(p.object, pickle.loads(pickle.dumps(p.object)))

def test_builtin_module_alias(self):
    """Objects whose __module__ is '__builtin__' must resolve under 'builtins'.

    '__builtin__' is the Python 2 name of the module that Python 3 calls 'builtins'.
    Some C extensions or pickled objects may still report the old name, so the
    generated path has to be translated to stay importable.
    """
    # Stand-in class that advertises the legacy module name for the built-in int.
    legacy_int = type(
        "MockBuiltinObject",
        (),
        {"__module__": "__builtin__", "__qualname__": "int"},
    )

    path = PyObjectPath.validate(legacy_int)
    self.assertEqual(path, "builtins.int")
    self.assertEqual(path.object, int)
Loading