From 58b4650c901fb6a910e80689e1133d8efac2a183 Mon Sep 17 00:00:00 2001 From: Nijat Khanbabayev Date: Fri, 19 Dec 2025 19:42:50 -0500 Subject: [PATCH 01/15] Register locally defined CallableModels and Contexts in a module to work with PyObjectPath Signed-off-by: Nijat Khanbabayev --- ccflow/base.py | 12 ++ ccflow/callable.py | 2 + ccflow/local_persistence.py | 72 +++++++++ ccflow/tests/evaluators/test_common.py | 50 ++++++- ccflow/tests/local_helpers.py | 46 ++++++ ccflow/tests/test_base.py | 70 ++++++++- ccflow/tests/test_callable.py | 193 ++++++++++++++++++++++++- ccflow/tests/test_local_persistence.py | 81 +++++++++++ docs/wiki/Key-Features.md | 3 + 9 files changed, 524 insertions(+), 5 deletions(-) create mode 100644 ccflow/local_persistence.py create mode 100644 ccflow/tests/local_helpers.py create mode 100644 ccflow/tests/test_local_persistence.py diff --git a/ccflow/base.py b/ccflow/base.py index 18e2eac..adfec2e 100644 --- a/ccflow/base.py +++ b/ccflow/base.py @@ -30,6 +30,7 @@ from typing_extensions import Self from .exttypes.pyobjectpath import PyObjectPath +from .local_persistence import register_local_subclass log = logging.getLogger(__name__) @@ -156,6 +157,15 @@ class BaseModel(PydanticBaseModel, _RegistryMixin, metaclass=_SerializeAsAnyMeta - Registration by name, and coercion from string name to allow for object re-use from the configs """ + __ccflow_local_registration_kind__: ClassVar[str] = "model" + + @classmethod + def __pydantic_init_subclass__(cls, **kwargs): + # __pydantic_init_subclass__ is the supported hook point once Pydantic finishes wiring the subclass. + super().__pydantic_init_subclass__(**kwargs) + kind = getattr(cls, "__ccflow_local_registration_kind__", "model") + register_local_subclass(cls, kind=kind) + @computed_field( alias="_target_", repr=False, @@ -820,6 +830,8 @@ class ContextBase(ResultBase): that is an input into another CallableModel. """ + __ccflow_local_registration_kind__: ClassVar[str] = "context" + model_config = ConfigDict( frozen=True, arbitrary_types_allowed=False, diff --git a/ccflow/callable.py b/ccflow/callable.py index b09eaea..595079d 100644 --- a/ccflow/callable.py +++ b/ccflow/callable.py @@ -74,6 +74,8 @@ class _CallableModel(BaseModel, abc.ABC): The purpose of this class is to provide type definitions of context_type and return_type. """ + __ccflow_local_registration_kind__: ClassVar[str] = "callable_model" + model_config = ConfigDict( ignored_types=(property,), ) diff --git a/ccflow/local_persistence.py b/ccflow/local_persistence.py new file mode 100644 index 0000000..7331a9b --- /dev/null +++ b/ccflow/local_persistence.py @@ -0,0 +1,72 @@ +"""Helpers for persisting BaseModel-derived classes defined inside local scopes.""" + +from __future__ import annotations + +import re +import sys +from collections import defaultdict +from itertools import count +from types import ModuleType +from typing import Any, DefaultDict, Type + +__all__ = ("LOCAL_ARTIFACTS_MODULE_NAME", "register_local_subclass") + + +LOCAL_ARTIFACTS_MODULE_NAME = "ccflow._local_artifacts" +_LOCAL_MODULE_DOC = "Auto-generated BaseModel subclasses created outside importable modules." + +_SANITIZE_PATTERN = re.compile(r"[^0-9A-Za-z_]") +_LOCAL_KIND_COUNTERS: DefaultDict[str, count] = defaultdict(lambda: count()) + + +def _ensure_module(name: str, doc: str) -> ModuleType: + """Ensure the dynamic module exists so import paths remain stable.""" + module = sys.modules.get(name) + if module is None: + module = ModuleType(name, doc) + sys.modules[name] = module + parent_name, _, attr = name.rpartition(".") + if parent_name: + parent_module = sys.modules.get(parent_name) + if parent_module and not hasattr(parent_module, attr): + setattr(parent_module, attr, module) + return module + + +_LOCAL_ARTIFACTS_MODULE = _ensure_module(LOCAL_ARTIFACTS_MODULE_NAME, _LOCAL_MODULE_DOC) + + +def _needs_registration(cls: Type[Any]) -> bool: + module = getattr(cls, "__module__", "") + qualname = getattr(cls, "__qualname__", "") + return "" in qualname or module == "__main__" + + +def _sanitize_identifier(value: str, fallback: str) -> str: + sanitized = _SANITIZE_PATTERN.sub("_", value or "") + sanitized = sanitized.strip("_") or fallback + if sanitized[0].isdigit(): + sanitized = f"_{sanitized}" + return sanitized + + +def _build_unique_name(*, kind_slug: str, name_hint: str) -> str: + sanitized_hint = _sanitize_identifier(name_hint, "BaseModel") + counter = _LOCAL_KIND_COUNTERS[kind_slug] + return f"{kind_slug}__{sanitized_hint}__{next(counter)}" + + +def register_local_subclass(cls: Type[Any], *, kind: str = "model") -> None: + """Register BaseModel subclasses created in local scopes.""" + if getattr(cls, "__module__", "").startswith(LOCAL_ARTIFACTS_MODULE_NAME): + return + if not _needs_registration(cls): + return + + name_hint = f"{getattr(cls, '__module__', '')}.{getattr(cls, '__qualname__', '')}" + kind_slug = _sanitize_identifier(kind, "model").lower() + unique_name = _build_unique_name(kind_slug=kind_slug, name_hint=name_hint) + setattr(_LOCAL_ARTIFACTS_MODULE, unique_name, cls) + cls.__module__ = _LOCAL_ARTIFACTS_MODULE.__name__ + cls.__qualname__ = unique_name + setattr(cls, "__ccflow_dynamic_origin__", name_hint) diff --git a/ccflow/tests/evaluators/test_common.py b/ccflow/tests/evaluators/test_common.py index 7b5abf0..4a4b503 100644 --- a/ccflow/tests/evaluators/test_common.py +++ b/ccflow/tests/evaluators/test_common.py @@ -1,11 +1,24 @@ import logging from datetime import date +from typing import ClassVar from unittest import TestCase import pandas as pd import pyarrow as pa -from ccflow import DateContext, DateRangeContext, Evaluator, FlowOptionsOverride, ModelEvaluationContext, NullContext +from ccflow import ( + CallableModel, + ContextBase, + DateContext, + DateRangeContext, + Evaluator, + Flow, + FlowOptions, + FlowOptionsOverride, + GenericResult, + ModelEvaluationContext, + NullContext, +) from ccflow.evaluators import ( FallbackEvaluator, GraphEvaluator, @@ -16,6 +29,7 @@ combine_evaluators, get_dependency_graph, ) +from ccflow.tests.local_helpers import build_nested_graph_chain from .util import CircularModel, MyDateCallable, MyDateRangeCallable, MyRaisingCallable, NodeModel, ResultModel @@ -473,3 +487,37 @@ def test_graph_evaluator_circular(self): evaluator = GraphEvaluator() with FlowOptionsOverride(options={"evaluator": evaluator}): self.assertRaises(Exception, root, context) + + def test_graph_evaluator_with_local_models_and_cache(self): + ParentCls, ChildCls = build_nested_graph_chain() + ChildCls.call_count = 0 + model = ParentCls(child=ChildCls()) + evaluator = MultiEvaluator(evaluators=[GraphEvaluator(), MemoryCacheEvaluator()]) + with FlowOptionsOverride(options=FlowOptions(evaluator=evaluator, cacheable=True)): + first = model(NullContext()) + second = model(NullContext()) + self.assertEqual(first.value, second.value) + self.assertEqual(ChildCls.call_count, 1) + + +class TestMemoryCacheEvaluatorLocal(TestCase): + def test_memory_cache_handles_local_context_and_callable(self): + class LocalContext(ContextBase): + value: int + + class LocalModel(CallableModel): + call_count: ClassVar[int] = 0 + + @Flow.call + def __call__(self, context: LocalContext) -> GenericResult: + type(self).call_count += 1 + return GenericResult(value=context.value * 2) + + evaluator = MemoryCacheEvaluator() + LocalModel.call_count = 0 + model = LocalModel() + with FlowOptionsOverride(options=FlowOptions(evaluator=evaluator, cacheable=True)): + result1 = model(LocalContext(value=5)) + result2 = model(LocalContext(value=5)) + self.assertEqual(result1.value, result2.value) + self.assertEqual(LocalModel.call_count, 1) diff --git a/ccflow/tests/local_helpers.py b/ccflow/tests/local_helpers.py new file mode 100644 index 0000000..5f616cf --- /dev/null +++ b/ccflow/tests/local_helpers.py @@ -0,0 +1,46 @@ +"""Shared helpers for constructing local-scope contexts/models in tests.""" + +from typing import ClassVar, Tuple, Type + +from ccflow import CallableModel, ContextBase, Flow, GenericResult, GraphDepList, NullContext + + +def build_local_callable(name: str = "LocalCallable") -> Type[CallableModel]: + class _LocalCallable(CallableModel): + @Flow.call + def __call__(self, context: NullContext) -> GenericResult: + return GenericResult(value="local") + + _LocalCallable.__name__ = name + return _LocalCallable + + +def build_local_context(name: str = "LocalContext") -> Type[ContextBase]: + class _LocalContext(ContextBase): + value: int + + _LocalContext.__name__ = name + return _LocalContext + + +def build_nested_graph_chain() -> Tuple[Type[CallableModel], Type[CallableModel]]: + class LocalLeaf(CallableModel): + call_count: ClassVar[int] = 0 + + @Flow.call + def __call__(self, context: NullContext) -> GenericResult: + type(self).call_count += 1 + return GenericResult(value="leaf") + + class LocalParent(CallableModel): + child: LocalLeaf + + @Flow.call + def __call__(self, context: NullContext) -> GenericResult: + return self.child(context) + + @Flow.deps + def __deps__(self, context: NullContext) -> GraphDepList: + return [(self.child, [context])] + + return LocalParent, LocalLeaf diff --git a/ccflow/tests/test_base.py b/ccflow/tests/test_base.py index 3c6fc02..d6d9413 100644 --- a/ccflow/tests/test_base.py +++ b/ccflow/tests/test_base.py @@ -1,9 +1,10 @@ from typing import Any, Dict, List -from unittest import TestCase +from unittest import TestCase, mock -from pydantic import ConfigDict, ValidationError +from pydantic import BaseModel as PydanticBaseModel, ConfigDict, ValidationError -from ccflow import BaseModel, PyObjectPath +from ccflow import BaseModel, CallableModel, ContextBase, Flow, GenericResult, NullContext, PyObjectPath +from ccflow.local_persistence import LOCAL_ARTIFACTS_MODULE_NAME class ModelA(BaseModel): @@ -106,6 +107,20 @@ def test_type_after_assignment(self): self.assertIsInstance(m.type_, PyObjectPath) self.assertEqual(m.type_, path) + def test_pyobjectpath_requires_ccflow_local_registration(self): + class PlainLocalModel(PydanticBaseModel): + value: int + + with self.assertRaises(ValueError): + PyObjectPath.validate(PlainLocalModel) + + class LocalCcflowModel(BaseModel): + value: int + + path = PyObjectPath.validate(LocalCcflowModel) + self.assertEqual(path.object, LocalCcflowModel) + self.assertTrue(str(path).startswith(f"{LOCAL_ARTIFACTS_MODULE_NAME}.")) + def test_validate(self): self.assertEqual(ModelA.model_validate({"x": "foo"}), ModelA(x="foo")) type_ = "ccflow.tests.test_base.ModelA" @@ -157,3 +172,52 @@ def test_widget(self): {"expanded": True, "root": "bar"}, ), ) + + +class TestLocalRegistrationKind(TestCase): + def test_base_model_defaults_to_model_kind(self): + with mock.patch("ccflow.base.register_local_subclass") as register: + + class LocalModel(BaseModel): + value: int + + register.assert_called_once() + args, kwargs = register.call_args + self.assertIs(args[0], LocalModel) + self.assertEqual(kwargs["kind"], "model") + + def test_context_defaults_to_context_kind(self): + with mock.patch("ccflow.base.register_local_subclass") as register: + + class LocalContext(ContextBase): + value: int + + register.assert_called_once() + args, kwargs = register.call_args + self.assertIs(args[0], LocalContext) + self.assertEqual(kwargs["kind"], "context") + + def test_callable_defaults_to_callable_kind(self): + with mock.patch("ccflow.base.register_local_subclass") as register: + + class LocalCallable(CallableModel): + @Flow.call + def __call__(self, context: NullContext) -> GenericResult: + return GenericResult(value="ok") + + register.assert_called_once() + args, kwargs = register.call_args + self.assertIs(args[0], LocalCallable) + self.assertEqual(kwargs["kind"], "callable_model") + + def test_explicit_override_respected(self): + with mock.patch("ccflow.base.register_local_subclass") as register: + + class CustomKind(BaseModel): + __ccflow_local_registration_kind__ = "custom" + value: int + + register.assert_called_once() + args, kwargs = register.call_args + self.assertIs(args[0], CustomKind) + self.assertEqual(kwargs["kind"], "custom") diff --git a/ccflow/tests/test_callable.py b/ccflow/tests/test_callable.py index 0ba24ea..5b0f0a9 100644 --- a/ccflow/tests/test_callable.py +++ b/ccflow/tests/test_callable.py @@ -1,5 +1,6 @@ +import sys from pickle import dumps as pdumps, loads as ploads -from typing import Generic, List, Optional, Tuple, Type, TypeVar, Union +from typing import ClassVar, Generic, List, Optional, Tuple, Type, TypeVar, Union from unittest import TestCase import ray @@ -21,6 +22,39 @@ ResultType, WrapperModel, ) +from ccflow.local_persistence import LOCAL_ARTIFACTS_MODULE_NAME +from ccflow.tests.local_helpers import build_local_callable, build_local_context + + +def _find_registered_name(module, cls): + for name, value in vars(module).items(): + if value is cls: + return name + raise AssertionError(f"{cls} not found in {module.__name__}") + + +def _build_main_module_callable(): + namespace = { + "__name__": "__main__", + "ClassVar": ClassVar, + "CallableModel": CallableModel, + "Flow": Flow, + "GenericResult": GenericResult, + "NullContext": NullContext, + } + exec( + """ +class MainModuleCallable(CallableModel): + call_count: ClassVar[int] = 0 + + @Flow.call + def __call__(self, context: NullContext) -> GenericResult: + type(self).call_count += 1 + return GenericResult(value=\"main\") +""", + namespace, + ) + return namespace["MainModuleCallable"] class MyContext(ContextBase): @@ -493,6 +527,163 @@ def test_union_return(self): self.assertEqual(result.a, 1) +class TestCallableModelRegistration(TestCase): + def test_module_level_class_retains_module(self): + self.assertEqual(MyCallable.__module__, __name__) + dynamic_module = sys.modules.get(LOCAL_ARTIFACTS_MODULE_NAME) + if dynamic_module: + self.assertFalse(any(value is MyCallable for value in vars(dynamic_module).values())) + + def test_module_level_context_retains_module(self): + self.assertEqual(MyContext.__module__, __name__) + dynamic_module = sys.modules.get(LOCAL_ARTIFACTS_MODULE_NAME) + if dynamic_module: + self.assertFalse(any(value is MyContext for value in vars(dynamic_module).values())) + + def test_local_class_moves_under_dynamic_namespace(self): + LocalCallable = build_local_callable() + module_name = LocalCallable.__module__ + self.assertEqual(module_name, LOCAL_ARTIFACTS_MODULE_NAME) + dynamic_module = sys.modules[module_name] + self.assertIs(getattr(dynamic_module, LocalCallable.__qualname__), LocalCallable) + self.assertIn("", getattr(LocalCallable, "__ccflow_dynamic_origin__")) + result = LocalCallable()(NullContext()) + self.assertEqual(result.value, "local") + + def test_multiple_local_definitions_have_unique_identifiers(self): + first = build_local_callable() + second = build_local_callable() + self.assertNotEqual(first.__qualname__, second.__qualname__) + dynamic_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] + self.assertIs(getattr(dynamic_module, first.__qualname__), first) + self.assertIs(getattr(dynamic_module, second.__qualname__), second) + + def test_context_and_callable_same_name_do_not_collide(self): + def build_conflicting(): + class LocalThing(ContextBase): + value: int + + context_cls = LocalThing + + class LocalThing(CallableModel): + @Flow.call + def __call__(self, context: context_cls) -> GenericResult: + return GenericResult(value=context.value) + + callable_cls = LocalThing + return context_cls, callable_cls + + LocalContext, LocalCallable = build_conflicting() + locals_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] + ctx_attr = _find_registered_name(locals_module, LocalContext) + model_attr = _find_registered_name(locals_module, LocalCallable) + self.assertTrue(ctx_attr.startswith("context__")) + self.assertTrue(model_attr.startswith("callable_model__")) + ctx_hint = ctx_attr.partition("__")[2].rsplit("__", 1)[0] + model_hint = model_attr.partition("__")[2].rsplit("__", 1)[0] + self.assertEqual(ctx_hint, model_hint) + self.assertEqual(getattr(locals_module, ctx_attr), LocalContext) + self.assertEqual(getattr(locals_module, model_attr), LocalCallable) + self.assertNotEqual(ctx_attr, model_attr, "Kind-prefixed names keep contexts and callables distinct.") + + def test_local_callable_type_path_roundtrip(self): + LocalCallable = build_local_callable() + instance = LocalCallable() + path = instance.type_ + self.assertEqual(path.object, LocalCallable) + self.assertTrue(str(path).startswith(f"{LOCAL_ARTIFACTS_MODULE_NAME}.")) + + def test_local_context_type_path_roundtrip(self): + LocalContext = build_local_context() + ctx = LocalContext(value=10) + path = ctx.type_ + self.assertEqual(path.object, LocalContext) + self.assertTrue(str(path).startswith(f"{LOCAL_ARTIFACTS_MODULE_NAME}.")) + + def test_exec_defined_main_module_class_registered(self): + MainCallable = _build_main_module_callable() + self.assertEqual(MainCallable.__module__, LOCAL_ARTIFACTS_MODULE_NAME) + self.assertTrue(getattr(MainCallable, "__ccflow_dynamic_origin__").startswith("__main__.")) + model = MainCallable() + MainCallable.call_count = 0 + result = model(NullContext()) + self.assertEqual(result.value, "main") + self.assertEqual(MainCallable.call_count, 1) + + def test_local_context_and_model_serialization_roundtrip(self): + class LocalContext(ContextBase): + value: int + + class LocalModel(CallableModel): + factor: int = 2 + + @Flow.call + def __call__(self, context: LocalContext) -> GenericResult: + return GenericResult(value=context.value * self.factor) + + instance = LocalModel(factor=5) + context = LocalContext(value=7) + serialized_model = instance.model_dump(mode="python") + restored_model = LocalModel.model_validate(serialized_model) + self.assertEqual(restored_model, instance) + serialized_context = context.model_dump(mode="python") + restored_context = LocalContext.model_validate(serialized_context) + self.assertEqual(restored_context, context) + + def test_multiple_nested_levels_unique_paths(self): + created = [] + + def layer(depth: int): + class LocalContext(ContextBase): + value: int + + class LocalModel(CallableModel): + multiplier: int = depth + 1 + call_count: ClassVar[int] = 0 + + @Flow.call + def __call__(self, context: LocalContext) -> GenericResult: + type(self).call_count += 1 + return GenericResult(value=context.value * self.multiplier) + + created.append((depth, LocalContext, LocalModel)) + + if depth < 2: + + def inner(): + layer(depth + 1) + + inner() + + def sibling_group(): + class LocalContext(ContextBase): + value: int + + class LocalModel(CallableModel): + @Flow.call + def __call__(self, context: LocalContext) -> GenericResult: + return GenericResult(value=context.value) + + created.append(("sibling", LocalContext, LocalModel)) + + layer(0) + sibling_group() + sibling_group() + + locals_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] + + context_names = {ctx.__qualname__ for _, ctx, _ in created} + model_names = {model.__qualname__ for _, _, model in created} + self.assertEqual(len(context_names), len(created)) + self.assertEqual(len(model_names), len(created)) + + for _, ctx_cls, model_cls in created: + self.assertIs(getattr(locals_module, ctx_cls.__qualname__), ctx_cls) + self.assertIs(getattr(locals_module, model_cls.__qualname__), model_cls) + self.assertIn("", getattr(ctx_cls, "__ccflow_dynamic_origin__")) + self.assertIn("", getattr(model_cls, "__ccflow_dynamic_origin__")) + + class TestWrapperModel(TestCase): def test_wrapper(self): md = MetaData(name="foo", description="My Foo") diff --git a/ccflow/tests/test_local_persistence.py b/ccflow/tests/test_local_persistence.py new file mode 100644 index 0000000..dccd2ef --- /dev/null +++ b/ccflow/tests/test_local_persistence.py @@ -0,0 +1,81 @@ +from collections import defaultdict +from itertools import count +from unittest import TestCase, mock + +import ccflow.local_persistence as local_persistence +from ccflow import BaseModel, CallableModel, ContextBase, Flow, GenericResult, NullContext + + +class ModuleLevelModel(BaseModel): + value: int + + +class ModuleLevelContext(ContextBase): + value: int + + +class ModuleLevelCallable(CallableModel): + @Flow.call + def __call__(self, context: NullContext) -> GenericResult: + return GenericResult(value="ok") + + +class TestNeedsRegistration(TestCase): + def test_module_level_ccflow_classes_do_not_need_registration(self): + for cls in (ModuleLevelModel, ModuleLevelContext, ModuleLevelCallable): + with self.subTest(cls=cls): + self.assertFalse(local_persistence._needs_registration(cls)) + + def test_local_scope_class_needs_registration(self): + def build_class(): + class LocalClass: + pass + + return LocalClass + + LocalClass = build_class() + self.assertTrue(local_persistence._needs_registration(LocalClass)) + + def test_main_module_class_needs_registration(self): + cls = type("MainModuleClass", (), {}) + cls.__module__ = "__main__" + cls.__qualname__ = "MainModuleClass" + self.assertTrue(local_persistence._needs_registration(cls)) + + def test_module_level_non_ccflow_class_does_not_need_registration(self): + cls = type("ExternalClass", (), {}) + cls.__module__ = "ccflow.tests.test_local_persistence" + cls.__qualname__ = "ExternalClass" + self.assertFalse(local_persistence._needs_registration(cls)) + + +class TestBuildUniqueName(TestCase): + def test_build_unique_name_sanitizes_hint_and_increments_counter(self): + with mock.patch.object(local_persistence, "_LOCAL_KIND_COUNTERS", defaultdict(lambda: count())): + name = local_persistence._build_unique_name( + kind_slug="callable_model", + name_hint="module.path:MyCallable", + ) + self.assertTrue(name.startswith("callable_model__module_path_MyCallable_One_")) + self.assertTrue(name.endswith("__0")) + + second = local_persistence._build_unique_name( + kind_slug="callable_model", + name_hint="module.path:MyCallable", + ) + self.assertTrue(second.endswith("__1")) + + def test_counters_are_namespaced_by_kind(self): + with mock.patch.object(local_persistence, "_LOCAL_KIND_COUNTERS", defaultdict(lambda: count())): + first_context = local_persistence._build_unique_name(kind_slug="context", name_hint="Context") + first_callable = local_persistence._build_unique_name(kind_slug="callable_model", name_hint="Callable") + second_context = local_persistence._build_unique_name(kind_slug="context", name_hint="Other") + + self.assertTrue(first_context.endswith("__0")) + self.assertTrue(first_callable.endswith("__0")) + self.assertTrue(second_context.endswith("__1")) + + def test_empty_hint_uses_fallback(self): + with mock.patch.object(local_persistence, "_LOCAL_KIND_COUNTERS", defaultdict(lambda: count())): + name = local_persistence._build_unique_name(kind_slug="model", name_hint="") + self.assertEqual(name, "model__BaseModel__0") diff --git a/docs/wiki/Key-Features.md b/docs/wiki/Key-Features.md index 616e3d8..1f5bd18 100644 --- a/docs/wiki/Key-Features.md +++ b/docs/wiki/Key-Features.md @@ -22,6 +22,9 @@ The naming was inspired by the open source library [Pydantic](https://docs.pydan `CallableModel`'s are called with a context (something that derives from `ContextBase`) and returns a result (something that derives from `ResultBase`). As an example, you may have a `SQLReader` callable model that when called with a `DateRangeContext` returns a `ArrowResult` (wrapper around a Arrow table) with data in the date range defined by the context by querying some SQL database. +> [!NOTE] +> `CallableModel`, `ContextBase`, and other `ccflow.BaseModel` subclasses declared inside local scopes (functions, tests, notebooks, REPLs, etc.) are automatically persisted under `ccflow._local_artifacts`. Each stored class is prefixed with its kind (for example, `callable_model__...` versus `context__...`) so PyObjectPath-based evaluators can serialize/deserialize them without collisions. The backing module is append-only; long-lived processes should avoid generating unbounded unique classes if cleanup is required. + ## Model Registry A `ModelRegistry` is a named collection of models. From 7d852437010b4be02ffb386aba99d57357694c59 Mon Sep 17 00:00:00 2001 From: Nijat Khanbabayev Date: Fri, 19 Dec 2025 19:52:15 -0500 Subject: [PATCH 02/15] Try to improve code coverage Signed-off-by: Nijat Khanbabayev --- ccflow/tests/test_base.py | 9 ++++++--- ccflow/tests/test_callable.py | 12 +++++++++++- docs/wiki/Key-Features.md | 2 +- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/ccflow/tests/test_base.py b/ccflow/tests/test_base.py index d6d9413..656d841 100644 --- a/ccflow/tests/test_base.py +++ b/ccflow/tests/test_base.py @@ -205,10 +205,13 @@ class LocalCallable(CallableModel): def __call__(self, context: NullContext) -> GenericResult: return GenericResult(value="ok") - register.assert_called_once() - args, kwargs = register.call_args - self.assertIs(args[0], LocalCallable) + result = LocalCallable()(NullContext()) + + calls_for_local = [(args, kwargs) for args, kwargs in register.call_args_list if args and args[0] is LocalCallable] + self.assertEqual(len(calls_for_local), 1) + _, kwargs = calls_for_local[0] self.assertEqual(kwargs["kind"], "callable_model") + self.assertEqual(result.value, "ok") def test_explicit_override_respected(self): with mock.patch("ccflow.base.register_local_subclass") as register: diff --git a/ccflow/tests/test_callable.py b/ccflow/tests/test_callable.py index 5b0f0a9..80ada25 100644 --- a/ccflow/tests/test_callable.py +++ b/ccflow/tests/test_callable.py @@ -623,12 +623,16 @@ def __call__(self, context: LocalContext) -> GenericResult: instance = LocalModel(factor=5) context = LocalContext(value=7) + result = instance(context) + self.assertEqual(result.value, 35) serialized_model = instance.model_dump(mode="python") restored_model = LocalModel.model_validate(serialized_model) self.assertEqual(restored_model, instance) serialized_context = context.model_dump(mode="python") restored_context = LocalContext.model_validate(serialized_context) self.assertEqual(restored_context, context) + restored_result = restored_model(restored_context) + self.assertEqual(restored_result.value, 35) def test_multiple_nested_levels_unique_paths(self): created = [] @@ -677,11 +681,17 @@ def __call__(self, context: LocalContext) -> GenericResult: self.assertEqual(len(context_names), len(created)) self.assertEqual(len(model_names), len(created)) - for _, ctx_cls, model_cls in created: + for label, ctx_cls, model_cls in created: self.assertIs(getattr(locals_module, ctx_cls.__qualname__), ctx_cls) self.assertIs(getattr(locals_module, model_cls.__qualname__), model_cls) self.assertIn("", getattr(ctx_cls, "__ccflow_dynamic_origin__")) self.assertIn("", getattr(model_cls, "__ccflow_dynamic_origin__")) + ctx_instance = ctx_cls(value=4) + result = model_cls()(ctx_instance) + if isinstance(label, str): # sibling group + self.assertEqual(result.value, ctx_instance.value) + else: + self.assertEqual(result.value, ctx_instance.value * (label + 1)) class TestWrapperModel(TestCase): diff --git a/docs/wiki/Key-Features.md b/docs/wiki/Key-Features.md index 1f5bd18..ea78c63 100644 --- a/docs/wiki/Key-Features.md +++ b/docs/wiki/Key-Features.md @@ -23,7 +23,7 @@ The naming was inspired by the open source library [Pydantic](https://docs.pydan As an example, you may have a `SQLReader` callable model that when called with a `DateRangeContext` returns a `ArrowResult` (wrapper around a Arrow table) with data in the date range defined by the context by querying some SQL database. > [!NOTE] -> `CallableModel`, `ContextBase`, and other `ccflow.BaseModel` subclasses declared inside local scopes (functions, tests, notebooks, REPLs, etc.) are automatically persisted under `ccflow._local_artifacts`. Each stored class is prefixed with its kind (for example, `callable_model__...` versus `context__...`) so PyObjectPath-based evaluators can serialize/deserialize them without collisions. The backing module is append-only; long-lived processes should avoid generating unbounded unique classes if cleanup is required. +> `CallableModel`, `ContextBase`, and other `ccflow.BaseModel` subclasses declared inside local scopes (functions, tests, etc.) are automatically persisted under `ccflow._local_artifacts`. Each stored class is prefixed with its kind (for example, `callable_model__...` versus `context__...`) so PyObjectPath-based evaluators can serialize/deserialize them without collisions. The backing module is append-only; long-lived processes should avoid generating unbounded unique classes if cleanup is required. ## Model Registry From 9fb2a924c8582cdb23461935090d0d1c11078ccd Mon Sep 17 00:00:00 2001 From: Nijat Khanbabayev Date: Fri, 19 Dec 2025 20:20:17 -0500 Subject: [PATCH 03/15] Only register not __main__ Signed-off-by: Nijat Khanbabayev --- ccflow/local_persistence.py | 3 +-- ccflow/tests/test_callable.py | 34 -------------------------- ccflow/tests/test_local_persistence.py | 4 +-- 3 files changed, 3 insertions(+), 38 deletions(-) diff --git a/ccflow/local_persistence.py b/ccflow/local_persistence.py index 7331a9b..7a2cc7b 100644 --- a/ccflow/local_persistence.py +++ b/ccflow/local_persistence.py @@ -37,9 +37,8 @@ def _ensure_module(name: str, doc: str) -> ModuleType: def _needs_registration(cls: Type[Any]) -> bool: - module = getattr(cls, "__module__", "") qualname = getattr(cls, "__qualname__", "") - return "" in qualname or module == "__main__" + return "" in qualname def _sanitize_identifier(value: str, fallback: str) -> str: diff --git a/ccflow/tests/test_callable.py b/ccflow/tests/test_callable.py index 80ada25..329c21a 100644 --- a/ccflow/tests/test_callable.py +++ b/ccflow/tests/test_callable.py @@ -33,30 +33,6 @@ def _find_registered_name(module, cls): raise AssertionError(f"{cls} not found in {module.__name__}") -def _build_main_module_callable(): - namespace = { - "__name__": "__main__", - "ClassVar": ClassVar, - "CallableModel": CallableModel, - "Flow": Flow, - "GenericResult": GenericResult, - "NullContext": NullContext, - } - exec( - """ -class MainModuleCallable(CallableModel): - call_count: ClassVar[int] = 0 - - @Flow.call - def __call__(self, context: NullContext) -> GenericResult: - type(self).call_count += 1 - return GenericResult(value=\"main\") -""", - namespace, - ) - return namespace["MainModuleCallable"] - - class MyContext(ContextBase): a: str @@ -600,16 +576,6 @@ def test_local_context_type_path_roundtrip(self): self.assertEqual(path.object, LocalContext) self.assertTrue(str(path).startswith(f"{LOCAL_ARTIFACTS_MODULE_NAME}.")) - def test_exec_defined_main_module_class_registered(self): - MainCallable = _build_main_module_callable() - self.assertEqual(MainCallable.__module__, LOCAL_ARTIFACTS_MODULE_NAME) - self.assertTrue(getattr(MainCallable, "__ccflow_dynamic_origin__").startswith("__main__.")) - model = MainCallable() - MainCallable.call_count = 0 - result = model(NullContext()) - self.assertEqual(result.value, "main") - self.assertEqual(MainCallable.call_count, 1) - def test_local_context_and_model_serialization_roundtrip(self): class LocalContext(ContextBase): value: int diff --git a/ccflow/tests/test_local_persistence.py b/ccflow/tests/test_local_persistence.py index dccd2ef..bebf0bf 100644 --- a/ccflow/tests/test_local_persistence.py +++ b/ccflow/tests/test_local_persistence.py @@ -36,11 +36,11 @@ class LocalClass: LocalClass = build_class() self.assertTrue(local_persistence._needs_registration(LocalClass)) - def test_main_module_class_needs_registration(self): + def test_main_module_class_does_not_need_registration(self): cls = type("MainModuleClass", (), {}) cls.__module__ = "__main__" cls.__qualname__ = "MainModuleClass" - self.assertTrue(local_persistence._needs_registration(cls)) + self.assertFalse(local_persistence._needs_registration(cls)) def test_module_level_non_ccflow_class_does_not_need_registration(self): cls = type("ExternalClass", (), {}) From 32ae2196d2679a44a85dc99a6f5cec0a20f17173 Mon Sep 17 00:00:00 2001 From: Nijat Khanbabayev Date: Fri, 19 Dec 2025 20:35:22 -0500 Subject: [PATCH 04/15] Add dummy example with CallableModel making callable models Signed-off-by: Nijat Khanbabayev --- ccflow/tests/evaluators/test_common.py | 18 +++++++++- ccflow/tests/local_helpers.py | 46 +++++++++++++++++++++++++- ccflow/tests/test_callable.py | 17 +++++++++- 3 files changed, 78 insertions(+), 3 deletions(-) diff --git a/ccflow/tests/evaluators/test_common.py b/ccflow/tests/evaluators/test_common.py index 4a4b503..fa85958 100644 --- a/ccflow/tests/evaluators/test_common.py +++ b/ccflow/tests/evaluators/test_common.py @@ -29,7 +29,7 @@ combine_evaluators, get_dependency_graph, ) -from ccflow.tests.local_helpers import build_nested_graph_chain +from ccflow.tests.local_helpers import build_meta_sensor_planner, build_nested_graph_chain from .util import CircularModel, MyDateCallable, MyDateRangeCallable, MyRaisingCallable, NodeModel, ResultModel @@ -220,6 +220,22 @@ def test_logging_options_nested(self): self.assertIn("End evaluation of __call__", captured.records[2].getMessage()) self.assertIn("time elapsed", captured.records[2].getMessage()) + def test_meta_callable_logged_with_evaluator(self): + """Meta callables can spin up request-scoped specialists and still inherit evaluator instrumentation.""" + SensorQuery, MetaSensorPlanner, captured = build_meta_sensor_planner() + evaluator = LoggingEvaluator(log_level=logging.INFO, verbose=False) + request = SensorQuery(sensor_type="pressure-valve", site="orbital-lab", window=4) + meta = MetaSensorPlanner(warm_start=2) + with FlowOptionsOverride(options=FlowOptions(evaluator=evaluator)): + with self.assertLogs(level=logging.INFO) as captured_logs: + result = meta(request) + self.assertEqual(result.value, "planner:orbital-lab:pressure-valve:6") + start_messages = [record.getMessage() for record in captured_logs.records if "Start evaluation" in record.getMessage()] + self.assertEqual(len(start_messages), 2) + self.assertTrue(any("MetaSensorPlanner" in msg for msg in start_messages)) + specialist_name = captured["callable_cls"].__name__ + self.assertTrue(any(specialist_name in msg for msg in start_messages)) + class SubContext(DateContext): pass diff --git a/ccflow/tests/local_helpers.py b/ccflow/tests/local_helpers.py index 5f616cf..b5ffcdf 100644 --- a/ccflow/tests/local_helpers.py +++ b/ccflow/tests/local_helpers.py @@ -1,10 +1,54 @@ """Shared helpers for constructing local-scope contexts/models in tests.""" -from typing import ClassVar, Tuple, Type +from typing import ClassVar, Dict, Tuple, Type from ccflow import CallableModel, ContextBase, Flow, GenericResult, GraphDepList, NullContext +def build_meta_sensor_planner(): + """Return a (SensorQuery, MetaSensorPlanner, captured) tuple for meta-callable tests.""" + + captured: Dict[str, Type] = {} + + class SensorQuery(ContextBase): + sensor_type: str + site: str + window: int + + class MetaSensorPlanner(CallableModel): + warm_start: int = 2 + + @Flow.call + def __call__(self, context: SensorQuery) -> GenericResult: + # Define request-scoped specialist wiring with a bespoke context/model pair. + class SpecialistContext(ContextBase): + sensor_type: str + window: int + pipeline: str + + class SpecialistCallable(CallableModel): + pipeline: str + + @Flow.call + def __call__(self, context: SpecialistContext) -> GenericResult: + payload = f"{self.pipeline}:{context.sensor_type}:{context.window}" + return GenericResult(value=payload) + + captured["context_cls"] = SpecialistContext + captured["callable_cls"] = SpecialistCallable + + window = context.window + self.warm_start + local_context = SpecialistContext( + sensor_type=context.sensor_type, + window=window, + pipeline=f"{context.site}-calibration", + ) + specialist = SpecialistCallable(pipeline=f"planner:{context.site}") + return specialist(local_context) + + return SensorQuery, MetaSensorPlanner, captured + + def build_local_callable(name: str = "LocalCallable") -> Type[CallableModel]: class _LocalCallable(CallableModel): @Flow.call diff --git a/ccflow/tests/test_callable.py b/ccflow/tests/test_callable.py index 329c21a..fd4a77c 100644 --- a/ccflow/tests/test_callable.py +++ b/ccflow/tests/test_callable.py @@ -23,7 +23,7 @@ WrapperModel, ) from ccflow.local_persistence import LOCAL_ARTIFACTS_MODULE_NAME -from ccflow.tests.local_helpers import build_local_callable, build_local_context +from ccflow.tests.local_helpers import build_local_callable, build_local_context, build_meta_sensor_planner def _find_registered_name(module, cls): @@ -659,6 +659,21 @@ def __call__(self, context: LocalContext) -> GenericResult: else: self.assertEqual(result.value, ctx_instance.value * (label + 1)) + def test_meta_callable_builds_dynamic_specialist(self): + SensorQuery, MetaSensorPlanner, captured = build_meta_sensor_planner() + request = SensorQuery(sensor_type="wind-turbine", site="ridge-line", window=5) + meta = MetaSensorPlanner(warm_start=3) + result = meta(request) + self.assertEqual(result.value, "planner:ridge-line:wind-turbine:8") + + locals_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] + SpecialistContext = captured["context_cls"] + SpecialistCallable = captured["callable_cls"] + self.assertEqual(SpecialistContext.__module__, LOCAL_ARTIFACTS_MODULE_NAME) + self.assertEqual(SpecialistCallable.__module__, LOCAL_ARTIFACTS_MODULE_NAME) + self.assertIs(getattr(locals_module, SpecialistContext.__qualname__), SpecialistContext) + self.assertIs(getattr(locals_module, SpecialistCallable.__qualname__), SpecialistCallable) + class TestWrapperModel(TestCase): def test_wrapper(self): From 7504a89135b61b9c3f81a94784cc2aca8226310b Mon Sep 17 00:00:00 2001 From: Nijat Khanbabayev Date: Fri, 19 Dec 2025 21:37:38 -0500 Subject: [PATCH 05/15] Add tests for pickling and unpickling local callable models and contexts Signed-off-by: Nijat Khanbabayev --- ccflow/tests/test_callable.py | 38 +++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/ccflow/tests/test_callable.py b/ccflow/tests/test_callable.py index fd4a77c..0d4a642 100644 --- a/ccflow/tests/test_callable.py +++ b/ccflow/tests/test_callable.py @@ -33,6 +33,20 @@ def _find_registered_name(module, cls): raise AssertionError(f"{cls} not found in {module.__name__}") +def create_sensor_scope_models(): + class SensorScope(ContextBase): + reading: int + + class SensorCalibrator(CallableModel): + offset: int = 1 + + @Flow.call + def __call__(self, context: SensorScope) -> GenericResult: + return GenericResult(value=context.reading + self.offset) + + return SensorScope, SensorCalibrator + + class MyContext(ContextBase): a: str @@ -674,6 +688,30 @@ def test_meta_callable_builds_dynamic_specialist(self): self.assertIs(getattr(locals_module, SpecialistContext.__qualname__), SpecialistContext) self.assertIs(getattr(locals_module, SpecialistCallable.__qualname__), SpecialistCallable) + def test_dynamic_factory_pickle_roundtrip(self): + serializers = [(pdumps, ploads), (rcpdumps, rcploads)] + for dumps, loads in serializers: + factory = loads(dumps(create_sensor_scope_models)) + SensorContext, SensorModel = factory() + self.assertEqual(SensorContext.__module__, LOCAL_ARTIFACTS_MODULE_NAME) + self.assertEqual(SensorModel.__module__, LOCAL_ARTIFACTS_MODULE_NAME) + locals_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] + self.assertIs(getattr(locals_module, SensorContext.__qualname__), SensorContext) + self.assertIs(getattr(locals_module, SensorModel.__qualname__), SensorModel) + + context = SensorContext(reading=41) + model = SensorModel(offset=1) + self.assertEqual(model(context).value, 42) + + restored_context_cls = loads(dumps(SensorContext)) + restored_model_cls = loads(dumps(SensorModel)) + self.assertIs(restored_context_cls, SensorContext) + self.assertIs(restored_model_cls, SensorModel) + + restored_context = loads(dumps(context)) + restored_model = loads(dumps(model)) + self.assertEqual(restored_model(restored_context).value, 42) + class TestWrapperModel(TestCase): def test_wrapper(self): From dea6dae884849eb4b0a6f06b4155120ec754b57d Mon Sep 17 00:00:00 2001 From: Nijat Khanbabayev Date: Mon, 22 Dec 2025 04:55:05 -0500 Subject: [PATCH 06/15] Utilize finder/loader pattern for dynamic module, add subprocess tests for isolation. Signed-off-by: Nijat Khanbabayev --- ccflow/local_persistence.py | 84 +++++++++++++++++++++++--- ccflow/tests/test_global_state.py | 5 +- ccflow/tests/test_local_persistence.py | 80 +++++++++++++++++++++++- 3 files changed, 157 insertions(+), 12 deletions(-) diff --git a/ccflow/local_persistence.py b/ccflow/local_persistence.py index 7a2cc7b..e173410 100644 --- a/ccflow/local_persistence.py +++ b/ccflow/local_persistence.py @@ -2,12 +2,17 @@ from __future__ import annotations +import importlib.abc +import importlib.util import re import sys from collections import defaultdict from itertools import count -from types import ModuleType -from typing import Any, DefaultDict, Type +from typing import TYPE_CHECKING, Any, DefaultDict, Type + +if TYPE_CHECKING: + from importlib.machinery import ModuleSpec + from types import ModuleType __all__ = ("LOCAL_ARTIFACTS_MODULE_NAME", "register_local_subclass") @@ -17,13 +22,70 @@ _SANITIZE_PATTERN = re.compile(r"[^0-9A-Za-z_]") _LOCAL_KIND_COUNTERS: DefaultDict[str, count] = defaultdict(lambda: count()) +_LOCAL_ARTIFACTS_MODULE: "ModuleType | None" = None + + +class _LocalArtifactsLoader(importlib.abc.Loader): + """Minimal loader so importlib can reload our synthetic module if needed.""" + + def __init__(self, *, doc: str) -> None: + self._doc = doc + + def create_module(self, spec: "ModuleSpec") -> "ModuleType | None": + """Defer to default module creation (keeping importlib from recursing).""" + return None + + def exec_module(self, module: "ModuleType") -> None: + module.__doc__ = module.__doc__ or self._doc + + +class _LocalArtifactsFinder(importlib.abc.MetaPathFinder): + """Ensures importlib can rediscover the synthetic module when reloading.""" + + def find_spec( + self, + fullname: str, + path: Any, + target: "ModuleType | None" = None, + ) -> "ModuleSpec | None": + if fullname != LOCAL_ARTIFACTS_MODULE_NAME: + return None + return _build_module_spec(fullname, _LOCAL_MODULE_DOC) + + +def _build_module_spec(name: str, doc: str) -> "ModuleSpec": + loader = _LocalArtifactsLoader(doc=doc) + spec = importlib.util.spec_from_loader( + name, + loader=loader, + origin="ccflow.local_persistence:_ensure_module", + ) + if spec is None: + raise ImportError(f"Unable to create spec for dynamic module {name!r}.") + spec.has_location = False + return spec + + +def _ensure_finder_installed() -> None: + for finder in sys.meta_path: + if isinstance(finder, _LocalArtifactsFinder): + return + sys.meta_path.insert(0, _LocalArtifactsFinder()) + +def _ensure_module(name: str, doc: str) -> "ModuleType": + """Materialize the synthetic module with a real spec/loader so importlib treats it like disk-backed code. -def _ensure_module(name: str, doc: str) -> ModuleType: - """Ensure the dynamic module exists so import paths remain stable.""" + We only do this on demand, but once built the finder/spec/loader plumbing + keeps reload, pickling, and other importlib consumers happy. The Python docs recommend this approach instead of creating modules directly via the constructor.""" + _ensure_finder_installed() module = sys.modules.get(name) if module is None: - module = ModuleType(name, doc) + # Create a proper ModuleSpec + loader so importlib reloads and introspection behave + # the same way they would for filesystem-backed modules + spec = _build_module_spec(name, doc) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) # type: ignore[union-attr] sys.modules[name] = module parent_name, _, attr = name.rpartition(".") if parent_name: @@ -33,7 +95,12 @@ def _ensure_module(name: str, doc: str) -> ModuleType: return module -_LOCAL_ARTIFACTS_MODULE = _ensure_module(LOCAL_ARTIFACTS_MODULE_NAME, _LOCAL_MODULE_DOC) +def _get_local_artifacts_module() -> "ModuleType": + """Lazily materialize the synthetic module to avoid errors during creation until needed.""" + global _LOCAL_ARTIFACTS_MODULE + if _LOCAL_ARTIFACTS_MODULE is None: + _LOCAL_ARTIFACTS_MODULE = _ensure_module(LOCAL_ARTIFACTS_MODULE_NAME, _LOCAL_MODULE_DOC) + return _LOCAL_ARTIFACTS_MODULE def _needs_registration(cls: Type[Any]) -> bool: @@ -65,7 +132,8 @@ def register_local_subclass(cls: Type[Any], *, kind: str = "model") -> None: name_hint = f"{getattr(cls, '__module__', '')}.{getattr(cls, '__qualname__', '')}" kind_slug = _sanitize_identifier(kind, "model").lower() unique_name = _build_unique_name(kind_slug=kind_slug, name_hint=name_hint) - setattr(_LOCAL_ARTIFACTS_MODULE, unique_name, cls) - cls.__module__ = _LOCAL_ARTIFACTS_MODULE.__name__ + artifacts_module = _get_local_artifacts_module() + setattr(artifacts_module, unique_name, cls) + cls.__module__ = artifacts_module.__name__ cls.__qualname__ = unique_name setattr(cls, "__ccflow_dynamic_origin__", name_hint) diff --git a/ccflow/tests/test_global_state.py b/ccflow/tests/test_global_state.py index e5b0d68..470afec 100644 --- a/ccflow/tests/test_global_state.py +++ b/ccflow/tests/test_global_state.py @@ -48,9 +48,8 @@ def test_global_state(root_registry): assert state3.open_overrides == {} -def test_global_state_pickle(): - r = ModelRegistry.root() - r.add("foo", DummyModel(name="foo")) +def test_global_state_pickle(root_registry): + root_registry.add("foo", DummyModel(name="foo")) evaluator = DummyEvaluator() with FlowOptionsOverride(options=dict(evaluator=evaluator)) as override: state = GlobalState() diff --git a/ccflow/tests/test_local_persistence.py b/ccflow/tests/test_local_persistence.py index bebf0bf..e9121e6 100644 --- a/ccflow/tests/test_local_persistence.py +++ b/ccflow/tests/test_local_persistence.py @@ -1,3 +1,6 @@ +import subprocess +import sys +import textwrap from collections import defaultdict from itertools import count from unittest import TestCase, mock @@ -23,7 +26,7 @@ def __call__(self, context: NullContext) -> GenericResult: class TestNeedsRegistration(TestCase): def test_module_level_ccflow_classes_do_not_need_registration(self): for cls in (ModuleLevelModel, ModuleLevelContext, ModuleLevelCallable): - with self.subTest(cls=cls): + with self.subTest(cls=cls.__name__): self.assertFalse(local_persistence._needs_registration(cls)) def test_local_scope_class_needs_registration(self): @@ -79,3 +82,78 @@ def test_empty_hint_uses_fallback(self): with mock.patch.object(local_persistence, "_LOCAL_KIND_COUNTERS", defaultdict(lambda: count())): name = local_persistence._build_unique_name(kind_slug="model", name_hint="") self.assertEqual(name, "model__BaseModel__0") + + +def _run_subprocess(code: str) -> str: + """Execute code in a clean interpreter so sys.modules starts empty.""" + result = subprocess.run( + [sys.executable, "-c", textwrap.dedent(code)], + check=True, + capture_output=True, + text=True, + ) + return result.stdout.strip() + + +def test_local_artifacts_module_is_lazy(): + output = _run_subprocess( + """ + import sys + import ccflow.local_persistence as lp + + print(lp.LOCAL_ARTIFACTS_MODULE_NAME in sys.modules) + """ + ) + assert output == "False" + + +def test_local_artifacts_module_reload_preserves_dynamic_attrs_and_qualname(): + output = _run_subprocess( + """ + import importlib + import ccflow.local_persistence as lp + + def build_cls(): + class _Temp: + pass + return _Temp + + Temp = build_cls() + lp.register_local_subclass(Temp, kind="demo") + module = importlib.import_module(lp.LOCAL_ARTIFACTS_MODULE_NAME) + qual_before = Temp.__qualname__ + before = getattr(module, qual_before) is Temp + module = importlib.reload(module) + after = getattr(module, qual_before) is Temp + print(before, after, qual_before == Temp.__qualname__) + """ + ) + assert output.split() == ["True", "True", "True"] + + +def test_register_local_subclass_sets_module_qualname_and_origin(): + output = _run_subprocess( + """ + import sys + import ccflow.local_persistence as lp + + def build(): + class Foo: + pass + return Foo + + Foo = build() + lp.register_local_subclass(Foo, kind="ModelThing") + module = sys.modules[lp.LOCAL_ARTIFACTS_MODULE_NAME] + print(Foo.__module__) + print(Foo.__qualname__) + print(hasattr(module, Foo.__qualname__)) + print(Foo.__ccflow_dynamic_origin__) + """ + ) + lines = output.splitlines() + assert lines[0] == "ccflow._local_artifacts" + assert lines[2] == "True" + assert lines[3] == "__main__.build..Foo" + assert lines[1].startswith("modelthing__") + assert lines[1].endswith("__0") From 9e52ab94ef65f5754cdc96ed6640861e47c0a122 Mon Sep 17 00:00:00 2001 From: Nijat Khanbabayev Date: Mon, 22 Dec 2025 05:00:23 -0500 Subject: [PATCH 07/15] Make dynamic module functions private Signed-off-by: Nijat Khanbabayev --- ccflow/base.py | 4 ++-- ccflow/local_persistence.py | 5 ++--- ccflow/tests/test_base.py | 8 ++++---- ccflow/tests/test_local_persistence.py | 4 ++-- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/ccflow/base.py b/ccflow/base.py index adfec2e..e111731 100644 --- a/ccflow/base.py +++ b/ccflow/base.py @@ -30,7 +30,7 @@ from typing_extensions import Self from .exttypes.pyobjectpath import PyObjectPath -from .local_persistence import register_local_subclass +from .local_persistence import _register_local_subclass log = logging.getLogger(__name__) @@ -164,7 +164,7 @@ def __pydantic_init_subclass__(cls, **kwargs): # __pydantic_init_subclass__ is the supported hook point once Pydantic finishes wiring the subclass. super().__pydantic_init_subclass__(**kwargs) kind = getattr(cls, "__ccflow_local_registration_kind__", "model") - register_local_subclass(cls, kind=kind) + _register_local_subclass(cls, kind=kind) @computed_field( alias="_target_", diff --git a/ccflow/local_persistence.py b/ccflow/local_persistence.py index e173410..478b2b1 100644 --- a/ccflow/local_persistence.py +++ b/ccflow/local_persistence.py @@ -14,8 +14,7 @@ from importlib.machinery import ModuleSpec from types import ModuleType -__all__ = ("LOCAL_ARTIFACTS_MODULE_NAME", "register_local_subclass") - +__all__ = ("LOCAL_ARTIFACTS_MODULE_NAME",) LOCAL_ARTIFACTS_MODULE_NAME = "ccflow._local_artifacts" _LOCAL_MODULE_DOC = "Auto-generated BaseModel subclasses created outside importable modules." @@ -122,7 +121,7 @@ def _build_unique_name(*, kind_slug: str, name_hint: str) -> str: return f"{kind_slug}__{sanitized_hint}__{next(counter)}" -def register_local_subclass(cls: Type[Any], *, kind: str = "model") -> None: +def _register_local_subclass(cls: Type[Any], *, kind: str = "model") -> None: """Register BaseModel subclasses created in local scopes.""" if getattr(cls, "__module__", "").startswith(LOCAL_ARTIFACTS_MODULE_NAME): return diff --git a/ccflow/tests/test_base.py b/ccflow/tests/test_base.py index 656d841..bd24cc1 100644 --- a/ccflow/tests/test_base.py +++ b/ccflow/tests/test_base.py @@ -176,7 +176,7 @@ def test_widget(self): class TestLocalRegistrationKind(TestCase): def test_base_model_defaults_to_model_kind(self): - with mock.patch("ccflow.base.register_local_subclass") as register: + with mock.patch("ccflow.base._register_local_subclass") as register: class LocalModel(BaseModel): value: int @@ -187,7 +187,7 @@ class LocalModel(BaseModel): self.assertEqual(kwargs["kind"], "model") def test_context_defaults_to_context_kind(self): - with mock.patch("ccflow.base.register_local_subclass") as register: + with mock.patch("ccflow.base._register_local_subclass") as register: class LocalContext(ContextBase): value: int @@ -198,7 +198,7 @@ class LocalContext(ContextBase): self.assertEqual(kwargs["kind"], "context") def test_callable_defaults_to_callable_kind(self): - with mock.patch("ccflow.base.register_local_subclass") as register: + with mock.patch("ccflow.base._register_local_subclass") as register: class LocalCallable(CallableModel): @Flow.call @@ -214,7 +214,7 @@ def __call__(self, context: NullContext) -> GenericResult: self.assertEqual(result.value, "ok") def test_explicit_override_respected(self): - with mock.patch("ccflow.base.register_local_subclass") as register: + with mock.patch("ccflow.base._register_local_subclass") as register: class CustomKind(BaseModel): __ccflow_local_registration_kind__ = "custom" diff --git a/ccflow/tests/test_local_persistence.py b/ccflow/tests/test_local_persistence.py index e9121e6..72d6be4 100644 --- a/ccflow/tests/test_local_persistence.py +++ b/ccflow/tests/test_local_persistence.py @@ -119,7 +119,7 @@ class _Temp: return _Temp Temp = build_cls() - lp.register_local_subclass(Temp, kind="demo") + lp._register_local_subclass(Temp, kind="demo") module = importlib.import_module(lp.LOCAL_ARTIFACTS_MODULE_NAME) qual_before = Temp.__qualname__ before = getattr(module, qual_before) is Temp @@ -143,7 +143,7 @@ class Foo: return Foo Foo = build() - lp.register_local_subclass(Foo, kind="ModelThing") + lp._register_local_subclass(Foo, kind="ModelThing") module = sys.modules[lp.LOCAL_ARTIFACTS_MODULE_NAME] print(Foo.__module__) print(Foo.__qualname__) From ff6acf168437929b1453e7882f79060f736435f0 Mon Sep 17 00:00:00 2001 From: Nijat Khanbabayev Date: Fri, 26 Dec 2025 00:58:30 -0500 Subject: [PATCH 08/15] Adjust local module persistence to work with cloudpickle across processes Signed-off-by: Nijat Khanbabayev --- ccflow/exttypes/pyobjectpath.py | 11 +- ccflow/local_persistence.py | 70 ++- ccflow/tests/test_callable.py | 139 +++-- ccflow/tests/test_local_persistence.py | 692 ++++++++++++++++++++++++- 4 files changed, 845 insertions(+), 67 deletions(-) diff --git a/ccflow/exttypes/pyobjectpath.py b/ccflow/exttypes/pyobjectpath.py index 9c91b2f..9ef2bed 100644 --- a/ccflow/exttypes/pyobjectpath.py +++ b/ccflow/exttypes/pyobjectpath.py @@ -8,6 +8,8 @@ from pydantic_core import core_schema from typing_extensions import Self +from ccflow.local_persistence import _ensure_registered_at_import_path + _import_string_adapter = TypeAdapter(ImportString) @@ -56,7 +58,14 @@ def _validate(cls, value: Any): origin = get_origin(value) if origin: value = origin - if hasattr(value, "__module__") and hasattr(value, "__qualname__"): + + # Check for ccflow's import path override first (used for local-scope classes) + # This allows classes with '' in __qualname__ to remain importable + # while preserving cloudpickle's ability to serialize the class definition + if hasattr(value, "__ccflow_import_path__"): + _ensure_registered_at_import_path(value) + value = cls(value.__ccflow_import_path__) + elif hasattr(value, "__module__") and hasattr(value, "__qualname__"): if value.__module__ == "__builtin__": module = "builtins" else: diff --git a/ccflow/local_persistence.py b/ccflow/local_persistence.py index 478b2b1..5fd0efa 100644 --- a/ccflow/local_persistence.py +++ b/ccflow/local_persistence.py @@ -1,4 +1,20 @@ -"""Helpers for persisting BaseModel-derived classes defined inside local scopes.""" +"""Helpers for persisting BaseModel-derived classes defined inside local scopes. + +This module enables PyObjectPath validation for classes defined inside functions (which have +'' in their __qualname__ and aren't normally importable). + +Key design decision: We DON'T modify __module__ or __qualname__. This preserves cloudpickle's +ability to serialize the class definition for cross-process transfer. Instead, we set a +separate __ccflow_import_path__ attribute that PyObjectPath uses. + +Cross-process cloudpickle flow: +1. Process A creates a local class -> we set __ccflow_import_path__ on it +2. cloudpickle.dumps() serializes the class definition (because '' in __qualname__) + INCLUDING the __ccflow_import_path__ attribute we set +3. Process B: cloudpickle.loads() reconstructs the class with __ccflow_import_path__ already set +4. Process B: __pydantic_init_subclass__ runs, sees __ccflow_import_path__ exists, + re-registers the class in this process's _local_artifacts module +""" from __future__ import annotations @@ -121,10 +137,49 @@ def _build_unique_name(*, kind_slug: str, name_hint: str) -> str: return f"{kind_slug}__{sanitized_hint}__{next(counter)}" +def _ensure_registered_at_import_path(cls: Type[Any]) -> None: + """Ensure a class with __ccflow_import_path__ is actually registered in _local_artifacts. + + This handles the cross-process cloudpickle case: when cloudpickle reconstructs a class, + it has __ccflow_import_path__ set (serialized with the class definition), but the class + isn't registered in _local_artifacts in the new process yet. + + Called from both _register_local_subclass (during class creation/unpickling) and + PyObjectPath validation (when accessing type_). + """ + import_path = getattr(cls, "__ccflow_import_path__", None) + if import_path is None or not import_path.startswith(LOCAL_ARTIFACTS_MODULE_NAME + "."): + return + + registered_name = import_path.rsplit(".", 1)[-1] + artifacts_module = _get_local_artifacts_module() + + # Re-register if not present or points to different class + if getattr(artifacts_module, registered_name, None) is not cls: + setattr(artifacts_module, registered_name, cls) + + def _register_local_subclass(cls: Type[Any], *, kind: str = "model") -> None: - """Register BaseModel subclasses created in local scopes.""" - if getattr(cls, "__module__", "").startswith(LOCAL_ARTIFACTS_MODULE_NAME): + """Register BaseModel subclasses created in local scopes. + + This enables PyObjectPath validation for classes that aren't normally importable + (e.g., classes defined inside functions). The class is registered in a synthetic + module (`ccflow._local_artifacts`) so it can be imported via the stored path. + + IMPORTANT: This function does NOT change __module__ or __qualname__. This is + intentional - it preserves cloudpickle's ability to serialize the class definition + for cross-process transfer. If __qualname__ contains '', cloudpickle + recognizes the class isn't normally importable and serializes its full definition. + + Args: + cls: The class to register. + kind: A slug identifying the type of class (e.g., "model", "context", "callable_model"). + """ + # If already has import path, just ensure it's registered (handles cross-process unpickling) + if hasattr(cls, "__ccflow_import_path__"): + _ensure_registered_at_import_path(cls) return + if not _needs_registration(cls): return @@ -133,6 +188,9 @@ def _register_local_subclass(cls: Type[Any], *, kind: str = "model") -> None: unique_name = _build_unique_name(kind_slug=kind_slug, name_hint=name_hint) artifacts_module = _get_local_artifacts_module() setattr(artifacts_module, unique_name, cls) - cls.__module__ = artifacts_module.__name__ - cls.__qualname__ = unique_name - setattr(cls, "__ccflow_dynamic_origin__", name_hint) + + # Store the import path as a separate attribute - DON'T change __module__ or __qualname__ + # This preserves cloudpickle's ability to serialize the class definition. + # The original module/qualname can still be retrieved via cls.__module__ and cls.__qualname__. + import_path = f"{artifacts_module.__name__}.{unique_name}" + setattr(cls, "__ccflow_import_path__", import_path) diff --git a/ccflow/tests/test_callable.py b/ccflow/tests/test_callable.py index 0d4a642..56c5737 100644 --- a/ccflow/tests/test_callable.py +++ b/ccflow/tests/test_callable.py @@ -530,23 +530,46 @@ def test_module_level_context_retains_module(self): if dynamic_module: self.assertFalse(any(value is MyContext for value in vars(dynamic_module).values())) - def test_local_class_moves_under_dynamic_namespace(self): + def test_local_class_registered_with_import_path(self): + """Test that local-scope classes preserve __module__/__qualname__ but get an import path. + + The new behavior: + - __module__ stays as the original (preserves cloudpickle serialization) + - __qualname__ stays with '' (preserves cloudpickle serialization) + - __ccflow_import_path__ is set for PyObjectPath validation + - Class is registered in _local_artifacts under a unique name + """ LocalCallable = build_local_callable() - module_name = LocalCallable.__module__ - self.assertEqual(module_name, LOCAL_ARTIFACTS_MODULE_NAME) - dynamic_module = sys.modules[module_name] - self.assertIs(getattr(dynamic_module, LocalCallable.__qualname__), LocalCallable) - self.assertIn("", getattr(LocalCallable, "__ccflow_dynamic_origin__")) + # __module__ should NOT change to _local_artifacts + self.assertNotEqual(LocalCallable.__module__, LOCAL_ARTIFACTS_MODULE_NAME) + # __qualname__ should still have '' + self.assertIn("", LocalCallable.__qualname__) + # But __ccflow_import_path__ should be set and point to _local_artifacts + self.assertTrue(hasattr(LocalCallable, "__ccflow_import_path__")) + import_path = LocalCallable.__ccflow_import_path__ + self.assertTrue(import_path.startswith(f"{LOCAL_ARTIFACTS_MODULE_NAME}.")) + # Class should be registered in _local_artifacts under the import path + dynamic_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] + registered_name = import_path.split(".")[-1] + self.assertIs(getattr(dynamic_module, registered_name), LocalCallable) + # Functionality should work result = LocalCallable()(NullContext()) self.assertEqual(result.value, "local") - def test_multiple_local_definitions_have_unique_identifiers(self): + def test_multiple_local_definitions_have_unique_import_paths(self): + """Test that multiple local classes get unique import paths.""" first = build_local_callable() second = build_local_callable() - self.assertNotEqual(first.__qualname__, second.__qualname__) + # __qualname__ stays the same (both are 'build_local_callable.._LocalCallable') + self.assertEqual(first.__qualname__, second.__qualname__) + # But __ccflow_import_path__ should be unique + self.assertNotEqual(first.__ccflow_import_path__, second.__ccflow_import_path__) + # Both should be registered in _local_artifacts dynamic_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] - self.assertIs(getattr(dynamic_module, first.__qualname__), first) - self.assertIs(getattr(dynamic_module, second.__qualname__), second) + first_name = first.__ccflow_import_path__.split(".")[-1] + second_name = second.__ccflow_import_path__.split(".")[-1] + self.assertIs(getattr(dynamic_module, first_name), first) + self.assertIs(getattr(dynamic_module, second_name), second) def test_context_and_callable_same_name_do_not_collide(self): def build_conflicting(): @@ -614,7 +637,8 @@ def __call__(self, context: LocalContext) -> GenericResult: restored_result = restored_model(restored_context) self.assertEqual(restored_result.value, 35) - def test_multiple_nested_levels_unique_paths(self): + def test_multiple_nested_levels_unique_import_paths(self): + """Test that multiple nested local classes all get unique import paths.""" created = [] def layer(depth: int): @@ -656,16 +680,21 @@ def __call__(self, context: LocalContext) -> GenericResult: locals_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] - context_names = {ctx.__qualname__ for _, ctx, _ in created} - model_names = {model.__qualname__ for _, _, model in created} - self.assertEqual(len(context_names), len(created)) - self.assertEqual(len(model_names), len(created)) + # __ccflow_import_path__ should be unique for each class + context_import_paths = {ctx.__ccflow_import_path__ for _, ctx, _ in created} + model_import_paths = {model.__ccflow_import_path__ for _, _, model in created} + self.assertEqual(len(context_import_paths), len(created)) + self.assertEqual(len(model_import_paths), len(created)) for label, ctx_cls, model_cls in created: - self.assertIs(getattr(locals_module, ctx_cls.__qualname__), ctx_cls) - self.assertIs(getattr(locals_module, model_cls.__qualname__), model_cls) - self.assertIn("", getattr(ctx_cls, "__ccflow_dynamic_origin__")) - self.assertIn("", getattr(model_cls, "__ccflow_dynamic_origin__")) + # Each class should be registered under its import path + ctx_name = ctx_cls.__ccflow_import_path__.split(".")[-1] + model_name = model_cls.__ccflow_import_path__.split(".")[-1] + self.assertIs(getattr(locals_module, ctx_name), ctx_cls) + self.assertIs(getattr(locals_module, model_name), model_cls) + # Original qualname should still have '' (preserved for cloudpickle) + self.assertIn("", ctx_cls.__qualname__) + self.assertIn("", model_cls.__qualname__) ctx_instance = ctx_cls(value=4) result = model_cls()(ctx_instance) if isinstance(label, str): # sibling group @@ -683,34 +712,54 @@ def test_meta_callable_builds_dynamic_specialist(self): locals_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] SpecialistContext = captured["context_cls"] SpecialistCallable = captured["callable_cls"] - self.assertEqual(SpecialistContext.__module__, LOCAL_ARTIFACTS_MODULE_NAME) - self.assertEqual(SpecialistCallable.__module__, LOCAL_ARTIFACTS_MODULE_NAME) - self.assertIs(getattr(locals_module, SpecialistContext.__qualname__), SpecialistContext) - self.assertIs(getattr(locals_module, SpecialistCallable.__qualname__), SpecialistCallable) + # __module__ should NOT change (preserves cloudpickle) + self.assertNotEqual(SpecialistContext.__module__, LOCAL_ARTIFACTS_MODULE_NAME) + self.assertNotEqual(SpecialistCallable.__module__, LOCAL_ARTIFACTS_MODULE_NAME) + # But __ccflow_import_path__ should be set and classes should be registered + ctx_name = SpecialistContext.__ccflow_import_path__.split(".")[-1] + model_name = SpecialistCallable.__ccflow_import_path__.split(".")[-1] + self.assertIs(getattr(locals_module, ctx_name), SpecialistContext) + self.assertIs(getattr(locals_module, model_name), SpecialistCallable) def test_dynamic_factory_pickle_roundtrip(self): - serializers = [(pdumps, ploads), (rcpdumps, rcploads)] - for dumps, loads in serializers: - factory = loads(dumps(create_sensor_scope_models)) - SensorContext, SensorModel = factory() - self.assertEqual(SensorContext.__module__, LOCAL_ARTIFACTS_MODULE_NAME) - self.assertEqual(SensorModel.__module__, LOCAL_ARTIFACTS_MODULE_NAME) - locals_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] - self.assertIs(getattr(locals_module, SensorContext.__qualname__), SensorContext) - self.assertIs(getattr(locals_module, SensorModel.__qualname__), SensorModel) - - context = SensorContext(reading=41) - model = SensorModel(offset=1) - self.assertEqual(model(context).value, 42) - - restored_context_cls = loads(dumps(SensorContext)) - restored_model_cls = loads(dumps(SensorModel)) - self.assertIs(restored_context_cls, SensorContext) - self.assertIs(restored_model_cls, SensorModel) - - restored_context = loads(dumps(context)) - restored_model = loads(dumps(model)) - self.assertEqual(restored_model(restored_context).value, 42) + """Test that dynamically created local classes can be pickled with cloudpickle. + + Note: Standard pickle can't handle classes with '' in __qualname__ + because it tries to import the class by module.qualname. Cloudpickle can + serialize the class definition, which is essential for distributed computing + (e.g., Ray tasks). This tradeoff enables cross-process cloudpickle support + while PyObjectPath still works via __ccflow_import_path__. + """ + # Test with cloudpickle (which can serialize class definitions) + factory = rcploads(rcpdumps(create_sensor_scope_models)) + SensorContext, SensorModel = factory() + # __module__ should NOT change (preserves cloudpickle) + self.assertNotEqual(SensorContext.__module__, LOCAL_ARTIFACTS_MODULE_NAME) + self.assertNotEqual(SensorModel.__module__, LOCAL_ARTIFACTS_MODULE_NAME) + # But __ccflow_import_path__ should be set and point to _local_artifacts + self.assertTrue(SensorContext.__ccflow_import_path__.startswith(f"{LOCAL_ARTIFACTS_MODULE_NAME}.")) + self.assertTrue(SensorModel.__ccflow_import_path__.startswith(f"{LOCAL_ARTIFACTS_MODULE_NAME}.")) + # Classes should be registered in _local_artifacts + locals_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] + ctx_name = SensorContext.__ccflow_import_path__.split(".")[-1] + model_name = SensorModel.__ccflow_import_path__.split(".")[-1] + self.assertIs(getattr(locals_module, ctx_name), SensorContext) + self.assertIs(getattr(locals_module, model_name), SensorModel) + + context = SensorContext(reading=41) + model = SensorModel(offset=1) + self.assertEqual(model(context).value, 42) + + # Class roundtrip with cloudpickle + restored_context_cls = rcploads(rcpdumps(SensorContext)) + restored_model_cls = rcploads(rcpdumps(SensorModel)) + self.assertIs(restored_context_cls, SensorContext) + self.assertIs(restored_model_cls, SensorModel) + + # Instance roundtrip with cloudpickle + restored_context = rcploads(rcpdumps(context)) + restored_model = rcploads(rcpdumps(model)) + self.assertEqual(restored_model(restored_context).value, 42) class TestWrapperModel(TestCase): diff --git a/ccflow/tests/test_local_persistence.py b/ccflow/tests/test_local_persistence.py index 72d6be4..085aec0 100644 --- a/ccflow/tests/test_local_persistence.py +++ b/ccflow/tests/test_local_persistence.py @@ -5,6 +5,8 @@ from itertools import count from unittest import TestCase, mock +import ray + import ccflow.local_persistence as local_persistence from ccflow import BaseModel, CallableModel, ContextBase, Flow, GenericResult, NullContext @@ -107,7 +109,7 @@ def test_local_artifacts_module_is_lazy(): assert output == "False" -def test_local_artifacts_module_reload_preserves_dynamic_attrs_and_qualname(): +def test_local_artifacts_module_reload_preserves_dynamic_attrs(): output = _run_subprocess( """ import importlib @@ -121,17 +123,23 @@ class _Temp: Temp = build_cls() lp._register_local_subclass(Temp, kind="demo") module = importlib.import_module(lp.LOCAL_ARTIFACTS_MODULE_NAME) - qual_before = Temp.__qualname__ - before = getattr(module, qual_before) is Temp + + # Extract the registered name from __ccflow_import_path__ + import_path = Temp.__ccflow_import_path__ + registered_name = import_path.split(".")[-1] + + before = getattr(module, registered_name) is Temp module = importlib.reload(module) - after = getattr(module, qual_before) is Temp - print(before, after, qual_before == Temp.__qualname__) + after = getattr(module, registered_name) is Temp + + # __qualname__ should NOT have changed (preserves cloudpickle behavior) + print(before, after, "" in Temp.__qualname__) """ ) assert output.split() == ["True", "True", "True"] -def test_register_local_subclass_sets_module_qualname_and_origin(): +def test_register_local_subclass_preserves_module_qualname_and_sets_import_path(): output = _run_subprocess( """ import sys @@ -143,17 +151,671 @@ class Foo: return Foo Foo = build() + original_module = Foo.__module__ + original_qualname = Foo.__qualname__ lp._register_local_subclass(Foo, kind="ModelThing") module = sys.modules[lp.LOCAL_ARTIFACTS_MODULE_NAME] - print(Foo.__module__) - print(Foo.__qualname__) - print(hasattr(module, Foo.__qualname__)) - print(Foo.__ccflow_dynamic_origin__) + + # __module__ and __qualname__ should NOT change (preserves cloudpickle) + print(Foo.__module__ == original_module) + print(Foo.__qualname__ == original_qualname) + print("" in Foo.__qualname__) + + # __ccflow_import_path__ should be set and point to the registered class + import_path = Foo.__ccflow_import_path__ + registered_name = import_path.split(".")[-1] + print(hasattr(module, registered_name)) + print(getattr(module, registered_name) is Foo) + print(import_path.startswith("ccflow._local_artifacts.modelthing__")) """ ) lines = output.splitlines() - assert lines[0] == "ccflow._local_artifacts" - assert lines[2] == "True" - assert lines[3] == "__main__.build..Foo" - assert lines[1].startswith("modelthing__") - assert lines[1].endswith("__0") + assert lines[0] == "True", f"__module__ should not change: {lines}" + assert lines[1] == "True", f"__qualname__ should not change: {lines}" + assert lines[2] == "True", f"__qualname__ should contain '': {lines}" + assert lines[3] == "True", f"Class should be registered in module: {lines}" + assert lines[4] == "True", f"Registered class should be the same object: {lines}" + assert lines[5] == "True", f"Import path should start with expected prefix: {lines}" + + +def test_local_basemodel_cloudpickle_cross_process(): + """Test that local-scope BaseModel subclasses work with cloudpickle cross-process. + + This is the key test for the "best of both worlds" approach: + - __qualname__ has '' so cloudpickle serializes the class definition + - __ccflow_import_path__ allows PyObjectPath validation to work + - After unpickling, __pydantic_init_subclass__ re-registers the class + """ + import os + import tempfile + + pkl_path = tempfile.mktemp(suffix=".pkl") + + try: + # Create a local-scope BaseModel in subprocess 1 and pickle it + create_result = subprocess.run( + [ + sys.executable, + "-c", + f""" +from ray.cloudpickle import dump +from ccflow import BaseModel + +def create_local_model(): + class LocalModel(BaseModel): + value: int + + return LocalModel + +LocalModel = create_local_model() + +# Verify __qualname__ has '' (enables cloudpickle serialization) +assert "" in LocalModel.__qualname__, f"Expected '' in qualname: {{LocalModel.__qualname__}}" + +# Verify __ccflow_import_path__ is set (enables PyObjectPath) +assert hasattr(LocalModel, "__ccflow_import_path__"), "Expected __ccflow_import_path__ to be set" + +# Create instance and verify type_ works (PyObjectPath validation) +instance = LocalModel(value=42) +type_path = instance.type_ +print(f"type_: {{type_path}}") + +# Pickle the instance +with open("{pkl_path}", "wb") as f: + dump(instance, f) + +print("SUCCESS: Created and pickled") +""", + ], + capture_output=True, + text=True, + ) + assert create_result.returncode == 0, f"Create subprocess failed: {create_result.stderr}" + assert "SUCCESS" in create_result.stdout, f"Create subprocess output: {create_result.stdout}" + + # Load in subprocess 2 (different process, class not pre-defined) + load_result = subprocess.run( + [ + sys.executable, + "-c", + f""" +from ray.cloudpickle import load + +with open("{pkl_path}", "rb") as f: + obj = load(f) + +# Verify the value was preserved +assert obj.value == 42, f"Expected value=42, got {{obj.value}}" + +# Verify type_ works after unpickling (class was re-registered) +type_path = obj.type_ +print(f"type_: {{type_path}}") + +# Verify the import path works +import importlib +path_parts = str(type_path).rsplit(".", 1) +module = importlib.import_module(path_parts[0]) +cls = getattr(module, path_parts[1]) +assert cls is type(obj), "Import path should resolve to the same class" + +print("SUCCESS: Loaded and verified") +""", + ], + capture_output=True, + text=True, + ) + assert load_result.returncode == 0, f"Load subprocess failed: {load_result.stderr}" + assert "SUCCESS" in load_result.stdout, f"Load subprocess output: {load_result.stdout}" + + finally: + if os.path.exists(pkl_path): + os.unlink(pkl_path) + + +# ============================================================================= +# Comprehensive tests for local persistence and PyObjectPath integration +# ============================================================================= + + +class TestLocalPersistencePreservesCloudpickle: + """Tests verifying that local persistence preserves cloudpickle behavior.""" + + def test_qualname_has_locals_for_function_defined_class(self): + """Verify that __qualname__ contains '' for classes defined in functions.""" + + def create_class(): + class Inner(BaseModel): + x: int + + return Inner + + cls = create_class() + assert "" in cls.__qualname__ + assert cls.__module__ != local_persistence.LOCAL_ARTIFACTS_MODULE_NAME + + def test_module_not_changed_to_local_artifacts(self): + """Verify that __module__ is NOT changed to _local_artifacts.""" + + def create_class(): + class Inner(ContextBase): + value: str + + return Inner + + cls = create_class() + # __module__ should be this test module, not _local_artifacts + assert cls.__module__ == "ccflow.tests.test_local_persistence" + assert cls.__module__ != local_persistence.LOCAL_ARTIFACTS_MODULE_NAME + + def test_ccflow_import_path_is_set(self): + """Verify that __ccflow_import_path__ is set for local classes.""" + + def create_class(): + class Inner(BaseModel): + y: float + + return Inner + + cls = create_class() + assert hasattr(cls, "__ccflow_import_path__") + assert cls.__ccflow_import_path__.startswith(local_persistence.LOCAL_ARTIFACTS_MODULE_NAME + ".") + + def test_class_registered_in_local_artifacts(self): + """Verify that the class is registered in _local_artifacts under import path.""" + import sys + + def create_class(): + class Inner(BaseModel): + z: bool + + return Inner + + cls = create_class() + import_path = cls.__ccflow_import_path__ + registered_name = import_path.split(".")[-1] + + artifacts_module = sys.modules[local_persistence.LOCAL_ARTIFACTS_MODULE_NAME] + assert hasattr(artifacts_module, registered_name) + assert getattr(artifacts_module, registered_name) is cls + + +class TestPyObjectPathWithImportPath: + """Tests for PyObjectPath integration with __ccflow_import_path__.""" + + def test_type_property_uses_import_path(self): + """Verify that the type_ property returns a path using __ccflow_import_path__.""" + + def create_class(): + class LocalModel(BaseModel): + value: int + + return LocalModel + + cls = create_class() + instance = cls(value=123) + type_path = str(instance.type_) + + # type_ should use the __ccflow_import_path__, not module.qualname + assert type_path == cls.__ccflow_import_path__ + assert type_path.startswith(local_persistence.LOCAL_ARTIFACTS_MODULE_NAME) + + def test_type_path_can_be_imported(self): + """Verify that the type_ path can be used to import the class.""" + import importlib + + def create_class(): + class LocalModel(BaseModel): + value: int + + return LocalModel + + cls = create_class() + instance = cls(value=456) + type_path = str(instance.type_) + + # Should be able to import using the path + parts = type_path.rsplit(".", 1) + module = importlib.import_module(parts[0]) + imported_cls = getattr(module, parts[1]) + assert imported_cls is cls + + def test_type_property_for_context_base(self): + """Verify type_ works for ContextBase subclasses.""" + + def create_class(): + class LocalContext(ContextBase): + name: str + + return LocalContext + + cls = create_class() + instance = cls(name="test") + type_path = str(instance.type_) + + assert type_path == cls.__ccflow_import_path__ + assert instance.type_.object is cls + + def test_json_serialization_includes_target(self): + """Verify JSON serialization includes _target_ using __ccflow_import_path__.""" + + def create_class(): + class LocalModel(BaseModel): + value: int + + return LocalModel + + cls = create_class() + instance = cls(value=789) + data = instance.model_dump(mode="python") + + assert "type_" in data or "_target_" in data + # The computed field should use __ccflow_import_path__ + type_value = data.get("type_") or data.get("_target_") + assert str(type_value).startswith(local_persistence.LOCAL_ARTIFACTS_MODULE_NAME) + + +class TestCloudpickleSameProcess: + """Tests for same-process cloudpickle behavior.""" + + def test_cloudpickle_class_roundtrip_same_process(self): + """Verify cloudpickle can serialize and deserialize local classes in same process.""" + from ray.cloudpickle import dumps, loads + + def create_class(): + class LocalModel(BaseModel): + value: int + + return LocalModel + + cls = create_class() + restored_cls = loads(dumps(cls)) + + # Should be the same object (cloudpickle recognizes it's in the same process) + assert restored_cls is cls + + def test_cloudpickle_instance_roundtrip_same_process(self): + """Verify cloudpickle can serialize and deserialize instances in same process.""" + from ray.cloudpickle import dumps, loads + + def create_class(): + class LocalModel(BaseModel): + value: int + + return LocalModel + + cls = create_class() + instance = cls(value=42) + restored = loads(dumps(instance)) + + assert restored.value == 42 + assert type(restored) is cls + + def test_cloudpickle_preserves_type_path(self): + """Verify type_ works after cloudpickle roundtrip in same process.""" + from ray.cloudpickle import dumps, loads + + def create_class(): + class LocalModel(BaseModel): + value: int + + return LocalModel + + cls = create_class() + instance = cls(value=100) + original_type_path = str(instance.type_) + + restored = loads(dumps(instance)) + restored_type_path = str(restored.type_) + + assert restored_type_path == original_type_path + + +class TestCloudpickleCrossProcess: + """Tests for cross-process cloudpickle behavior (subprocess tests).""" + + def test_context_base_cross_process(self): + """Test cross-process cloudpickle for ContextBase subclasses.""" + import os + import tempfile + + pkl_path = tempfile.mktemp(suffix=".pkl") + + try: + create_code = f''' +from ray.cloudpickle import dump +from ccflow import ContextBase + +def create_context(): + class LocalContext(ContextBase): + name: str + value: int + return LocalContext + +LocalContext = create_context() +assert "" in LocalContext.__qualname__ +instance = LocalContext(name="test", value=42) +_ = instance.type_ # Verify type_ works before pickle + +with open("{pkl_path}", "wb") as f: + dump(instance, f) +print("SUCCESS") +''' + create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) + assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" + + load_code = f''' +from ray.cloudpickle import load + +with open("{pkl_path}", "rb") as f: + obj = load(f) + +assert obj.name == "test" +assert obj.value == 42 +type_path = obj.type_ # Verify type_ works after unpickle +assert type_path.object is type(obj) +print("SUCCESS") +''' + load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) + assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" + + finally: + if os.path.exists(pkl_path): + os.unlink(pkl_path) + + def test_callable_model_cross_process(self): + """Test cross-process cloudpickle for CallableModel subclasses.""" + import os + import tempfile + + pkl_path = tempfile.mktemp(suffix=".pkl") + + try: + create_code = f''' +from ray.cloudpickle import dump +from ccflow import CallableModel, ContextBase, GenericResult, Flow + +def create_callable(): + class LocalContext(ContextBase): + x: int + + class LocalCallable(CallableModel): + multiplier: int = 2 + + @Flow.call + def __call__(self, context: LocalContext) -> GenericResult: + return GenericResult(value=context.x * self.multiplier) + + return LocalContext, LocalCallable + +LocalContext, LocalCallable = create_callable() +instance = LocalCallable(multiplier=3) +context = LocalContext(x=10) + +# Verify it works +result = instance(context) +assert result.value == 30 + +with open("{pkl_path}", "wb") as f: + dump((instance, context), f) +print("SUCCESS") +''' + create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) + assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" + + load_code = f''' +from ray.cloudpickle import load + +with open("{pkl_path}", "rb") as f: + instance, context = load(f) + +# Verify the callable works after unpickle +result = instance(context) +assert result.value == 30 + +# Verify type_ works +assert instance.type_.object is type(instance) +assert context.type_.object is type(context) +print("SUCCESS") +''' + load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) + assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" + + finally: + if os.path.exists(pkl_path): + os.unlink(pkl_path) + + def test_nested_local_classes_cross_process(self): + """Test cross-process cloudpickle for multiply-nested local classes.""" + import os + import tempfile + + pkl_path = tempfile.mktemp(suffix=".pkl") + + try: + create_code = f''' +from ray.cloudpickle import dump +from ccflow import BaseModel + +def outer(): + def inner(): + class DeeplyNested(BaseModel): + value: int + return DeeplyNested + return inner() + +cls = outer() +assert "" in cls.__qualname__ +assert cls.__qualname__.count("") == 2 # Two levels of nesting + +instance = cls(value=999) +with open("{pkl_path}", "wb") as f: + dump(instance, f) +print("SUCCESS") +''' + create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) + assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" + + load_code = f''' +from ray.cloudpickle import load + +with open("{pkl_path}", "rb") as f: + obj = load(f) + +assert obj.value == 999 +_ = obj.type_ # Verify type_ works +print("SUCCESS") +''' + load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) + assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" + + finally: + if os.path.exists(pkl_path): + os.unlink(pkl_path) + + def test_multiple_instances_same_local_class_cross_process(self): + """Test that multiple instances of the same local class work cross-process.""" + import os + import tempfile + + pkl_path = tempfile.mktemp(suffix=".pkl") + + try: + create_code = f''' +from ray.cloudpickle import dump +from ccflow import BaseModel + +def create_class(): + class LocalModel(BaseModel): + value: int + return LocalModel + +cls = create_class() +instances = [cls(value=i) for i in range(5)] + +with open("{pkl_path}", "wb") as f: + dump(instances, f) +print("SUCCESS") +''' + create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) + assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" + + load_code = f''' +from ray.cloudpickle import load + +with open("{pkl_path}", "rb") as f: + instances = load(f) + +# All instances should have the correct values +for i, instance in enumerate(instances): + assert instance.value == i + +# All instances should be of the same class +assert len(set(type(inst) for inst in instances)) == 1 + +# type_ should work for all +for instance in instances: + _ = instance.type_ +print("SUCCESS") +''' + load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) + assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" + + finally: + if os.path.exists(pkl_path): + os.unlink(pkl_path) + + +class TestModuleLevelClassesUnaffected: + """Tests verifying that module-level classes are not affected by local persistence.""" + + def test_module_level_class_no_import_path(self): + """Verify module-level classes don't get __ccflow_import_path__.""" + assert not hasattr(ModuleLevelModel, "__ccflow_import_path__") + assert not hasattr(ModuleLevelContext, "__ccflow_import_path__") + assert not hasattr(ModuleLevelCallable, "__ccflow_import_path__") + + def test_module_level_class_type_path_uses_qualname(self): + """Verify module-level classes use standard module.qualname for type_.""" + instance = ModuleLevelModel(value=1) + type_path = str(instance.type_) + + # Should use standard path, not _local_artifacts + assert type_path == "ccflow.tests.test_local_persistence.ModuleLevelModel" + assert local_persistence.LOCAL_ARTIFACTS_MODULE_NAME not in type_path + + def test_module_level_standard_pickle_works(self): + """Verify standard pickle works for module-level classes.""" + from pickle import dumps, loads + + instance = ModuleLevelModel(value=42) + restored = loads(dumps(instance)) + assert restored.value == 42 + assert type(restored) is ModuleLevelModel + + +class TestRayTaskWithLocalClasses: + """Tests for Ray task execution with locally-defined classes. + + These tests verify that the local persistence mechanism works correctly + when classes are serialized and sent to Ray workers (different processes). + """ + + def test_local_callable_model_ray_task(self): + """Test that locally-defined CallableModels can be sent to Ray tasks. + + This is the ultimate test of cross-process cloudpickle support: + - Local class defined in function (has in __qualname__) + - Sent to Ray worker (different process) + - Executed and returns correct result + - type_ property works after execution (PyObjectPath validation) + """ + + def create_local_callable(): + class LocalContext(ContextBase): + x: int + + class LocalCallable(CallableModel): + multiplier: int = 2 + + @Flow.call + def __call__(self, context: LocalContext) -> GenericResult: + return GenericResult(value=context.x * self.multiplier) + + return LocalContext, LocalCallable + + LocalContext, LocalCallable = create_local_callable() + + # Verify is in qualname (ensures cloudpickle serializes definition) + assert "" in LocalCallable.__qualname__ + assert "" in LocalContext.__qualname__ + + # Verify __ccflow_import_path__ is set + assert hasattr(LocalCallable, "__ccflow_import_path__") + assert hasattr(LocalContext, "__ccflow_import_path__") + + @ray.remote + def run_callable(model, context): + result = model(context) + # Verify type_ works inside the Ray task (cross-process PyObjectPath) + _ = model.type_ + _ = context.type_ + return result.value + + model = LocalCallable(multiplier=3) + context = LocalContext(x=10) + + with ray.init(num_cpus=1): + result = ray.get(run_callable.remote(model, context)) + + assert result == 30 + + def test_local_context_ray_task(self): + """Test that locally-defined ContextBase can be sent to Ray tasks.""" + + def create_local_context(): + class LocalContext(ContextBase): + name: str + value: int + + return LocalContext + + LocalContext = create_local_context() + assert "" in LocalContext.__qualname__ + + @ray.remote + def process_context(ctx): + # Access fields and type_ inside Ray task + _ = ctx.type_ + return f"{ctx.name}:{ctx.value}" + + context = LocalContext(name="test", value=42) + + with ray.init(num_cpus=1): + result = ray.get(process_context.remote(context)) + + assert result == "test:42" + + def test_local_base_model_ray_task(self): + """Test that locally-defined BaseModel can be sent to Ray tasks.""" + + def create_local_model(): + class LocalModel(BaseModel): + data: str + + return LocalModel + + LocalModel = create_local_model() + assert "" in LocalModel.__qualname__ + + @ray.remote + def process_model(m): + # Access type_ inside Ray task + type_path = str(m.type_) + return f"{m.data}|{type_path}" + + model = LocalModel(data="hello") + + with ray.init(num_cpus=1): + result = ray.get(process_model.remote(model)) + + assert result.startswith("hello|ccflow._local_artifacts.") From b06ea001491f5c29ae755a9de8b3c84ff3c563af Mon Sep 17 00:00:00 2001 From: Nijat Khanbabayev Date: Tue, 30 Dec 2025 00:15:53 -0500 Subject: [PATCH 09/15] Don't create dynamic module, add more tests for locally defined models Signed-off-by: Nijat Khanbabayev --- ccflow/base.py | 30 +- ccflow/callable.py | 2 - ccflow/exttypes/pyobjectpath.py | 132 ++- ccflow/local_persistence.py | 227 +---- ccflow/tests/exttypes/test_pyobjectpath.py | 17 + ccflow/tests/test_base.py | 48 +- ccflow/tests/test_callable.py | 11 +- ccflow/tests/test_local_persistence.py | 1051 ++++++++++++++++++-- docs/wiki/Key-Features.md | 3 - 9 files changed, 1154 insertions(+), 367 deletions(-) diff --git a/ccflow/base.py b/ccflow/base.py index e111731..8630d23 100644 --- a/ccflow/base.py +++ b/ccflow/base.py @@ -30,7 +30,7 @@ from typing_extensions import Self from .exttypes.pyobjectpath import PyObjectPath -from .local_persistence import _register_local_subclass +from .local_persistence import _register, _register_local_subclass_if_needed log = logging.getLogger(__name__) @@ -157,14 +157,30 @@ class BaseModel(PydanticBaseModel, _RegistryMixin, metaclass=_SerializeAsAnyMeta - Registration by name, and coercion from string name to allow for object re-use from the configs """ - __ccflow_local_registration_kind__: ClassVar[str] = "model" - @classmethod def __pydantic_init_subclass__(cls, **kwargs): - # __pydantic_init_subclass__ is the supported hook point once Pydantic finishes wiring the subclass. super().__pydantic_init_subclass__(**kwargs) - kind = getattr(cls, "__ccflow_local_registration_kind__", "model") - _register_local_subclass(cls, kind=kind) + # Register local-scope classes so they're importable via PyObjectPath. + # At definition time, we can only detect in qualname - full check deferred. + # Note: Cross-process unpickle sync (when __ccflow_import_path__ is already set) happens + # lazily via _register_local_subclass_if_needed, since cloudpickle sets class attributes + # AFTER __pydantic_init_subclass__ runs. + if "" in cls.__qualname__: + _register(cls) + + def __reduce_ex__(self, protocol): + # Ensure the class is registered before pickling. This is necessary for classes + # created dynamically (e.g., via pydantic's create_model) that don't have '' + # in their __qualname__. Without this, such classes would get different UUIDs in + # different processes if pickled before type_ is accessed, breaking cross-process + # consistency. By registering here, we guarantee __ccflow_import_path__ is set + # before cloudpickle serializes the class definition. + # + # We use __reduce_ex__ instead of __reduce__ because it's called first, allowing us + # to perform the registration side effect. We then return NotImplemented to let + # pydantic's default pickling mechanism take over, preserving the pickle format. + _register_local_subclass_if_needed(type(self)) + return super().__reduce_ex__(protocol) @computed_field( alias="_target_", @@ -830,8 +846,6 @@ class ContextBase(ResultBase): that is an input into another CallableModel. """ - __ccflow_local_registration_kind__: ClassVar[str] = "context" - model_config = ConfigDict( frozen=True, arbitrary_types_allowed=False, diff --git a/ccflow/callable.py b/ccflow/callable.py index 595079d..b09eaea 100644 --- a/ccflow/callable.py +++ b/ccflow/callable.py @@ -74,8 +74,6 @@ class _CallableModel(BaseModel, abc.ABC): The purpose of this class is to provide type definitions of context_type and return_type. """ - __ccflow_local_registration_kind__: ClassVar[str] = "callable_model" - model_config = ConfigDict( ignored_types=(property,), ) diff --git a/ccflow/exttypes/pyobjectpath.py b/ccflow/exttypes/pyobjectpath.py index 9ef2bed..10131c5 100644 --- a/ccflow/exttypes/pyobjectpath.py +++ b/ccflow/exttypes/pyobjectpath.py @@ -1,28 +1,76 @@ """This module contains extension types for pydantic.""" +import importlib from functools import cached_property, lru_cache from types import FunctionType, MethodType, ModuleType from typing import Any, Type, get_origin -from pydantic import ImportString, TypeAdapter +from pydantic import TypeAdapter from pydantic_core import core_schema from typing_extensions import Self -from ccflow.local_persistence import _ensure_registered_at_import_path - -_import_string_adapter = TypeAdapter(ImportString) +from ccflow.local_persistence import _register_local_subclass_if_needed @lru_cache(maxsize=None) -def import_string(input_string: str): - return _import_string_adapter.validate_python(input_string) +def import_string(dotted_path: str) -> Any: + """Import an object from a dotted path string. + + Handles nested class paths like 'module.OuterClass.InnerClass' by progressively + trying shorter module paths and using getattr for the remaining parts. + + This is more flexible than pydantic's ImportString which can fail on nested classes. + """ + if not dotted_path: + raise ImportError("Empty path") + + parts = dotted_path.split(".") + + # Try progressively shorter module paths + # e.g., for 'a.b.C.D', try 'a.b.C', then 'a.b', then 'a' + for i in range(len(parts), 0, -1): + module_path = ".".join(parts[:i]) + try: + obj = importlib.import_module(module_path) + # Successfully imported module, now getattr for remaining parts + for attr_name in parts[i:]: + obj = getattr(obj, attr_name) + return obj + except ImportError: + continue + except AttributeError: + # Module imported but attribute not found - keep trying shorter paths + continue + + raise ImportError(f"No module named '{dotted_path}'") + + +def _build_standard_import_path(obj: Any) -> str: + """Build 'module.qualname' path from an object with __module__ and __qualname__.""" + # Handle Python 2 -> 3 module name change for builtins + if obj.__module__ == "__builtin__": + module = "builtins" + else: + module = obj.__module__ + + qualname = obj.__qualname__ + # Strip generic type parameters (e.g., "MyClass[int]" -> "MyClass") + # This happens with Generic types in pydantic. Type info is lost but + # at least the base class remains importable. + # TODO: Find a way of capturing the underlying type info + if "[" in qualname: + qualname = qualname.split("[", 1)[0] + return f"{module}.{qualname}" if module else qualname class PyObjectPath(str): - """Similar to pydantic's ImportString (formerly PyObject in v1), this class represents the path to the object as a string. + """A string representing an importable Python object path (e.g., "module.ClassName"). - In pydantic v1, PyObject could not be serialized to json, whereas in v2, ImportString can. - However, the round trip is not always consistent, i.e. + Similar to pydantic's ImportString, but with consistent serialization behavior: + - ImportString deserializes to the actual object + - PyObjectPath deserializes back to the string path + + Example: >>> ta = TypeAdapter(ImportString) >>> ta.validate_json(ta.dump_json("math.pi")) 3.141592653589793 @@ -30,7 +78,7 @@ class PyObjectPath(str): >>> ta.validate_json(ta.dump_json("math.pi")) 'math.pi' - Other differences are that ImportString can contain other arbitrary python values, whereas PyObjectPath is always a string + PyObjectPath also only accepts importable objects, not arbitrary values: >>> TypeAdapter(ImportString).validate_python(0) 0 >>> TypeAdapter(PyObjectPath).validate_python(0) @@ -38,7 +86,7 @@ class PyObjectPath(str): """ # TODO: It would be nice to make this also derive from Generic[T], - # where T could then by used for type checking in validate. + # where T could then be used for type checking in validate. # However, this doesn't work: https://github.com/python/typing/issues/629 @cached_property @@ -52,41 +100,41 @@ def __get_pydantic_core_schema__(cls, source_type, handler): @classmethod def _validate(cls, value: Any): + """Convert value (string path or object) to PyObjectPath, verifying it's importable.""" if isinstance(value, str): - value = cls(value) - else: # Try to construct a string from the object that can then be used to import the object + path = cls(value) + else: + # Unwrap generic types (e.g., List[int] -> list) origin = get_origin(value) if origin: value = origin + path = cls._path_from_object(value) - # Check for ccflow's import path override first (used for local-scope classes) - # This allows classes with '' in __qualname__ to remain importable - # while preserving cloudpickle's ability to serialize the class definition - if hasattr(value, "__ccflow_import_path__"): - _ensure_registered_at_import_path(value) - value = cls(value.__ccflow_import_path__) - elif hasattr(value, "__module__") and hasattr(value, "__qualname__"): - if value.__module__ == "__builtin__": - module = "builtins" - else: - module = value.__module__ - qualname = value.__qualname__ - if "[" in qualname: - # This happens with Generic types in pydantic. We strip out the info for now. - # TODO: Find a way of capturing the underlying type info - qualname = qualname.split("[", 1)[0] - if not module: - value = cls(qualname) - else: - value = cls(module + "." + qualname) - else: - raise ValueError(f"ensure this value contains valid import path or importable object: unable to import path for {value}") + # Verify the path is actually importable try: - value.object + path.object except ImportError as e: raise ValueError(f"ensure this value contains valid import path or importable object: {str(e)}") - return value + return path + + @classmethod + def _path_from_object(cls, value: Any) -> "PyObjectPath": + """Build import path from an object. Triggers ccflow registration for local classes.""" + if isinstance(value, type): + # For ccflow BaseModel subclasses that aren't normally importable (defined in + # functions or via create_model), this registers them on ccflow.base + _register_local_subclass_if_needed(value) + + # Use __ccflow_import_path__ if set (check __dict__ to avoid inheriting from parents) + if "__ccflow_import_path__" in value.__dict__: + return cls(value.__ccflow_import_path__) + return cls(_build_standard_import_path(value)) + + if hasattr(value, "__module__") and hasattr(value, "__qualname__"): + return cls(_build_standard_import_path(value)) + + raise ValueError(f"ensure this value contains valid import path or importable object: unable to import path for {value}") @classmethod @lru_cache(maxsize=None) @@ -95,10 +143,12 @@ def _validate_cached(cls, value: str) -> Self: @classmethod def validate(cls, value) -> Self: - """Try to convert/validate an arbitrary value to a PyObjectPath.""" - if isinstance( - value, (str, type, FunctionType, ModuleType, MethodType) - ): # If the value is trivial, we cache it here to avoid the overhead of validation + """Try to convert/validate an arbitrary value to a PyObjectPath. + + Uses caching for common value types to improve performance. + """ + # Cache validation for common immutable types to avoid repeated work + if isinstance(value, (str, type, FunctionType, ModuleType, MethodType)): return cls._validate_cached(value) return _TYPE_ADAPTER.validate_python(value) diff --git a/ccflow/local_persistence.py b/ccflow/local_persistence.py index 5fd0efa..db1e748 100644 --- a/ccflow/local_persistence.py +++ b/ccflow/local_persistence.py @@ -1,196 +1,75 @@ -"""Helpers for persisting BaseModel-derived classes defined inside local scopes. - -This module enables PyObjectPath validation for classes defined inside functions (which have -'' in their __qualname__ and aren't normally importable). - -Key design decision: We DON'T modify __module__ or __qualname__. This preserves cloudpickle's -ability to serialize the class definition for cross-process transfer. Instead, we set a -separate __ccflow_import_path__ attribute that PyObjectPath uses. - -Cross-process cloudpickle flow: -1. Process A creates a local class -> we set __ccflow_import_path__ on it -2. cloudpickle.dumps() serializes the class definition (because '' in __qualname__) - INCLUDING the __ccflow_import_path__ attribute we set -3. Process B: cloudpickle.loads() reconstructs the class with __ccflow_import_path__ already set -4. Process B: __pydantic_init_subclass__ runs, sees __ccflow_import_path__ exists, - re-registers the class in this process's _local_artifacts module -""" +"""Register local-scope classes on ccflow.base so PyObjectPath can import them. -from __future__ import annotations +Classes defined in functions or via create_model aren't normally importable. +We give them a unique name and put them on ccflow.base. We keep __module__ and +__qualname__ unchanged so cloudpickle can still serialize the class definition. +""" -import importlib.abc -import importlib.util import re import sys -from collections import defaultdict -from itertools import count -from typing import TYPE_CHECKING, Any, DefaultDict, Type - -if TYPE_CHECKING: - from importlib.machinery import ModuleSpec - from types import ModuleType - -__all__ = ("LOCAL_ARTIFACTS_MODULE_NAME",) - -LOCAL_ARTIFACTS_MODULE_NAME = "ccflow._local_artifacts" -_LOCAL_MODULE_DOC = "Auto-generated BaseModel subclasses created outside importable modules." - -_SANITIZE_PATTERN = re.compile(r"[^0-9A-Za-z_]") -_LOCAL_KIND_COUNTERS: DefaultDict[str, count] = defaultdict(lambda: count()) -_LOCAL_ARTIFACTS_MODULE: "ModuleType | None" = None - - -class _LocalArtifactsLoader(importlib.abc.Loader): - """Minimal loader so importlib can reload our synthetic module if needed.""" - - def __init__(self, *, doc: str) -> None: - self._doc = doc - - def create_module(self, spec: "ModuleSpec") -> "ModuleType | None": - """Defer to default module creation (keeping importlib from recursing).""" - return None - - def exec_module(self, module: "ModuleType") -> None: - module.__doc__ = module.__doc__ or self._doc - - -class _LocalArtifactsFinder(importlib.abc.MetaPathFinder): - """Ensures importlib can rediscover the synthetic module when reloading.""" - - def find_spec( - self, - fullname: str, - path: Any, - target: "ModuleType | None" = None, - ) -> "ModuleSpec | None": - if fullname != LOCAL_ARTIFACTS_MODULE_NAME: - return None - return _build_module_spec(fullname, _LOCAL_MODULE_DOC) - - -def _build_module_spec(name: str, doc: str) -> "ModuleSpec": - loader = _LocalArtifactsLoader(doc=doc) - spec = importlib.util.spec_from_loader( - name, - loader=loader, - origin="ccflow.local_persistence:_ensure_module", - ) - if spec is None: - raise ImportError(f"Unable to create spec for dynamic module {name!r}.") - spec.has_location = False - return spec - - -def _ensure_finder_installed() -> None: - for finder in sys.meta_path: - if isinstance(finder, _LocalArtifactsFinder): - return - sys.meta_path.insert(0, _LocalArtifactsFinder()) - - -def _ensure_module(name: str, doc: str) -> "ModuleType": - """Materialize the synthetic module with a real spec/loader so importlib treats it like disk-backed code. - - We only do this on demand, but once built the finder/spec/loader plumbing - keeps reload, pickling, and other importlib consumers happy. The Python docs recommend this approach instead of creating modules directly via the constructor.""" - _ensure_finder_installed() - module = sys.modules.get(name) - if module is None: - # Create a proper ModuleSpec + loader so importlib reloads and introspection behave - # the same way they would for filesystem-backed modules - spec = _build_module_spec(name, doc) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) # type: ignore[union-attr] - sys.modules[name] = module - parent_name, _, attr = name.rpartition(".") - if parent_name: - parent_module = sys.modules.get(parent_name) - if parent_module and not hasattr(parent_module, attr): - setattr(parent_module, attr, module) - return module - - -def _get_local_artifacts_module() -> "ModuleType": - """Lazily materialize the synthetic module to avoid errors during creation until needed.""" - global _LOCAL_ARTIFACTS_MODULE - if _LOCAL_ARTIFACTS_MODULE is None: - _LOCAL_ARTIFACTS_MODULE = _ensure_module(LOCAL_ARTIFACTS_MODULE_NAME, _LOCAL_MODULE_DOC) - return _LOCAL_ARTIFACTS_MODULE - - -def _needs_registration(cls: Type[Any]) -> bool: - qualname = getattr(cls, "__qualname__", "") - return "" in qualname - +import uuid +from typing import Any, Type -def _sanitize_identifier(value: str, fallback: str) -> str: - sanitized = _SANITIZE_PATTERN.sub("_", value or "") - sanitized = sanitized.strip("_") or fallback - if sanitized[0].isdigit(): - sanitized = f"_{sanitized}" - return sanitized +__all__ = ("LOCAL_ARTIFACTS_MODULE_NAME", "_register_local_subclass_if_needed", "_register", "_sync_to_module") +LOCAL_ARTIFACTS_MODULE_NAME = "ccflow.base" -def _build_unique_name(*, kind_slug: str, name_hint: str) -> str: - sanitized_hint = _sanitize_identifier(name_hint, "BaseModel") - counter = _LOCAL_KIND_COUNTERS[kind_slug] - return f"{kind_slug}__{sanitized_hint}__{next(counter)}" +def _is_importable(cls: Type[Any]) -> bool: + """Can cls be imported via its __module__.__qualname__ path?""" + qualname = getattr(cls, "__qualname__", "") + module_name = getattr(cls, "__module__", "") -def _ensure_registered_at_import_path(cls: Type[Any]) -> None: - """Ensure a class with __ccflow_import_path__ is actually registered in _local_artifacts. + if "" in qualname or module_name == "__main__": + return False - This handles the cross-process cloudpickle case: when cloudpickle reconstructs a class, - it has __ccflow_import_path__ set (serialized with the class definition), but the class - isn't registered in _local_artifacts in the new process yet. + module = sys.modules.get(module_name) + if not module: + return False - Called from both _register_local_subclass (during class creation/unpickling) and - PyObjectPath validation (when accessing type_). - """ - import_path = getattr(cls, "__ccflow_import_path__", None) - if import_path is None or not import_path.startswith(LOCAL_ARTIFACTS_MODULE_NAME + "."): - return + obj = module + for part in qualname.split("."): + obj = getattr(obj, part, None) + if obj is None: + return False + return obj is cls - registered_name = import_path.rsplit(".", 1)[-1] - artifacts_module = _get_local_artifacts_module() - # Re-register if not present or points to different class - if getattr(artifacts_module, registered_name, None) is not cls: - setattr(artifacts_module, registered_name, cls) +def _register(cls: Type[Any]) -> None: + """Give cls a unique name and put it on ccflow.base.""" + name = re.sub(r"[^0-9A-Za-z_]", "_", cls.__name__ or "Model").strip("_") or "Model" + if name[0].isdigit(): + name = f"_{name}" + unique = f"_Local_{name}_{uuid.uuid4().hex[:12]}" + setattr(sys.modules["ccflow.base"], unique, cls) + cls.__ccflow_import_path__ = f"{LOCAL_ARTIFACTS_MODULE_NAME}.{unique}" -def _register_local_subclass(cls: Type[Any], *, kind: str = "model") -> None: - """Register BaseModel subclasses created in local scopes. - This enables PyObjectPath validation for classes that aren't normally importable - (e.g., classes defined inside functions). The class is registered in a synthetic - module (`ccflow._local_artifacts`) so it can be imported via the stored path. +def _sync_to_module(cls: Type[Any]) -> None: + """Ensure cls is on ccflow.base in this process (for cross-process unpickle).""" + path = getattr(cls, "__ccflow_import_path__", "") + if path.startswith(LOCAL_ARTIFACTS_MODULE_NAME + "."): + name = path.rsplit(".", 1)[-1] + base = sys.modules["ccflow.base"] + if getattr(base, name, None) is not cls: + setattr(base, name, cls) - IMPORTANT: This function does NOT change __module__ or __qualname__. This is - intentional - it preserves cloudpickle's ability to serialize the class definition - for cross-process transfer. If __qualname__ contains '', cloudpickle - recognizes the class isn't normally importable and serializes its full definition. - Args: - cls: The class to register. - kind: A slug identifying the type of class (e.g., "model", "context", "callable_model"). - """ - # If already has import path, just ensure it's registered (handles cross-process unpickling) - if hasattr(cls, "__ccflow_import_path__"): - _ensure_registered_at_import_path(cls) +def _register_local_subclass_if_needed(cls: Type[Any]) -> None: + """Register cls if not importable. Called from PyObjectPath and __reduce_ex__.""" + if "__ccflow_import_path__" in cls.__dict__: + _sync_to_module(cls) return + if "__ccflow_importable__" in cls.__dict__: + return + + from ccflow.base import BaseModel - if not _needs_registration(cls): + if not (isinstance(cls, type) and issubclass(cls, BaseModel)): return - name_hint = f"{getattr(cls, '__module__', '')}.{getattr(cls, '__qualname__', '')}" - kind_slug = _sanitize_identifier(kind, "model").lower() - unique_name = _build_unique_name(kind_slug=kind_slug, name_hint=name_hint) - artifacts_module = _get_local_artifacts_module() - setattr(artifacts_module, unique_name, cls) - - # Store the import path as a separate attribute - DON'T change __module__ or __qualname__ - # This preserves cloudpickle's ability to serialize the class definition. - # The original module/qualname can still be retrieved via cls.__module__ and cls.__qualname__. - import_path = f"{artifacts_module.__name__}.{unique_name}" - setattr(cls, "__ccflow_import_path__", import_path) + if _is_importable(cls): + cls.__ccflow_importable__ = True + else: + _register(cls) diff --git a/ccflow/tests/exttypes/test_pyobjectpath.py b/ccflow/tests/exttypes/test_pyobjectpath.py index d25fabb..7b3dd93 100644 --- a/ccflow/tests/exttypes/test_pyobjectpath.py +++ b/ccflow/tests/exttypes/test_pyobjectpath.py @@ -62,3 +62,20 @@ def test_pickle(self): self.assertIsNotNone(p.object) self.assertEqual(p, pickle.loads(pickle.dumps(p))) self.assertEqual(p.object, pickle.loads(pickle.dumps(p.object))) + + def test_builtin_module_alias(self): + """Test that objects with __module__ == '__builtin__' are handled correctly. + + In Python 2, built-in types had __module__ == '__builtin__', but in Python 3 + it's 'builtins'. Some C extensions or pickled objects may still report the + old module name. + """ + + # Create a mock object that reports __builtin__ as its module + class MockBuiltinObject: + __module__ = "__builtin__" + __qualname__ = "int" + + p = PyObjectPath.validate(MockBuiltinObject) + self.assertEqual(p, "builtins.int") + self.assertEqual(p.object, int) diff --git a/ccflow/tests/test_base.py b/ccflow/tests/test_base.py index bd24cc1..84535f7 100644 --- a/ccflow/tests/test_base.py +++ b/ccflow/tests/test_base.py @@ -174,53 +174,33 @@ def test_widget(self): ) -class TestLocalRegistrationKind(TestCase): - def test_base_model_defaults_to_model_kind(self): - with mock.patch("ccflow.base._register_local_subclass") as register: +class TestLocalRegistration(TestCase): + def test_local_class_registered_for_base_model(self): + with mock.patch("ccflow.base._register") as register: class LocalModel(BaseModel): value: int - register.assert_called_once() - args, kwargs = register.call_args - self.assertIs(args[0], LocalModel) - self.assertEqual(kwargs["kind"], "model") + # Local classes (defined in functions) should be registered + calls = [args[0] for args, _ in register.call_args_list if args] + self.assertIn(LocalModel, calls) - def test_context_defaults_to_context_kind(self): - with mock.patch("ccflow.base._register_local_subclass") as register: + def test_local_class_registered_for_context(self): + with mock.patch("ccflow.base._register") as register: class LocalContext(ContextBase): value: int - register.assert_called_once() - args, kwargs = register.call_args - self.assertIs(args[0], LocalContext) - self.assertEqual(kwargs["kind"], "context") + calls = [args[0] for args, _ in register.call_args_list if args] + self.assertIn(LocalContext, calls) - def test_callable_defaults_to_callable_kind(self): - with mock.patch("ccflow.base._register_local_subclass") as register: + def test_local_class_registered_for_callable(self): + with mock.patch("ccflow.base._register") as register: class LocalCallable(CallableModel): @Flow.call def __call__(self, context: NullContext) -> GenericResult: return GenericResult(value="ok") - result = LocalCallable()(NullContext()) - - calls_for_local = [(args, kwargs) for args, kwargs in register.call_args_list if args and args[0] is LocalCallable] - self.assertEqual(len(calls_for_local), 1) - _, kwargs = calls_for_local[0] - self.assertEqual(kwargs["kind"], "callable_model") - self.assertEqual(result.value, "ok") - - def test_explicit_override_respected(self): - with mock.patch("ccflow.base._register_local_subclass") as register: - - class CustomKind(BaseModel): - __ccflow_local_registration_kind__ = "custom" - value: int - - register.assert_called_once() - args, kwargs = register.call_args - self.assertIs(args[0], CustomKind) - self.assertEqual(kwargs["kind"], "custom") + calls = [args[0] for args, _ in register.call_args_list if args] + self.assertIn(LocalCallable, calls) diff --git a/ccflow/tests/test_callable.py b/ccflow/tests/test_callable.py index 56c5737..36c2133 100644 --- a/ccflow/tests/test_callable.py +++ b/ccflow/tests/test_callable.py @@ -590,14 +590,13 @@ def __call__(self, context: context_cls) -> GenericResult: locals_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] ctx_attr = _find_registered_name(locals_module, LocalContext) model_attr = _find_registered_name(locals_module, LocalCallable) - self.assertTrue(ctx_attr.startswith("context__")) - self.assertTrue(model_attr.startswith("callable_model__")) - ctx_hint = ctx_attr.partition("__")[2].rsplit("__", 1)[0] - model_hint = model_attr.partition("__")[2].rsplit("__", 1)[0] - self.assertEqual(ctx_hint, model_hint) + # Both should use _Local__ format + self.assertTrue(ctx_attr.startswith("_Local_LocalThing_")) + self.assertTrue(model_attr.startswith("_Local_LocalThing_")) + # UUIDs make them unique even with same class name self.assertEqual(getattr(locals_module, ctx_attr), LocalContext) self.assertEqual(getattr(locals_module, model_attr), LocalCallable) - self.assertNotEqual(ctx_attr, model_attr, "Kind-prefixed names keep contexts and callables distinct.") + self.assertNotEqual(ctx_attr, model_attr, "UUIDs keep same-named classes distinct.") def test_local_callable_type_path_roundtrip(self): LocalCallable = build_local_callable() diff --git a/ccflow/tests/test_local_persistence.py b/ccflow/tests/test_local_persistence.py index 085aec0..657e180 100644 --- a/ccflow/tests/test_local_persistence.py +++ b/ccflow/tests/test_local_persistence.py @@ -1,11 +1,11 @@ +import re import subprocess import sys import textwrap -from collections import defaultdict -from itertools import count -from unittest import TestCase, mock +from unittest import TestCase import ray +from pydantic import create_model as pydantic_create_model import ccflow.local_persistence as local_persistence from ccflow import BaseModel, CallableModel, ContextBase, Flow, GenericResult, NullContext @@ -25,13 +25,15 @@ def __call__(self, context: NullContext) -> GenericResult: return GenericResult(value="ok") -class TestNeedsRegistration(TestCase): - def test_module_level_ccflow_classes_do_not_need_registration(self): +class TestIsImportable(TestCase): + """Tests for _is_importable - the core check for whether a class needs registration.""" + + def test_module_level_ccflow_classes_are_importable(self): for cls in (ModuleLevelModel, ModuleLevelContext, ModuleLevelCallable): with self.subTest(cls=cls.__name__): - self.assertFalse(local_persistence._needs_registration(cls)) + self.assertTrue(local_persistence._is_importable(cls)) - def test_local_scope_class_needs_registration(self): + def test_local_scope_class_not_importable(self): def build_class(): class LocalClass: pass @@ -39,51 +41,27 @@ class LocalClass: return LocalClass LocalClass = build_class() - self.assertTrue(local_persistence._needs_registration(LocalClass)) + self.assertFalse(local_persistence._is_importable(LocalClass)) - def test_main_module_class_does_not_need_registration(self): + def test_main_module_class_not_importable(self): + # __main__ classes are not importable from other processes cls = type("MainModuleClass", (), {}) cls.__module__ = "__main__" cls.__qualname__ = "MainModuleClass" - self.assertFalse(local_persistence._needs_registration(cls)) + self.assertFalse(local_persistence._is_importable(cls)) - def test_module_level_non_ccflow_class_does_not_need_registration(self): - cls = type("ExternalClass", (), {}) - cls.__module__ = "ccflow.tests.test_local_persistence" - cls.__qualname__ = "ExternalClass" - self.assertFalse(local_persistence._needs_registration(cls)) - - -class TestBuildUniqueName(TestCase): - def test_build_unique_name_sanitizes_hint_and_increments_counter(self): - with mock.patch.object(local_persistence, "_LOCAL_KIND_COUNTERS", defaultdict(lambda: count())): - name = local_persistence._build_unique_name( - kind_slug="callable_model", - name_hint="module.path:MyCallable", - ) - self.assertTrue(name.startswith("callable_model__module_path_MyCallable_One_")) - self.assertTrue(name.endswith("__0")) - - second = local_persistence._build_unique_name( - kind_slug="callable_model", - name_hint="module.path:MyCallable", - ) - self.assertTrue(second.endswith("__1")) - - def test_counters_are_namespaced_by_kind(self): - with mock.patch.object(local_persistence, "_LOCAL_KIND_COUNTERS", defaultdict(lambda: count())): - first_context = local_persistence._build_unique_name(kind_slug="context", name_hint="Context") - first_callable = local_persistence._build_unique_name(kind_slug="callable_model", name_hint="Callable") - second_context = local_persistence._build_unique_name(kind_slug="context", name_hint="Other") - - self.assertTrue(first_context.endswith("__0")) - self.assertTrue(first_callable.endswith("__0")) - self.assertTrue(second_context.endswith("__1")) - - def test_empty_hint_uses_fallback(self): - with mock.patch.object(local_persistence, "_LOCAL_KIND_COUNTERS", defaultdict(lambda: count())): - name = local_persistence._build_unique_name(kind_slug="model", name_hint="") - self.assertEqual(name, "model__BaseModel__0") + def test_module_level_class_is_importable(self): + # Module-level classes are importable + self.assertTrue(local_persistence._is_importable(ModuleLevelModel)) + + def test_dynamically_created_class_not_importable(self): + """Test that dynamically created classes (like from pydantic's create_model) are not importable.""" + # This simulates what happens with pydantic's create_model + DynamicClass = type("DynamicClass", (), {}) + DynamicClass.__module__ = "ccflow.tests.test_local_persistence" + DynamicClass.__qualname__ = "DynamicClass" + # This class has a valid-looking module/qualname but isn't actually in the module + self.assertFalse(local_persistence._is_importable(DynamicClass)) def _run_subprocess(code: str) -> str: @@ -97,49 +75,19 @@ def _run_subprocess(code: str) -> str: return result.stdout.strip() -def test_local_artifacts_module_is_lazy(): +def test_base_module_available_after_import(): + """Test that ccflow.base module is available after importing ccflow.""" output = _run_subprocess( """ import sys - import ccflow.local_persistence as lp - - print(lp.LOCAL_ARTIFACTS_MODULE_NAME in sys.modules) - """ - ) - assert output == "False" - - -def test_local_artifacts_module_reload_preserves_dynamic_attrs(): - output = _run_subprocess( - """ - import importlib - import ccflow.local_persistence as lp - - def build_cls(): - class _Temp: - pass - return _Temp - - Temp = build_cls() - lp._register_local_subclass(Temp, kind="demo") - module = importlib.import_module(lp.LOCAL_ARTIFACTS_MODULE_NAME) - - # Extract the registered name from __ccflow_import_path__ - import_path = Temp.__ccflow_import_path__ - registered_name = import_path.split(".")[-1] - - before = getattr(module, registered_name) is Temp - module = importlib.reload(module) - after = getattr(module, registered_name) is Temp - - # __qualname__ should NOT have changed (preserves cloudpickle behavior) - print(before, after, "" in Temp.__qualname__) - """ + import ccflow + print(local_persistence.LOCAL_ARTIFACTS_MODULE_NAME in sys.modules) + """.replace("local_persistence.LOCAL_ARTIFACTS_MODULE_NAME", f'"{local_persistence.LOCAL_ARTIFACTS_MODULE_NAME}"') ) - assert output.split() == ["True", "True", "True"] + assert output == "True" -def test_register_local_subclass_preserves_module_qualname_and_sets_import_path(): +def test_register_preserves_module_qualname_and_sets_import_path(): output = _run_subprocess( """ import sys @@ -153,7 +101,7 @@ class Foo: Foo = build() original_module = Foo.__module__ original_qualname = Foo.__qualname__ - lp._register_local_subclass(Foo, kind="ModelThing") + lp._register(Foo) module = sys.modules[lp.LOCAL_ARTIFACTS_MODULE_NAME] # __module__ and __qualname__ should NOT change (preserves cloudpickle) @@ -166,7 +114,7 @@ class Foo: registered_name = import_path.split(".")[-1] print(hasattr(module, registered_name)) print(getattr(module, registered_name) is Foo) - print(import_path.startswith("ccflow._local_artifacts.modelthing__")) + print(import_path.startswith("ccflow.base._Local_")) """ ) lines = output.splitlines() @@ -178,6 +126,68 @@ class Foo: assert lines[5] == "True", f"Import path should start with expected prefix: {lines}" +def test_register_handles_class_name_starting_with_digit(): + """Test that _register handles class names starting with a digit by prefixing with underscore.""" + output = _run_subprocess( + """ + import sys + import ccflow.local_persistence as lp + + # Create a class with a name starting with a digit + cls = type("3DModel", (), {}) + lp._register(cls) + + import_path = cls.__ccflow_import_path__ + registered_name = import_path.split(".")[-1] + + # The registered name should start with _Local__ (underscore added for digit) + print(registered_name.startswith("_Local__")) + print("_3DModel_" in registered_name) + + # Should be registered on ccflow.base + module = sys.modules[lp.LOCAL_ARTIFACTS_MODULE_NAME] + print(getattr(module, registered_name) is cls) + """ + ) + lines = output.splitlines() + assert lines[0] == "True", f"Registered name should start with _Local__: {lines}" + assert lines[1] == "True", f"Registered name should contain _3DModel_: {lines}" + assert lines[2] == "True", f"Class should be registered on module: {lines}" + + +def test_sync_to_module_registers_class_not_yet_on_module(): + """Test that _sync_to_module registers a class that has __ccflow_import_path__ but isn't on the module yet. + + This happens in cross-process unpickle scenarios where cloudpickle recreates the class + with __ccflow_import_path__ set, but the class isn't yet on ccflow.base. + """ + output = _run_subprocess( + """ + import sys + import ccflow.local_persistence as lp + + # Simulate a class that has __ccflow_import_path__ but isn't registered on ccflow.base + # (like what happens after cross-process cloudpickle unpickle) + cls = type("SimulatedUnpickled", (), {}) + unique_name = "_Local_SimulatedUnpickled_test123abc" + cls.__ccflow_import_path__ = f"{lp.LOCAL_ARTIFACTS_MODULE_NAME}.{unique_name}" + + # Verify class is NOT on ccflow.base yet + module = sys.modules[lp.LOCAL_ARTIFACTS_MODULE_NAME] + print(getattr(module, unique_name, None) is None) + + # Call _sync_to_module + lp._sync_to_module(cls) + + # Verify class IS now on ccflow.base + print(getattr(module, unique_name, None) is cls) + """ + ) + lines = output.splitlines() + assert lines[0] == "True", f"Class should NOT be on module before sync: {lines}" + assert lines[1] == "True", f"Class should be on module after sync: {lines}" + + def test_local_basemodel_cloudpickle_cross_process(): """Test that local-scope BaseModel subclasses work with cloudpickle cross-process. @@ -189,7 +199,8 @@ def test_local_basemodel_cloudpickle_cross_process(): import os import tempfile - pkl_path = tempfile.mktemp(suffix=".pkl") + fd, pkl_path = tempfile.mkstemp(suffix=".pkl") + os.close(fd) try: # Create a local-scope BaseModel in subprocess 1 and pickle it @@ -272,6 +283,173 @@ class LocalModel(BaseModel): os.unlink(pkl_path) +# ============================================================================= +# Tests for pydantic's create_model +# ============================================================================= + + +class TestPydanticCreateModel: + """Tests for dynamically created models using pydantic's create_model.""" + + def test_create_model_gets_registered(self): + """Test that models created with pydantic's create_model get registered.""" + + def make_model(): + return pydantic_create_model("DynamicModel", value=(int, ...), __base__=BaseModel) + + Model = make_model() + + # Registration happens lazily when type_ is accessed + # Create an instance and access type_ to trigger registration + instance = Model(value=42) + _ = instance.type_ + + # Should have __ccflow_import_path__ set after type_ access + assert hasattr(Model, "__ccflow_import_path__") + import_path = Model.__ccflow_import_path__ + assert import_path.startswith("ccflow.base._Local_") + + # Should be registered in ccflow.base + registered_name = import_path.split(".")[-1] + import ccflow.base + + assert hasattr(ccflow.base, registered_name) + assert getattr(ccflow.base, registered_name) is Model + + def test_create_model_type_works(self): + """Test that type_ property works for create_model-created models.""" + + def make_model(): + return pydantic_create_model("DynamicModel", value=(int, ...), __base__=BaseModel) + + Model = make_model() + instance = Model(value=42) + + # type_ should work and return the import path + type_path = instance.type_ + assert str(type_path) == Model.__ccflow_import_path__ + assert type_path.object is Model + + def test_create_model_importable(self): + """Test that create_model models can be imported via their type_ path.""" + import importlib + + def make_model(): + return pydantic_create_model("ImportableModel", data=(str, ...), __base__=BaseModel) + + Model = make_model() + instance = Model(data="test") + + type_path = str(instance.type_) + parts = type_path.rsplit(".", 1) + module = importlib.import_module(parts[0]) + imported_cls = getattr(module, parts[1]) + + assert imported_cls is Model + + def test_create_model_without_locals_still_gets_uuid(self): + """Test that create_model models (which don't have in qualname) get unique UUIDs.""" + + def make_models(): + Model1 = pydantic_create_model("SameName", x=(int, ...), __base__=BaseModel) + Model2 = pydantic_create_model("SameName", y=(str, ...), __base__=BaseModel) + return Model1, Model2 + + Model1, Model2 = make_models() + + # Both should be properly registered (trigger registration via type_) + instance1 = Model1.model_validate({"x": 1}) + instance2 = Model2.model_validate({"y": "test"}) + _ = instance1.type_ + _ = instance2.type_ + + # Both should have unique import paths (after type_ access triggers registration) + assert Model1.__ccflow_import_path__ != Model2.__ccflow_import_path__ + + assert instance1.x == 1 + assert instance2.y == "test" + + def test_create_model_context_base(self): + """Test create_model with ContextBase as base class.""" + + def make_context(): + return pydantic_create_model("DynamicContext", name=(str, ...), value=(int, ...), __base__=ContextBase) + + Context = make_context() + instance = Context(name="test", value=42) + + # Access type_ to trigger registration + type_path = instance.type_ + assert type_path.object is Context + + # After type_ access, __ccflow_import_path__ should be set + assert hasattr(Context, "__ccflow_import_path__") + + def test_create_model_cloudpickle_same_process(self): + """Test that create_model models work with cloudpickle in the same process.""" + from ray.cloudpickle import dumps, loads + + def make_model(): + return pydantic_create_model("PickleModel", value=(int, ...), __base__=BaseModel) + + Model = make_model() + instance = Model(value=123) + + restored = loads(dumps(instance)) + + assert restored.value == 123 + assert type(restored) is Model + + def test_create_model_cloudpickle_cross_process(self): + """Test that create_model models work with cloudpickle across processes.""" + import os + import tempfile + + fd, pkl_path = tempfile.mkstemp(suffix=".pkl") + os.close(fd) + + try: + create_code = f''' +from ray.cloudpickle import dump +from pydantic import create_model +from ccflow import BaseModel + +Model = create_model("CrossProcessModel", value=(int, ...), __base__=BaseModel) + +instance = Model(value=42) +# Access type_ to trigger registration (lazy for create_model) +type_path = instance.type_ +print(f"type_: {{type_path}}") + +# Now __ccflow_import_path__ should be set +assert hasattr(Model, "__ccflow_import_path__"), "Expected __ccflow_import_path__ after type_ access" + +with open("{pkl_path}", "wb") as f: + dump(instance, f) +print("SUCCESS") +''' + create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) + assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" + + load_code = f''' +from ray.cloudpickle import load + +with open("{pkl_path}", "rb") as f: + obj = load(f) + +assert obj.value == 42 +type_path = obj.type_ +assert type_path.object is type(obj) +print("SUCCESS") +''' + load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) + assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" + + finally: + if os.path.exists(pkl_path): + os.unlink(pkl_path) + + # ============================================================================= # Comprehensive tests for local persistence and PyObjectPath integration # ============================================================================= @@ -294,7 +472,7 @@ class Inner(BaseModel): assert cls.__module__ != local_persistence.LOCAL_ARTIFACTS_MODULE_NAME def test_module_not_changed_to_local_artifacts(self): - """Verify that __module__ is NOT changed to _local_artifacts.""" + """Verify that __module__ is NOT changed to ccflow.base.""" def create_class(): class Inner(ContextBase): @@ -303,7 +481,7 @@ class Inner(ContextBase): return Inner cls = create_class() - # __module__ should be this test module, not _local_artifacts + # __module__ should be this test module, not ccflow.base assert cls.__module__ == "ccflow.tests.test_local_persistence" assert cls.__module__ != local_persistence.LOCAL_ARTIFACTS_MODULE_NAME @@ -320,9 +498,8 @@ class Inner(BaseModel): assert hasattr(cls, "__ccflow_import_path__") assert cls.__ccflow_import_path__.startswith(local_persistence.LOCAL_ARTIFACTS_MODULE_NAME + ".") - def test_class_registered_in_local_artifacts(self): - """Verify that the class is registered in _local_artifacts under import path.""" - import sys + def test_class_registered_in_base_module(self): + """Verify that the class is registered in ccflow.base under import path.""" def create_class(): class Inner(BaseModel): @@ -478,7 +655,8 @@ def test_context_base_cross_process(self): import os import tempfile - pkl_path = tempfile.mktemp(suffix=".pkl") + fd, pkl_path = tempfile.mkstemp(suffix=".pkl") + os.close(fd) try: create_code = f''' @@ -522,12 +700,112 @@ class LocalContext(ContextBase): if os.path.exists(pkl_path): os.unlink(pkl_path) + def test_sync_to_module_lazy_on_cross_process_unpickle(self): + """Test that _sync_to_module happens lazily when accessing type_ after cross-process unpickle. + + When cloudpickle recreates a class in a new process, __ccflow_import_path__ is preserved + but the class is NOT immediately registered on ccflow.base (cloudpickle sets attributes + AFTER __pydantic_init_subclass__ runs). Registration happens lazily when: + - type_ is accessed (calls PyObjectPath.validate) + - PyObjectPath.validate(cls) is called directly + - _register_local_subclass_if_needed(cls) is called + """ + import os + import tempfile + + fd, pkl_path = tempfile.mkstemp(suffix=".pkl") + os.close(fd) + + try: + # Process A: Create a local class and pickle the CLASS itself (not just instance) + create_code = f''' +import sys +from ray.cloudpickle import dump +from ccflow import BaseModel +from ccflow.local_persistence import LOCAL_ARTIFACTS_MODULE_NAME + +def create_model(): + class LocalModel(BaseModel): + value: int + return LocalModel + +LocalModel = create_model() + +# Verify class is registered and has import path +assert hasattr(LocalModel, "__ccflow_import_path__"), "Should have import path" +import_path = LocalModel.__ccflow_import_path__ +assert import_path.startswith(LOCAL_ARTIFACTS_MODULE_NAME + "."), f"Bad path: {{import_path}}" + +# Extract registered name and verify it's on ccflow.base +registered_name = import_path.split(".")[-1] +base_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] +assert getattr(base_module, registered_name, None) is LocalModel, "Should be on ccflow.base" + +# Pickle the CLASS itself (cloudpickle will serialize the class definition) +with open("{pkl_path}", "wb") as f: + dump(LocalModel, f) + +# Output the import path so we can verify it's the same in process B +print(import_path) +''' + create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) + assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" + original_import_path = create_result.stdout.strip() + + # Process B: Unpickle the class and verify lazy sync behavior + load_code = f''' +import sys +from ray.cloudpickle import load +from ccflow.local_persistence import LOCAL_ARTIFACTS_MODULE_NAME +from ccflow import PyObjectPath + +with open("{pkl_path}", "rb") as f: + LocalModel = load(f) + +# Verify __ccflow_import_path__ is preserved from pickle +assert hasattr(LocalModel, "__ccflow_import_path__"), "Should have import path after unpickle" +import_path = LocalModel.__ccflow_import_path__ +assert import_path == "{original_import_path}", f"Path mismatch: {{import_path}} != {original_import_path}" + +# BEFORE accessing type_: class is NOT yet registered on ccflow.base +# (cloudpickle sets attributes AFTER class creation, so __pydantic_init_subclass__ doesn't sync) +registered_name = import_path.split(".")[-1] +base_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] +registered_cls_before = getattr(base_module, registered_name, None) +# Note: registered_cls_before may be None OR may be LocalModel (if already synced by another path) + +# Trigger lazy sync via PyObjectPath.validate +path = PyObjectPath.validate(LocalModel) +assert path == import_path, f"PyObjectPath should return the ccflow import path" + +# AFTER accessing PyObjectPath.validate: class IS registered on ccflow.base +registered_cls_after = getattr(base_module, registered_name, None) +assert registered_cls_after is LocalModel, "_sync_to_module should have registered class on ccflow.base" + +# Verify PyObjectPath.object resolves correctly +assert path.object is LocalModel, "PyObjectPath.object should resolve to the class" + +# Verify the class is functional +instance = LocalModel(value=123) +assert instance.value == 123 + +print("SUCCESS") +''' + load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) + assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" + assert "SUCCESS" in load_result.stdout + + finally: + if os.path.exists(pkl_path): + os.unlink(pkl_path) + def test_callable_model_cross_process(self): """Test cross-process cloudpickle for CallableModel subclasses.""" import os import tempfile - pkl_path = tempfile.mktemp(suffix=".pkl") + fd, pkl_path = tempfile.mkstemp(suffix=".pkl") + os.close(fd) try: create_code = f''' @@ -589,7 +867,8 @@ def test_nested_local_classes_cross_process(self): import os import tempfile - pkl_path = tempfile.mktemp(suffix=".pkl") + fd, pkl_path = tempfile.mkstemp(suffix=".pkl") + os.close(fd) try: create_code = f''' @@ -637,7 +916,8 @@ def test_multiple_instances_same_local_class_cross_process(self): import os import tempfile - pkl_path = tempfile.mktemp(suffix=".pkl") + fd, pkl_path = tempfile.mkstemp(suffix=".pkl") + os.close(fd) try: create_code = f''' @@ -699,9 +979,9 @@ def test_module_level_class_type_path_uses_qualname(self): instance = ModuleLevelModel(value=1) type_path = str(instance.type_) - # Should use standard path, not _local_artifacts + # Should use standard path, not ccflow.base._Local_... assert type_path == "ccflow.tests.test_local_persistence.ModuleLevelModel" - assert local_persistence.LOCAL_ARTIFACTS_MODULE_NAME not in type_path + assert "_Local_" not in type_path def test_module_level_standard_pickle_works(self): """Verify standard pickle works for module-level classes.""" @@ -818,4 +1098,577 @@ def process_model(m): with ray.init(num_cpus=1): result = ray.get(process_model.remote(model)) - assert result.startswith("hello|ccflow._local_artifacts.") + assert result.startswith("hello|ccflow.base._Local_") + + def test_create_model_ray_task(self): + """Test that pydantic create_model models can be sent to Ray tasks.""" + + def make_model(): + return pydantic_create_model("RayModel", value=(int, ...), __base__=BaseModel) + + Model = make_model() + + @ray.remote + def process_model(m): + type_path = str(m.type_) + return f"{m.value}|{type_path}" + + model = Model(value=99) + + with ray.init(num_cpus=1): + result = ray.get(process_model.remote(model)) + + assert result.startswith("99|ccflow.base._Local_") + + +class TestUUIDUniqueness: + """Tests verifying UUID-based naming provides cross-process uniqueness.""" + + def test_different_processes_get_different_uuids(self): + """Test that different processes generate different UUIDs for same class name.""" + import os + import tempfile + + fd1, pkl_path1 = tempfile.mkstemp(suffix="_1.pkl") + fd2, pkl_path2 = tempfile.mkstemp(suffix="_2.pkl") + os.close(fd1) + os.close(fd2) + + try: + # Create same-named class in two different processes + # Note: For module-level classes, registration only happens when needed + # Access type_ to trigger registration before checking __ccflow_import_path__ + code = """ +import sys +from ray.cloudpickle import dump +from ccflow import BaseModel + +class SameName(BaseModel): + value: int + +instance = SameName(value={value}) +# Access type_ to trigger registration if needed +_ = instance.type_ +print(SameName.__ccflow_import_path__) + +with open("{pkl_path}", "wb") as f: + dump(instance, f) +""" + result1 = subprocess.run([sys.executable, "-c", code.format(value=1, pkl_path=pkl_path1)], capture_output=True, text=True) + assert result1.returncode == 0, f"Process 1 failed: {result1.stderr}" + path1 = result1.stdout.strip() + + result2 = subprocess.run([sys.executable, "-c", code.format(value=2, pkl_path=pkl_path2)], capture_output=True, text=True) + assert result2.returncode == 0, f"Process 2 failed: {result2.stderr}" + path2 = result2.stdout.strip() + + # UUIDs should be different even though class names are the same + assert path1 != path2 + assert "_Local_SameName_" in path1 + assert "_Local_SameName_" in path2 + + # Both should be loadable in the same process + load_code = f''' +from ray.cloudpickle import load + +with open("{pkl_path1}", "rb") as f: + obj1 = load(f) + +with open("{pkl_path2}", "rb") as f: + obj2 = load(f) + +assert obj1.value == 1 +assert obj2.value == 2 +# They should be different types +assert type(obj1) is not type(obj2) +print("SUCCESS") +''' + load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) + assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" + + finally: + for p in [pkl_path1, pkl_path2]: + if os.path.exists(p): + os.unlink(p) + + def test_uuid_format_is_valid(self): + """Test that the UUID portion of names is valid hex.""" + + def create_class(): + class TestModel(BaseModel): + x: int + + return TestModel + + Model = create_class() + import_path = Model.__ccflow_import_path__ + + # Extract UUID portion + match = re.search(r"_Local_TestModel_([a-f0-9]+)$", import_path) + assert match is not None, f"Import path doesn't match expected format: {import_path}" + + uuid_part = match.group(1) + assert len(uuid_part) == 12, f"UUID should be 12 hex chars, got {len(uuid_part)}" + assert all(c in "0123456789abcdef" for c in uuid_part) + + +# ============================================================================= +# Edge case tests +# ============================================================================= + + +class OuterClass: + """Module-level outer class for testing nested class importability.""" + + class NestedModel(BaseModel): + """A BaseModel nested inside a module-level class.""" + + value: int + + +class TestImportString: + """Tests for the import_string function.""" + + def test_import_string_handles_nested_class_path(self): + """Verify our import_string handles nested class paths that pydantic's ImportString cannot. + + Pydantic's ImportString fails on paths like 'module.OuterClass.InnerClass' because + it tries to import the entire path as a module. Our import_string progressively + tries shorter module paths and uses getattr for the rest. + """ + import pytest + from pydantic import ImportString, TypeAdapter + + from ccflow.exttypes.pyobjectpath import import_string + + nested_path = "ccflow.tests.test_local_persistence.OuterClass.NestedModel" + + # Pydantic's ImportString fails on nested class paths + pydantic_adapter = TypeAdapter(ImportString) + with pytest.raises(Exception, match="No module named"): + pydantic_adapter.validate_python(nested_path) + + # Our import_string handles it correctly + result = import_string(nested_path) + assert result is OuterClass.NestedModel + + def test_import_string_still_works_for_simple_paths(self): + """Verify import_string still works for simple module.ClassName paths.""" + from ccflow.exttypes.pyobjectpath import import_string + + # Simple class path + result = import_string("ccflow.tests.test_local_persistence.ModuleLevelModel") + assert result is ModuleLevelModel + + # Built-in module + result = import_string("os.path.join") + import os.path + + assert result is os.path.join + + +class TestNestedClasses: + """Tests for classes nested inside other classes.""" + + def test_nested_class_inside_module_level_class_is_importable(self): + """Verify that a nested class inside a module-level class IS importable. + + Classes nested inside module-level classes (like OuterClass.NestedModel) + have qualnames like 'OuterClass.NestedModel' and ARE importable via + the standard module.qualname path. + """ + # The qualname has a '.' indicating nested class + assert "." in OuterClass.NestedModel.__qualname__ + assert OuterClass.NestedModel.__qualname__ == "OuterClass.NestedModel" + + # Should be importable - it's in the module namespace + assert local_persistence._is_importable(OuterClass.NestedModel) + + # Should not have __ccflow_import_path__ + assert "__ccflow_import_path__" not in OuterClass.NestedModel.__dict__ + + # type_ should use standard path + instance = OuterClass.NestedModel(value=42) + type_path = str(instance.type_) + assert type_path == "ccflow.tests.test_local_persistence.OuterClass.NestedModel" + assert "_Local_" not in type_path + assert instance.type_.object is OuterClass.NestedModel + + def test_nested_class_inside_function_not_importable(self): + """Verify that a class nested inside a function-defined class is not importable.""" + + def create_outer(): + class Outer: + class Inner(BaseModel): + value: int + + return Outer + + Outer = create_outer() + # The inner class has in its qualname (from Outer) + assert "" in Outer.Inner.__qualname__ + assert not local_persistence._is_importable(Outer.Inner) + + # Should get registered and have __ccflow_import_path__ + assert hasattr(Outer.Inner, "__ccflow_import_path__") + + +class TestInheritanceDoesNotPropagateImportPath: + """Tests verifying that __ccflow_import_path__ is not inherited by subclasses.""" + + def test_subclass_of_local_class_gets_own_registration(self): + """Verify that subclassing a local class doesn't inherit __ccflow_import_path__.""" + + def create_base(): + class LocalBase(BaseModel): + value: int + + return LocalBase + + def create_derived(base_cls): + class LocalDerived(base_cls): + extra: str = "default" + + return LocalDerived + + Base = create_base() + Derived = create_derived(Base) + + # Both should have __ccflow_import_path__ in their own __dict__ + assert "__ccflow_import_path__" in Base.__dict__ + assert "__ccflow_import_path__" in Derived.__dict__ + + # They should have DIFFERENT import paths + assert Base.__ccflow_import_path__ != Derived.__ccflow_import_path__ + + # Both should be importable + base_instance = Base(value=1) + derived_instance = Derived(value=2, extra="test") + + assert base_instance.type_.object is Base + assert derived_instance.type_.object is Derived + + def test_subclass_of_module_level_class_not_registered(self): + """Verify that subclassing a module-level class inside a function creates a local class.""" + + def create_subclass(): + class LocalSubclass(ModuleLevelModel): + extra: str = "default" + + return LocalSubclass + + Subclass = create_subclass() + + # The subclass is local (defined in function), so needs registration + assert "" in Subclass.__qualname__ + assert "__ccflow_import_path__" in Subclass.__dict__ + + # But the parent should NOT have __ccflow_import_path__ + assert "__ccflow_import_path__" not in ModuleLevelModel.__dict__ + + +class TestCreateModelEdgeCases: + """Additional edge case tests for pydantic's create_model.""" + + def test_create_model_with_same_name_as_module_level_class(self): + """Test create_model with a name that matches an existing module-level class. + + The dynamically created class should get its own registration, not + conflict with the module-level class. + """ + + def make_model(): + # Create a model with the same name as ModuleLevelModel + return pydantic_create_model("ModuleLevelModel", x=(int, ...), __base__=BaseModel) + + DynamicModel = make_model() + + # The dynamic model should NOT be the same as the module-level one + assert DynamicModel is not ModuleLevelModel + + # Access type_ to trigger registration + instance = DynamicModel(x=123) + type_path = str(instance.type_) + + # Should get a _Local_ path since it's not actually importable + assert "_Local_" in type_path + assert "ModuleLevelModel" in type_path + + # The module-level class should still use its standard path + module_instance = ModuleLevelModel(value=456) + module_type_path = str(module_instance.type_) + assert module_type_path == "ccflow.tests.test_local_persistence.ModuleLevelModel" + + def test_create_model_result_assigned_to_different_name(self): + """Test that create_model models get registered even when assigned to a different name. + + create_model("Foo", ...) assigned to variable Bar should still work. + """ + DifferentName = pydantic_create_model("OriginalName", value=(int, ...), __base__=BaseModel) + + # The class __name__ is "OriginalName" but it's stored in variable DifferentName + assert DifferentName.__name__ == "OriginalName" + + # Should still work correctly + instance = DifferentName(value=42) + type_path = str(instance.type_) + + # The registration should use "OriginalName" (the __name__) + assert "OriginalName" in type_path + assert instance.type_.object is DifferentName + + +class TestGenericTypes: + """Tests for generic types and PyObjectPath.""" + + def test_generic_basemodel_type_path(self): + """Test that generic BaseModel subclasses work with type_. + + When you parameterize a generic type like GenericModel[int], + the resulting class has __name__='GenericModel[int]'. + The registration sanitizes this to 'GenericModel_int_'. + """ + from typing import Generic, TypeVar + + T = TypeVar("T") + + def create_generic(): + class GenericModel(BaseModel, Generic[T]): + data: T + + return GenericModel + + GenericModel = create_generic() + + # Create a concrete (parameterized) instance + instance = GenericModel[int](data=42) + + # type_ should work - the class gets registered with sanitized name + type_path = str(instance.type_) + # The parameterized type has __name__='GenericModel[int]' + # which gets sanitized to 'GenericModel_int_' in the registration + assert "_Local_" in type_path + assert "GenericModel" in type_path + # Verify the path actually resolves to the correct class + assert instance.type_.object is type(instance) + + def test_unparameterized_generic_type_path(self): + """Test that unparameterized generic types work with type_.""" + from typing import Generic, TypeVar + + T = TypeVar("T") + + def create_generic(): + class GenericModel(BaseModel, Generic[T]): + data: T # Will be Any when unparameterized + + return GenericModel + + GenericModel = create_generic() + + # Create an unparameterized instance + instance = GenericModel(data=42) + + # type_ should work + type_path = str(instance.type_) + assert "_Local_" in type_path + assert "GenericModel" in type_path + # No type parameter suffix since we used unparameterized version + assert instance.type_.object is GenericModel + + def test_build_standard_import_path_strips_brackets(self): + """Test that _build_standard_import_path strips generic type parameters from qualname.""" + from ccflow.exttypes.pyobjectpath import _build_standard_import_path + + class MockClass: + __module__ = "test.module" + __qualname__ = "MyClass[int, str]" + + path = _build_standard_import_path(MockClass) + assert "[" not in path + assert path == "test.module.MyClass" + + +class TestReduceTriggersRegistration: + """Tests verifying that __reduce__ triggers registration for consistent cross-process UUIDs.""" + + def test_pickle_triggers_registration_for_create_model(self): + """Verify that pickling a create_model instance triggers registration. + + Without __reduce__ triggering registration, create_model classes would get + different UUIDs in different processes if pickled before type_ is accessed. + """ + from ray.cloudpickle import dumps, loads + + def factory(): + return pydantic_create_model("ReduceTestModel", value=(int, ...), __base__=BaseModel) + + DynamicModel = factory() + instance = DynamicModel(value=42) + + # Before pickle, no __ccflow_import_path__ (deferred registration) + assert "__ccflow_import_path__" not in DynamicModel.__dict__ + + # Pickle triggers __reduce__ which triggers registration + pickled = dumps(instance) + + # After pickle, registration has happened + assert "__ccflow_import_path__" in DynamicModel.__dict__ + + # Unpickle and verify same UUID + restored = loads(pickled) + assert type(restored).__ccflow_import_path__ == DynamicModel.__ccflow_import_path__ + + def test_pickle_cross_process_consistent_uuid(self): + """Verify that pickling before type_ access gives consistent UUIDs across processes.""" + import os + import tempfile + + fd, pkl_path = tempfile.mkstemp(suffix=".pkl") + os.close(fd) + + try: + # Process 1: Create and pickle WITHOUT accessing type_ first + create_code = f''' +from ray.cloudpickle import dump +from pydantic import create_model +from ccflow import BaseModel + +def factory(): + return create_model("CrossProcessReduceModel", value=(int, ...), __base__=BaseModel) + +Model = factory() +instance = Model(value=42) + +# Pickle WITHOUT accessing type_ - __reduce__ should trigger registration +with open("{pkl_path}", "wb") as f: + dump(instance, f) + +print(Model.__ccflow_import_path__) +''' + result1 = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) + assert result1.returncode == 0, f"Process 1 failed: {result1.stderr}" + path1 = result1.stdout.strip() + + # Process 2: Load and verify same UUID + load_code = f''' +from ray.cloudpickle import load + +with open("{pkl_path}", "rb") as f: + restored = load(f) + +print(type(restored).__ccflow_import_path__) +''' + result2 = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) + assert result2.returncode == 0, f"Process 2 failed: {result2.stderr}" + path2 = result2.stdout.strip() + + # UUIDs must match + assert path1 == path2, f"UUID mismatch: {path1} != {path2}" + + finally: + if os.path.exists(pkl_path): + os.unlink(pkl_path) + + +class TestRegistrationStrategy: + """Tests verifying the registration strategy for different class types. + + The strategy is: + 1. Module-level classes: No registration needed, they're importable via standard path + 2. Local classes ( in qualname): Register immediately during class definition + 3. Dynamic classes (create_model, no ): Registered during pickle via __reduce__ + + This keeps import-time overhead minimal while still handling all cases correctly. + """ + + def test_module_level_classes_not_registered(self): + """Module-level classes should NOT have __ccflow_import_path__ set.""" + # ModuleLevelModel is defined at module level in this file + assert "__ccflow_import_path__" not in ModuleLevelModel.__dict__ + assert "" not in ModuleLevelModel.__qualname__ + + # Nested classes at module level also shouldn't need registration + assert "__ccflow_import_path__" not in OuterClass.NestedModel.__dict__ + + def test_local_class_registered_immediately(self): + """Local classes (with in qualname) should be registered during definition.""" + from unittest import mock + + # Must patch where it's used (base.py), not where it's defined (local_persistence) + with mock.patch("ccflow.base._register") as mock_do_reg: + + def create_local(): + class LocalModel(BaseModel): + value: int + + return LocalModel + + LocalModel = create_local() + + # _register SHOULD be called immediately for local classes + mock_do_reg.assert_called_once() + # Verify it has in qualname + assert "" in LocalModel.__qualname__ + + def test_create_model_defers_registration_until_type_access(self): + """create_model classes should defer registration until type_ is accessed. + + This is the key optimization: create_model produces classes without + in their qualname, so we can't tell at definition time if they need registration. + We defer the _is_importable check until type_ is accessed. + """ + from unittest import mock + + with mock.patch.object(local_persistence, "_register") as mock_do_reg: + # Create a model via create_model + DynamicModel = pydantic_create_model("DeferredModel", x=(int, ...), __base__=BaseModel) + + # No in qualname + assert "" not in DynamicModel.__qualname__ + + # _register should NOT have been called yet + mock_do_reg.assert_not_called() + + # Now access type_ which triggers the deferred check + instance = DynamicModel(x=1) + _ = instance.type_ + + # NOW it should have __ccflow_import_path__ + assert "__ccflow_import_path__" in DynamicModel.__dict__ + + def test_is_importable_only_called_lazily(self): + """_is_importable should only be called when we need to check, not during import.""" + from unittest import mock + + # Create a model via create_model + DynamicModel = pydantic_create_model("LazyCheckModel", x=(int, ...), __base__=BaseModel) + + with mock.patch.object(local_persistence, "_is_importable", wraps=local_persistence._is_importable) as mock_is_imp: + # Access type_ which triggers lazy registration check + instance = DynamicModel(x=1) + _ = instance.type_ + + # _is_importable should have been called + assert mock_is_imp.call_count >= 1 + + +class TestIsImportableEdgeCases: + """Tests for edge cases in the _is_importable function.""" + + def test_class_with_module_not_in_sys_modules(self): + """Test that a class claiming to be from an unloaded module is not importable.""" + cls = type("NotImportable", (), {}) + cls.__module__ = "nonexistent.fake.module" + cls.__qualname__ = "NotImportable" + + assert not local_persistence._is_importable(cls) + + def test_class_in_wrong_module(self): + """Test that a class claiming to be from a module it's not in is not importable.""" + cls = type("FakeClass", (), {}) + # Claim to be from this module, but with a name that doesn't exist + cls.__module__ = "ccflow.tests.test_local_persistence" + cls.__qualname__ = "ThisClassDoesNotExist" + + assert not local_persistence._is_importable(cls) diff --git a/docs/wiki/Key-Features.md b/docs/wiki/Key-Features.md index ea78c63..616e3d8 100644 --- a/docs/wiki/Key-Features.md +++ b/docs/wiki/Key-Features.md @@ -22,9 +22,6 @@ The naming was inspired by the open source library [Pydantic](https://docs.pydan `CallableModel`'s are called with a context (something that derives from `ContextBase`) and returns a result (something that derives from `ResultBase`). As an example, you may have a `SQLReader` callable model that when called with a `DateRangeContext` returns a `ArrowResult` (wrapper around a Arrow table) with data in the date range defined by the context by querying some SQL database. -> [!NOTE] -> `CallableModel`, `ContextBase`, and other `ccflow.BaseModel` subclasses declared inside local scopes (functions, tests, etc.) are automatically persisted under `ccflow._local_artifacts`. Each stored class is prefixed with its kind (for example, `callable_model__...` versus `context__...`) so PyObjectPath-based evaluators can serialize/deserialize them without collisions. The backing module is append-only; long-lived processes should avoid generating unbounded unique classes if cleanup is required. - ## Model Registry A `ModelRegistry` is a named collection of models. From f0f32620732cbca5e1a72ab012548d1a06931bef Mon Sep 17 00:00:00 2001 From: Nijat Khanbabayev Date: Wed, 31 Dec 2025 01:22:24 -0500 Subject: [PATCH 10/15] Simplify logic, restrict scope to not include create_model, which is not yet supported Signed-off-by: Nijat Khanbabayev --- ccflow/base.py | 30 +- ccflow/exttypes/pyobjectpath.py | 16 +- ccflow/local_persistence.py | 75 +- ccflow/tests/evaluators/test_common.py | 57 -- ccflow/tests/local_helpers.py | 90 -- ccflow/tests/test_callable.py | 285 +----- ccflow/tests/test_local_persistence.py | 1273 +++++------------------- 7 files changed, 335 insertions(+), 1491 deletions(-) delete mode 100644 ccflow/tests/local_helpers.py diff --git a/ccflow/base.py b/ccflow/base.py index 8630d23..eb7eef8 100644 --- a/ccflow/base.py +++ b/ccflow/base.py @@ -30,7 +30,7 @@ from typing_extensions import Self from .exttypes.pyobjectpath import PyObjectPath -from .local_persistence import _register, _register_local_subclass_if_needed +from .local_persistence import _register, _sync_to_module log = logging.getLogger(__name__) @@ -168,20 +168,6 @@ def __pydantic_init_subclass__(cls, **kwargs): if "" in cls.__qualname__: _register(cls) - def __reduce_ex__(self, protocol): - # Ensure the class is registered before pickling. This is necessary for classes - # created dynamically (e.g., via pydantic's create_model) that don't have '' - # in their __qualname__. Without this, such classes would get different UUIDs in - # different processes if pickled before type_ is accessed, breaking cross-process - # consistency. By registering here, we guarantee __ccflow_import_path__ is set - # before cloudpickle serializes the class definition. - # - # We use __reduce_ex__ instead of __reduce__ because it's called first, allowing us - # to perform the registration side effect. We then return NotImplemented to let - # pydantic's default pickling mechanism take over, preserving the pickle format. - _register_local_subclass_if_needed(type(self)) - return super().__reduce_ex__(protocol) - @computed_field( alias="_target_", repr=False, @@ -191,8 +177,18 @@ def __reduce_ex__(self, protocol): ) @property def type_(self) -> PyObjectPath: - """The path to the object type""" - return PyObjectPath.validate(type(self)) + """The path to the object type. + + For local classes (defined in functions), this returns the __ccflow_import_path__. + For cross-process unpickle scenarios, this also ensures the class is synced to + the ccflow.local_persistence module so the import path resolves correctly. + """ + cls = type(self) + # Handle cross-process unpickle: cloudpickle sets __ccflow_import_path__ but + # the class may not be on ccflow.local_persistence yet in this process + if "__ccflow_import_path__" in cls.__dict__: + _sync_to_module(cls) + return PyObjectPath.validate(cls) # We want to track under what names a model has been registered _registrations: List[Tuple["ModelRegistry", str]] = PrivateAttr(default_factory=list) diff --git a/ccflow/exttypes/pyobjectpath.py b/ccflow/exttypes/pyobjectpath.py index 10131c5..178c4cb 100644 --- a/ccflow/exttypes/pyobjectpath.py +++ b/ccflow/exttypes/pyobjectpath.py @@ -9,8 +9,6 @@ from pydantic_core import core_schema from typing_extensions import Self -from ccflow.local_persistence import _register_local_subclass_if_needed - @lru_cache(maxsize=None) def import_string(dotted_path: str) -> Any: @@ -120,13 +118,15 @@ def _validate(cls, value: Any): @classmethod def _path_from_object(cls, value: Any) -> "PyObjectPath": - """Build import path from an object. Triggers ccflow registration for local classes.""" - if isinstance(value, type): - # For ccflow BaseModel subclasses that aren't normally importable (defined in - # functions or via create_model), this registers them on ccflow.base - _register_local_subclass_if_needed(value) + """Build import path from an object. - # Use __ccflow_import_path__ if set (check __dict__ to avoid inheriting from parents) + For ccflow BaseModel subclasses with __ccflow_import_path__ set (local classes), + uses that path. Otherwise uses the standard module.qualname path. + """ + if isinstance(value, type): + # Use __ccflow_import_path__ if set (check __dict__ to avoid inheriting from parents). + # Note: accessing .__dict__ is safe here because value is a type (class object), + # and all class objects have __dict__. Only instances of __slots__ classes lack it. if "__ccflow_import_path__" in value.__dict__: return cls(value.__ccflow_import_path__) return cls(_build_standard_import_path(value)) diff --git a/ccflow/local_persistence.py b/ccflow/local_persistence.py index db1e748..bc598fa 100644 --- a/ccflow/local_persistence.py +++ b/ccflow/local_persistence.py @@ -1,8 +1,14 @@ -"""Register local-scope classes on ccflow.base so PyObjectPath can import them. +"""Register local-scope classes on a module so PyObjectPath can import them. -Classes defined in functions or via create_model aren't normally importable. -We give them a unique name and put them on ccflow.base. We keep __module__ and -__qualname__ unchanged so cloudpickle can still serialize the class definition. +Classes defined in functions (with '' in __qualname__) aren't normally importable. +We give them a unique name and register them on this module (ccflow.local_persistence). +We keep __module__ and __qualname__ unchanged so cloudpickle can still serialize the +class definition. + +This module provides: +- _register(cls): Register a local class with a unique import path +- _sync_to_module(cls): Ensure a class with __ccflow_import_path__ is on the module + (used for cross-process unpickle scenarios) """ import re @@ -10,66 +16,37 @@ import uuid from typing import Any, Type -__all__ = ("LOCAL_ARTIFACTS_MODULE_NAME", "_register_local_subclass_if_needed", "_register", "_sync_to_module") - -LOCAL_ARTIFACTS_MODULE_NAME = "ccflow.base" - - -def _is_importable(cls: Type[Any]) -> bool: - """Can cls be imported via its __module__.__qualname__ path?""" - qualname = getattr(cls, "__qualname__", "") - module_name = getattr(cls, "__module__", "") +__all__ = ("LOCAL_ARTIFACTS_MODULE_NAME",) - if "" in qualname or module_name == "__main__": - return False - - module = sys.modules.get(module_name) - if not module: - return False - - obj = module - for part in qualname.split("."): - obj = getattr(obj, part, None) - if obj is None: - return False - return obj is cls +LOCAL_ARTIFACTS_MODULE_NAME = "ccflow.local_persistence" def _register(cls: Type[Any]) -> None: - """Give cls a unique name and put it on ccflow.base.""" + """Give cls a unique name and register it on the artifacts module. + + This sets __ccflow_import_path__ on the class without modifying __module__ or + __qualname__, preserving cloudpickle's ability to serialize the class definition. + """ + # Sanitize the class name to be a valid Python identifier name = re.sub(r"[^0-9A-Za-z_]", "_", cls.__name__ or "Model").strip("_") or "Model" if name[0].isdigit(): name = f"_{name}" unique = f"_Local_{name}_{uuid.uuid4().hex[:12]}" - setattr(sys.modules["ccflow.base"], unique, cls) + setattr(sys.modules[LOCAL_ARTIFACTS_MODULE_NAME], unique, cls) cls.__ccflow_import_path__ = f"{LOCAL_ARTIFACTS_MODULE_NAME}.{unique}" def _sync_to_module(cls: Type[Any]) -> None: - """Ensure cls is on ccflow.base in this process (for cross-process unpickle).""" + """Ensure cls is registered on the artifacts module in this process. + + This handles cross-process unpickle scenarios where cloudpickle recreates the class + with __ccflow_import_path__ already set (from the original process), but the class + isn't yet registered on ccflow.local_persistence in the new process. + """ path = getattr(cls, "__ccflow_import_path__", "") if path.startswith(LOCAL_ARTIFACTS_MODULE_NAME + "."): name = path.rsplit(".", 1)[-1] - base = sys.modules["ccflow.base"] + base = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] if getattr(base, name, None) is not cls: setattr(base, name, cls) - - -def _register_local_subclass_if_needed(cls: Type[Any]) -> None: - """Register cls if not importable. Called from PyObjectPath and __reduce_ex__.""" - if "__ccflow_import_path__" in cls.__dict__: - _sync_to_module(cls) - return - if "__ccflow_importable__" in cls.__dict__: - return - - from ccflow.base import BaseModel - - if not (isinstance(cls, type) and issubclass(cls, BaseModel)): - return - - if _is_importable(cls): - cls.__ccflow_importable__ = True - else: - _register(cls) diff --git a/ccflow/tests/evaluators/test_common.py b/ccflow/tests/evaluators/test_common.py index fa85958..6124ed2 100644 --- a/ccflow/tests/evaluators/test_common.py +++ b/ccflow/tests/evaluators/test_common.py @@ -1,21 +1,15 @@ import logging from datetime import date -from typing import ClassVar from unittest import TestCase import pandas as pd import pyarrow as pa from ccflow import ( - CallableModel, - ContextBase, DateContext, DateRangeContext, Evaluator, - Flow, - FlowOptions, FlowOptionsOverride, - GenericResult, ModelEvaluationContext, NullContext, ) @@ -29,7 +23,6 @@ combine_evaluators, get_dependency_graph, ) -from ccflow.tests.local_helpers import build_meta_sensor_planner, build_nested_graph_chain from .util import CircularModel, MyDateCallable, MyDateRangeCallable, MyRaisingCallable, NodeModel, ResultModel @@ -220,22 +213,6 @@ def test_logging_options_nested(self): self.assertIn("End evaluation of __call__", captured.records[2].getMessage()) self.assertIn("time elapsed", captured.records[2].getMessage()) - def test_meta_callable_logged_with_evaluator(self): - """Meta callables can spin up request-scoped specialists and still inherit evaluator instrumentation.""" - SensorQuery, MetaSensorPlanner, captured = build_meta_sensor_planner() - evaluator = LoggingEvaluator(log_level=logging.INFO, verbose=False) - request = SensorQuery(sensor_type="pressure-valve", site="orbital-lab", window=4) - meta = MetaSensorPlanner(warm_start=2) - with FlowOptionsOverride(options=FlowOptions(evaluator=evaluator)): - with self.assertLogs(level=logging.INFO) as captured_logs: - result = meta(request) - self.assertEqual(result.value, "planner:orbital-lab:pressure-valve:6") - start_messages = [record.getMessage() for record in captured_logs.records if "Start evaluation" in record.getMessage()] - self.assertEqual(len(start_messages), 2) - self.assertTrue(any("MetaSensorPlanner" in msg for msg in start_messages)) - specialist_name = captured["callable_cls"].__name__ - self.assertTrue(any(specialist_name in msg for msg in start_messages)) - class SubContext(DateContext): pass @@ -503,37 +480,3 @@ def test_graph_evaluator_circular(self): evaluator = GraphEvaluator() with FlowOptionsOverride(options={"evaluator": evaluator}): self.assertRaises(Exception, root, context) - - def test_graph_evaluator_with_local_models_and_cache(self): - ParentCls, ChildCls = build_nested_graph_chain() - ChildCls.call_count = 0 - model = ParentCls(child=ChildCls()) - evaluator = MultiEvaluator(evaluators=[GraphEvaluator(), MemoryCacheEvaluator()]) - with FlowOptionsOverride(options=FlowOptions(evaluator=evaluator, cacheable=True)): - first = model(NullContext()) - second = model(NullContext()) - self.assertEqual(first.value, second.value) - self.assertEqual(ChildCls.call_count, 1) - - -class TestMemoryCacheEvaluatorLocal(TestCase): - def test_memory_cache_handles_local_context_and_callable(self): - class LocalContext(ContextBase): - value: int - - class LocalModel(CallableModel): - call_count: ClassVar[int] = 0 - - @Flow.call - def __call__(self, context: LocalContext) -> GenericResult: - type(self).call_count += 1 - return GenericResult(value=context.value * 2) - - evaluator = MemoryCacheEvaluator() - LocalModel.call_count = 0 - model = LocalModel() - with FlowOptionsOverride(options=FlowOptions(evaluator=evaluator, cacheable=True)): - result1 = model(LocalContext(value=5)) - result2 = model(LocalContext(value=5)) - self.assertEqual(result1.value, result2.value) - self.assertEqual(LocalModel.call_count, 1) diff --git a/ccflow/tests/local_helpers.py b/ccflow/tests/local_helpers.py deleted file mode 100644 index b5ffcdf..0000000 --- a/ccflow/tests/local_helpers.py +++ /dev/null @@ -1,90 +0,0 @@ -"""Shared helpers for constructing local-scope contexts/models in tests.""" - -from typing import ClassVar, Dict, Tuple, Type - -from ccflow import CallableModel, ContextBase, Flow, GenericResult, GraphDepList, NullContext - - -def build_meta_sensor_planner(): - """Return a (SensorQuery, MetaSensorPlanner, captured) tuple for meta-callable tests.""" - - captured: Dict[str, Type] = {} - - class SensorQuery(ContextBase): - sensor_type: str - site: str - window: int - - class MetaSensorPlanner(CallableModel): - warm_start: int = 2 - - @Flow.call - def __call__(self, context: SensorQuery) -> GenericResult: - # Define request-scoped specialist wiring with a bespoke context/model pair. - class SpecialistContext(ContextBase): - sensor_type: str - window: int - pipeline: str - - class SpecialistCallable(CallableModel): - pipeline: str - - @Flow.call - def __call__(self, context: SpecialistContext) -> GenericResult: - payload = f"{self.pipeline}:{context.sensor_type}:{context.window}" - return GenericResult(value=payload) - - captured["context_cls"] = SpecialistContext - captured["callable_cls"] = SpecialistCallable - - window = context.window + self.warm_start - local_context = SpecialistContext( - sensor_type=context.sensor_type, - window=window, - pipeline=f"{context.site}-calibration", - ) - specialist = SpecialistCallable(pipeline=f"planner:{context.site}") - return specialist(local_context) - - return SensorQuery, MetaSensorPlanner, captured - - -def build_local_callable(name: str = "LocalCallable") -> Type[CallableModel]: - class _LocalCallable(CallableModel): - @Flow.call - def __call__(self, context: NullContext) -> GenericResult: - return GenericResult(value="local") - - _LocalCallable.__name__ = name - return _LocalCallable - - -def build_local_context(name: str = "LocalContext") -> Type[ContextBase]: - class _LocalContext(ContextBase): - value: int - - _LocalContext.__name__ = name - return _LocalContext - - -def build_nested_graph_chain() -> Tuple[Type[CallableModel], Type[CallableModel]]: - class LocalLeaf(CallableModel): - call_count: ClassVar[int] = 0 - - @Flow.call - def __call__(self, context: NullContext) -> GenericResult: - type(self).call_count += 1 - return GenericResult(value="leaf") - - class LocalParent(CallableModel): - child: LocalLeaf - - @Flow.call - def __call__(self, context: NullContext) -> GenericResult: - return self.child(context) - - @Flow.deps - def __deps__(self, context: NullContext) -> GraphDepList: - return [(self.child, [context])] - - return LocalParent, LocalLeaf diff --git a/ccflow/tests/test_callable.py b/ccflow/tests/test_callable.py index 36c2133..43f86b5 100644 --- a/ccflow/tests/test_callable.py +++ b/ccflow/tests/test_callable.py @@ -1,6 +1,5 @@ -import sys from pickle import dumps as pdumps, loads as ploads -from typing import ClassVar, Generic, List, Optional, Tuple, Type, TypeVar, Union +from typing import Generic, List, Optional, Tuple, Type, TypeVar, Union from unittest import TestCase import ray @@ -23,28 +22,6 @@ WrapperModel, ) from ccflow.local_persistence import LOCAL_ARTIFACTS_MODULE_NAME -from ccflow.tests.local_helpers import build_local_callable, build_local_context, build_meta_sensor_planner - - -def _find_registered_name(module, cls): - for name, value in vars(module).items(): - if value is cls: - return name - raise AssertionError(f"{cls} not found in {module.__name__}") - - -def create_sensor_scope_models(): - class SensorScope(ContextBase): - reading: int - - class SensorCalibrator(CallableModel): - offset: int = 1 - - @Flow.call - def __call__(self, context: SensorScope) -> GenericResult: - return GenericResult(value=context.reading + self.offset) - - return SensorScope, SensorCalibrator class MyContext(ContextBase): @@ -518,247 +495,35 @@ def test_union_return(self): class TestCallableModelRegistration(TestCase): - def test_module_level_class_retains_module(self): - self.assertEqual(MyCallable.__module__, __name__) - dynamic_module = sys.modules.get(LOCAL_ARTIFACTS_MODULE_NAME) - if dynamic_module: - self.assertFalse(any(value is MyCallable for value in vars(dynamic_module).values())) - - def test_module_level_context_retains_module(self): - self.assertEqual(MyContext.__module__, __name__) - dynamic_module = sys.modules.get(LOCAL_ARTIFACTS_MODULE_NAME) - if dynamic_module: - self.assertFalse(any(value is MyContext for value in vars(dynamic_module).values())) - - def test_local_class_registered_with_import_path(self): - """Test that local-scope classes preserve __module__/__qualname__ but get an import path. - - The new behavior: - - __module__ stays as the original (preserves cloudpickle serialization) - - __qualname__ stays with '' (preserves cloudpickle serialization) - - __ccflow_import_path__ is set for PyObjectPath validation - - Class is registered in _local_artifacts under a unique name - """ - LocalCallable = build_local_callable() - # __module__ should NOT change to _local_artifacts - self.assertNotEqual(LocalCallable.__module__, LOCAL_ARTIFACTS_MODULE_NAME) - # __qualname__ should still have '' - self.assertIn("", LocalCallable.__qualname__) - # But __ccflow_import_path__ should be set and point to _local_artifacts - self.assertTrue(hasattr(LocalCallable, "__ccflow_import_path__")) - import_path = LocalCallable.__ccflow_import_path__ - self.assertTrue(import_path.startswith(f"{LOCAL_ARTIFACTS_MODULE_NAME}.")) - # Class should be registered in _local_artifacts under the import path - dynamic_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] - registered_name = import_path.split(".")[-1] - self.assertIs(getattr(dynamic_module, registered_name), LocalCallable) - # Functionality should work - result = LocalCallable()(NullContext()) - self.assertEqual(result.value, "local") - - def test_multiple_local_definitions_have_unique_import_paths(self): - """Test that multiple local classes get unique import paths.""" - first = build_local_callable() - second = build_local_callable() - # __qualname__ stays the same (both are 'build_local_callable.._LocalCallable') - self.assertEqual(first.__qualname__, second.__qualname__) - # But __ccflow_import_path__ should be unique - self.assertNotEqual(first.__ccflow_import_path__, second.__ccflow_import_path__) - # Both should be registered in _local_artifacts - dynamic_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] - first_name = first.__ccflow_import_path__.split(".")[-1] - second_name = second.__ccflow_import_path__.split(".")[-1] - self.assertIs(getattr(dynamic_module, first_name), first) - self.assertIs(getattr(dynamic_module, second_name), second) - - def test_context_and_callable_same_name_do_not_collide(self): - def build_conflicting(): - class LocalThing(ContextBase): - value: int - - context_cls = LocalThing - - class LocalThing(CallableModel): - @Flow.call - def __call__(self, context: context_cls) -> GenericResult: - return GenericResult(value=context.value) - - callable_cls = LocalThing - return context_cls, callable_cls - - LocalContext, LocalCallable = build_conflicting() - locals_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] - ctx_attr = _find_registered_name(locals_module, LocalContext) - model_attr = _find_registered_name(locals_module, LocalCallable) - # Both should use _Local__ format - self.assertTrue(ctx_attr.startswith("_Local_LocalThing_")) - self.assertTrue(model_attr.startswith("_Local_LocalThing_")) - # UUIDs make them unique even with same class name - self.assertEqual(getattr(locals_module, ctx_attr), LocalContext) - self.assertEqual(getattr(locals_module, model_attr), LocalCallable) - self.assertNotEqual(ctx_attr, model_attr, "UUIDs keep same-named classes distinct.") - - def test_local_callable_type_path_roundtrip(self): - LocalCallable = build_local_callable() - instance = LocalCallable() - path = instance.type_ - self.assertEqual(path.object, LocalCallable) - self.assertTrue(str(path).startswith(f"{LOCAL_ARTIFACTS_MODULE_NAME}.")) - - def test_local_context_type_path_roundtrip(self): - LocalContext = build_local_context() - ctx = LocalContext(value=10) - path = ctx.type_ - self.assertEqual(path.object, LocalContext) - self.assertTrue(str(path).startswith(f"{LOCAL_ARTIFACTS_MODULE_NAME}.")) - - def test_local_context_and_model_serialization_roundtrip(self): + """Smoke test verifying CallableModel inherits registration from BaseModel. + + NOTE: Registration behavior is thoroughly tested at the BaseModel level in + test_local_persistence.py. This single test verifies inheritance works. + """ + + def test_local_callable_smoke_test(self): + """Verify that local CallableModel classes inherit registration from BaseModel.""" + class LocalContext(ContextBase): value: int - class LocalModel(CallableModel): - factor: int = 2 - + class LocalCallable(CallableModel): @Flow.call def __call__(self, context: LocalContext) -> GenericResult: - return GenericResult(value=context.value * self.factor) - - instance = LocalModel(factor=5) - context = LocalContext(value=7) - result = instance(context) - self.assertEqual(result.value, 35) - serialized_model = instance.model_dump(mode="python") - restored_model = LocalModel.model_validate(serialized_model) - self.assertEqual(restored_model, instance) - serialized_context = context.model_dump(mode="python") - restored_context = LocalContext.model_validate(serialized_context) - self.assertEqual(restored_context, context) - restored_result = restored_model(restored_context) - self.assertEqual(restored_result.value, 35) - - def test_multiple_nested_levels_unique_import_paths(self): - """Test that multiple nested local classes all get unique import paths.""" - created = [] - - def layer(depth: int): - class LocalContext(ContextBase): - value: int - - class LocalModel(CallableModel): - multiplier: int = depth + 1 - call_count: ClassVar[int] = 0 - - @Flow.call - def __call__(self, context: LocalContext) -> GenericResult: - type(self).call_count += 1 - return GenericResult(value=context.value * self.multiplier) - - created.append((depth, LocalContext, LocalModel)) - - if depth < 2: - - def inner(): - layer(depth + 1) - - inner() - - def sibling_group(): - class LocalContext(ContextBase): - value: int - - class LocalModel(CallableModel): - @Flow.call - def __call__(self, context: LocalContext) -> GenericResult: - return GenericResult(value=context.value) - - created.append(("sibling", LocalContext, LocalModel)) - - layer(0) - sibling_group() - sibling_group() - - locals_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] - - # __ccflow_import_path__ should be unique for each class - context_import_paths = {ctx.__ccflow_import_path__ for _, ctx, _ in created} - model_import_paths = {model.__ccflow_import_path__ for _, _, model in created} - self.assertEqual(len(context_import_paths), len(created)) - self.assertEqual(len(model_import_paths), len(created)) - - for label, ctx_cls, model_cls in created: - # Each class should be registered under its import path - ctx_name = ctx_cls.__ccflow_import_path__.split(".")[-1] - model_name = model_cls.__ccflow_import_path__.split(".")[-1] - self.assertIs(getattr(locals_module, ctx_name), ctx_cls) - self.assertIs(getattr(locals_module, model_name), model_cls) - # Original qualname should still have '' (preserved for cloudpickle) - self.assertIn("", ctx_cls.__qualname__) - self.assertIn("", model_cls.__qualname__) - ctx_instance = ctx_cls(value=4) - result = model_cls()(ctx_instance) - if isinstance(label, str): # sibling group - self.assertEqual(result.value, ctx_instance.value) - else: - self.assertEqual(result.value, ctx_instance.value * (label + 1)) - - def test_meta_callable_builds_dynamic_specialist(self): - SensorQuery, MetaSensorPlanner, captured = build_meta_sensor_planner() - request = SensorQuery(sensor_type="wind-turbine", site="ridge-line", window=5) - meta = MetaSensorPlanner(warm_start=3) - result = meta(request) - self.assertEqual(result.value, "planner:ridge-line:wind-turbine:8") - - locals_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] - SpecialistContext = captured["context_cls"] - SpecialistCallable = captured["callable_cls"] - # __module__ should NOT change (preserves cloudpickle) - self.assertNotEqual(SpecialistContext.__module__, LOCAL_ARTIFACTS_MODULE_NAME) - self.assertNotEqual(SpecialistCallable.__module__, LOCAL_ARTIFACTS_MODULE_NAME) - # But __ccflow_import_path__ should be set and classes should be registered - ctx_name = SpecialistContext.__ccflow_import_path__.split(".")[-1] - model_name = SpecialistCallable.__ccflow_import_path__.split(".")[-1] - self.assertIs(getattr(locals_module, ctx_name), SpecialistContext) - self.assertIs(getattr(locals_module, model_name), SpecialistCallable) - - def test_dynamic_factory_pickle_roundtrip(self): - """Test that dynamically created local classes can be pickled with cloudpickle. - - Note: Standard pickle can't handle classes with '' in __qualname__ - because it tries to import the class by module.qualname. Cloudpickle can - serialize the class definition, which is essential for distributed computing - (e.g., Ray tasks). This tradeoff enables cross-process cloudpickle support - while PyObjectPath still works via __ccflow_import_path__. - """ - # Test with cloudpickle (which can serialize class definitions) - factory = rcploads(rcpdumps(create_sensor_scope_models)) - SensorContext, SensorModel = factory() - # __module__ should NOT change (preserves cloudpickle) - self.assertNotEqual(SensorContext.__module__, LOCAL_ARTIFACTS_MODULE_NAME) - self.assertNotEqual(SensorModel.__module__, LOCAL_ARTIFACTS_MODULE_NAME) - # But __ccflow_import_path__ should be set and point to _local_artifacts - self.assertTrue(SensorContext.__ccflow_import_path__.startswith(f"{LOCAL_ARTIFACTS_MODULE_NAME}.")) - self.assertTrue(SensorModel.__ccflow_import_path__.startswith(f"{LOCAL_ARTIFACTS_MODULE_NAME}.")) - # Classes should be registered in _local_artifacts - locals_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] - ctx_name = SensorContext.__ccflow_import_path__.split(".")[-1] - model_name = SensorModel.__ccflow_import_path__.split(".")[-1] - self.assertIs(getattr(locals_module, ctx_name), SensorContext) - self.assertIs(getattr(locals_module, model_name), SensorModel) - - context = SensorContext(reading=41) - model = SensorModel(offset=1) - self.assertEqual(model(context).value, 42) - - # Class roundtrip with cloudpickle - restored_context_cls = rcploads(rcpdumps(SensorContext)) - restored_model_cls = rcploads(rcpdumps(SensorModel)) - self.assertIs(restored_context_cls, SensorContext) - self.assertIs(restored_model_cls, SensorModel) - - # Instance roundtrip with cloudpickle - restored_context = rcploads(rcpdumps(context)) - restored_model = rcploads(rcpdumps(model)) - self.assertEqual(restored_model(restored_context).value, 42) + return GenericResult(value=context.value * 2) + + # Basic registration should work (inherits from BaseModel) + self.assertIn("", LocalCallable.__qualname__) + self.assertTrue(hasattr(LocalCallable, "__ccflow_import_path__")) + self.assertTrue(LocalCallable.__ccflow_import_path__.startswith(LOCAL_ARTIFACTS_MODULE_NAME)) + + # type_ should work + instance = LocalCallable() + self.assertEqual(instance.type_.object, LocalCallable) + + # Callable should execute correctly + result = instance(LocalContext(value=21)) + self.assertEqual(result.value, 42) class TestWrapperModel(TestCase): diff --git a/ccflow/tests/test_local_persistence.py b/ccflow/tests/test_local_persistence.py index 657e180..78884d3 100644 --- a/ccflow/tests/test_local_persistence.py +++ b/ccflow/tests/test_local_persistence.py @@ -1,11 +1,22 @@ +"""Tests for local class registration in ccflow. + +The local persistence module allows classes defined inside functions (with '' +in their __qualname__) to work with PyObjectPath serialization by registering them +on ccflow.local_persistence with unique names. + +Key behaviors tested: +1. Local classes get __ccflow_import_path__ set at definition time +2. Module-level classes are NOT registered (they're already importable) +3. Cross-process cloudpickle works via _sync_to_module +4. UUID-based naming provides uniqueness +""" + import re import subprocess import sys -import textwrap -from unittest import TestCase +import pytest import ray -from pydantic import create_model as pydantic_create_model import ccflow.local_persistence as local_persistence from ccflow import BaseModel, CallableModel, ContextBase, Flow, GenericResult, NullContext @@ -25,433 +36,88 @@ def __call__(self, context: NullContext) -> GenericResult: return GenericResult(value="ok") -class TestIsImportable(TestCase): - """Tests for _is_importable - the core check for whether a class needs registration.""" - - def test_module_level_ccflow_classes_are_importable(self): - for cls in (ModuleLevelModel, ModuleLevelContext, ModuleLevelCallable): - with self.subTest(cls=cls.__name__): - self.assertTrue(local_persistence._is_importable(cls)) - - def test_local_scope_class_not_importable(self): - def build_class(): - class LocalClass: - pass +# ============================================================================= +# Tests for _register function +# ============================================================================= - return LocalClass - LocalClass = build_class() - self.assertFalse(local_persistence._is_importable(LocalClass)) +def test_base_module_available_after_import(): + """Test that ccflow.local_persistence module is available after importing ccflow.""" + assert local_persistence.LOCAL_ARTIFACTS_MODULE_NAME in sys.modules - def test_main_module_class_not_importable(self): - # __main__ classes are not importable from other processes - cls = type("MainModuleClass", (), {}) - cls.__module__ = "__main__" - cls.__qualname__ = "MainModuleClass" - self.assertFalse(local_persistence._is_importable(cls)) - def test_module_level_class_is_importable(self): - # Module-level classes are importable - self.assertTrue(local_persistence._is_importable(ModuleLevelModel)) +def test_register_preserves_module_qualname_and_sets_import_path(): + """Test that _register sets __ccflow_import_path__ without changing __module__ or __qualname__.""" - def test_dynamically_created_class_not_importable(self): - """Test that dynamically created classes (like from pydantic's create_model) are not importable.""" - # This simulates what happens with pydantic's create_model - DynamicClass = type("DynamicClass", (), {}) - DynamicClass.__module__ = "ccflow.tests.test_local_persistence" - DynamicClass.__qualname__ = "DynamicClass" - # This class has a valid-looking module/qualname but isn't actually in the module - self.assertFalse(local_persistence._is_importable(DynamicClass)) + def build(): + class Foo: + pass + return Foo -def _run_subprocess(code: str) -> str: - """Execute code in a clean interpreter so sys.modules starts empty.""" - result = subprocess.run( - [sys.executable, "-c", textwrap.dedent(code)], - check=True, - capture_output=True, - text=True, - ) - return result.stdout.strip() + Foo = build() + original_module = Foo.__module__ + original_qualname = Foo.__qualname__ + local_persistence._register(Foo) -def test_base_module_available_after_import(): - """Test that ccflow.base module is available after importing ccflow.""" - output = _run_subprocess( - """ - import sys - import ccflow - print(local_persistence.LOCAL_ARTIFACTS_MODULE_NAME in sys.modules) - """.replace("local_persistence.LOCAL_ARTIFACTS_MODULE_NAME", f'"{local_persistence.LOCAL_ARTIFACTS_MODULE_NAME}"') - ) - assert output == "True" + # __module__ and __qualname__ should NOT change (preserves cloudpickle) + assert Foo.__module__ == original_module, "__module__ should not change" + assert Foo.__qualname__ == original_qualname, "__qualname__ should not change" + assert "" in Foo.__qualname__, "__qualname__ should contain ''" - -def test_register_preserves_module_qualname_and_sets_import_path(): - output = _run_subprocess( - """ - import sys - import ccflow.local_persistence as lp - - def build(): - class Foo: - pass - return Foo - - Foo = build() - original_module = Foo.__module__ - original_qualname = Foo.__qualname__ - lp._register(Foo) - module = sys.modules[lp.LOCAL_ARTIFACTS_MODULE_NAME] - - # __module__ and __qualname__ should NOT change (preserves cloudpickle) - print(Foo.__module__ == original_module) - print(Foo.__qualname__ == original_qualname) - print("" in Foo.__qualname__) - - # __ccflow_import_path__ should be set and point to the registered class - import_path = Foo.__ccflow_import_path__ - registered_name = import_path.split(".")[-1] - print(hasattr(module, registered_name)) - print(getattr(module, registered_name) is Foo) - print(import_path.startswith("ccflow.base._Local_")) - """ - ) - lines = output.splitlines() - assert lines[0] == "True", f"__module__ should not change: {lines}" - assert lines[1] == "True", f"__qualname__ should not change: {lines}" - assert lines[2] == "True", f"__qualname__ should contain '': {lines}" - assert lines[3] == "True", f"Class should be registered in module: {lines}" - assert lines[4] == "True", f"Registered class should be the same object: {lines}" - assert lines[5] == "True", f"Import path should start with expected prefix: {lines}" + # __ccflow_import_path__ should be set and point to the registered class + import_path = Foo.__ccflow_import_path__ + registered_name = import_path.split(".")[-1] + module = sys.modules[local_persistence.LOCAL_ARTIFACTS_MODULE_NAME] + assert hasattr(module, registered_name), "Class should be registered in module" + assert getattr(module, registered_name) is Foo, "Registered class should be the same object" + assert import_path.startswith("ccflow.local_persistence._Local_"), "Import path should have expected prefix" def test_register_handles_class_name_starting_with_digit(): """Test that _register handles class names starting with a digit by prefixing with underscore.""" - output = _run_subprocess( - """ - import sys - import ccflow.local_persistence as lp + # Create a class with a name starting with a digit + cls = type("3DModel", (), {}) + local_persistence._register(cls) - # Create a class with a name starting with a digit - cls = type("3DModel", (), {}) - lp._register(cls) - - import_path = cls.__ccflow_import_path__ - registered_name = import_path.split(".")[-1] + import_path = cls.__ccflow_import_path__ + registered_name = import_path.split(".")[-1] - # The registered name should start with _Local__ (underscore added for digit) - print(registered_name.startswith("_Local__")) - print("_3DModel_" in registered_name) + # The registered name should start with _Local__ (underscore added for digit) + assert registered_name.startswith("_Local__"), "Registered name should start with _Local__" + assert "_3DModel_" in registered_name, "Registered name should contain _3DModel_" - # Should be registered on ccflow.base - module = sys.modules[lp.LOCAL_ARTIFACTS_MODULE_NAME] - print(getattr(module, registered_name) is cls) - """ - ) - lines = output.splitlines() - assert lines[0] == "True", f"Registered name should start with _Local__: {lines}" - assert lines[1] == "True", f"Registered name should contain _3DModel_: {lines}" - assert lines[2] == "True", f"Class should be registered on module: {lines}" + # Should be registered on ccflow.local_persistence + module = sys.modules[local_persistence.LOCAL_ARTIFACTS_MODULE_NAME] + assert getattr(module, registered_name) is cls, "Class should be registered on module" def test_sync_to_module_registers_class_not_yet_on_module(): """Test that _sync_to_module registers a class that has __ccflow_import_path__ but isn't on the module yet. This happens in cross-process unpickle scenarios where cloudpickle recreates the class - with __ccflow_import_path__ set, but the class isn't yet on ccflow.base. + with __ccflow_import_path__ set, but the class isn't yet on ccflow.local_persistence. """ - output = _run_subprocess( - """ - import sys - import ccflow.local_persistence as lp - - # Simulate a class that has __ccflow_import_path__ but isn't registered on ccflow.base - # (like what happens after cross-process cloudpickle unpickle) - cls = type("SimulatedUnpickled", (), {}) - unique_name = "_Local_SimulatedUnpickled_test123abc" - cls.__ccflow_import_path__ = f"{lp.LOCAL_ARTIFACTS_MODULE_NAME}.{unique_name}" - - # Verify class is NOT on ccflow.base yet - module = sys.modules[lp.LOCAL_ARTIFACTS_MODULE_NAME] - print(getattr(module, unique_name, None) is None) - - # Call _sync_to_module - lp._sync_to_module(cls) - - # Verify class IS now on ccflow.base - print(getattr(module, unique_name, None) is cls) - """ - ) - lines = output.splitlines() - assert lines[0] == "True", f"Class should NOT be on module before sync: {lines}" - assert lines[1] == "True", f"Class should be on module after sync: {lines}" - - -def test_local_basemodel_cloudpickle_cross_process(): - """Test that local-scope BaseModel subclasses work with cloudpickle cross-process. - - This is the key test for the "best of both worlds" approach: - - __qualname__ has '' so cloudpickle serializes the class definition - - __ccflow_import_path__ allows PyObjectPath validation to work - - After unpickling, __pydantic_init_subclass__ re-registers the class - """ - import os - import tempfile - - fd, pkl_path = tempfile.mkstemp(suffix=".pkl") - os.close(fd) - - try: - # Create a local-scope BaseModel in subprocess 1 and pickle it - create_result = subprocess.run( - [ - sys.executable, - "-c", - f""" -from ray.cloudpickle import dump -from ccflow import BaseModel - -def create_local_model(): - class LocalModel(BaseModel): - value: int - - return LocalModel - -LocalModel = create_local_model() - -# Verify __qualname__ has '' (enables cloudpickle serialization) -assert "" in LocalModel.__qualname__, f"Expected '' in qualname: {{LocalModel.__qualname__}}" - -# Verify __ccflow_import_path__ is set (enables PyObjectPath) -assert hasattr(LocalModel, "__ccflow_import_path__"), "Expected __ccflow_import_path__ to be set" - -# Create instance and verify type_ works (PyObjectPath validation) -instance = LocalModel(value=42) -type_path = instance.type_ -print(f"type_: {{type_path}}") - -# Pickle the instance -with open("{pkl_path}", "wb") as f: - dump(instance, f) - -print("SUCCESS: Created and pickled") -""", - ], - capture_output=True, - text=True, - ) - assert create_result.returncode == 0, f"Create subprocess failed: {create_result.stderr}" - assert "SUCCESS" in create_result.stdout, f"Create subprocess output: {create_result.stdout}" + # Simulate a class that has __ccflow_import_path__ but isn't registered on ccflow.local_persistence + # (like what happens after cross-process cloudpickle unpickle) + cls = type("SimulatedUnpickled", (), {}) + unique_name = "_Local_SimulatedUnpickled_test123abc" + cls.__ccflow_import_path__ = f"{local_persistence.LOCAL_ARTIFACTS_MODULE_NAME}.{unique_name}" - # Load in subprocess 2 (different process, class not pre-defined) - load_result = subprocess.run( - [ - sys.executable, - "-c", - f""" -from ray.cloudpickle import load + # Verify class is NOT on ccflow.local_persistence yet + module = sys.modules[local_persistence.LOCAL_ARTIFACTS_MODULE_NAME] + assert getattr(module, unique_name, None) is None, "Class should NOT be on module before sync" -with open("{pkl_path}", "rb") as f: - obj = load(f) + # Call _sync_to_module + local_persistence._sync_to_module(cls) -# Verify the value was preserved -assert obj.value == 42, f"Expected value=42, got {{obj.value}}" - -# Verify type_ works after unpickling (class was re-registered) -type_path = obj.type_ -print(f"type_: {{type_path}}") - -# Verify the import path works -import importlib -path_parts = str(type_path).rsplit(".", 1) -module = importlib.import_module(path_parts[0]) -cls = getattr(module, path_parts[1]) -assert cls is type(obj), "Import path should resolve to the same class" - -print("SUCCESS: Loaded and verified") -""", - ], - capture_output=True, - text=True, - ) - assert load_result.returncode == 0, f"Load subprocess failed: {load_result.stderr}" - assert "SUCCESS" in load_result.stdout, f"Load subprocess output: {load_result.stdout}" - - finally: - if os.path.exists(pkl_path): - os.unlink(pkl_path) + # Verify class IS now on ccflow.local_persistence + assert getattr(module, unique_name, None) is cls, "Class should be on module after sync" # ============================================================================= -# Tests for pydantic's create_model -# ============================================================================= - - -class TestPydanticCreateModel: - """Tests for dynamically created models using pydantic's create_model.""" - - def test_create_model_gets_registered(self): - """Test that models created with pydantic's create_model get registered.""" - - def make_model(): - return pydantic_create_model("DynamicModel", value=(int, ...), __base__=BaseModel) - - Model = make_model() - - # Registration happens lazily when type_ is accessed - # Create an instance and access type_ to trigger registration - instance = Model(value=42) - _ = instance.type_ - - # Should have __ccflow_import_path__ set after type_ access - assert hasattr(Model, "__ccflow_import_path__") - import_path = Model.__ccflow_import_path__ - assert import_path.startswith("ccflow.base._Local_") - - # Should be registered in ccflow.base - registered_name = import_path.split(".")[-1] - import ccflow.base - - assert hasattr(ccflow.base, registered_name) - assert getattr(ccflow.base, registered_name) is Model - - def test_create_model_type_works(self): - """Test that type_ property works for create_model-created models.""" - - def make_model(): - return pydantic_create_model("DynamicModel", value=(int, ...), __base__=BaseModel) - - Model = make_model() - instance = Model(value=42) - - # type_ should work and return the import path - type_path = instance.type_ - assert str(type_path) == Model.__ccflow_import_path__ - assert type_path.object is Model - - def test_create_model_importable(self): - """Test that create_model models can be imported via their type_ path.""" - import importlib - - def make_model(): - return pydantic_create_model("ImportableModel", data=(str, ...), __base__=BaseModel) - - Model = make_model() - instance = Model(data="test") - - type_path = str(instance.type_) - parts = type_path.rsplit(".", 1) - module = importlib.import_module(parts[0]) - imported_cls = getattr(module, parts[1]) - - assert imported_cls is Model - - def test_create_model_without_locals_still_gets_uuid(self): - """Test that create_model models (which don't have in qualname) get unique UUIDs.""" - - def make_models(): - Model1 = pydantic_create_model("SameName", x=(int, ...), __base__=BaseModel) - Model2 = pydantic_create_model("SameName", y=(str, ...), __base__=BaseModel) - return Model1, Model2 - - Model1, Model2 = make_models() - - # Both should be properly registered (trigger registration via type_) - instance1 = Model1.model_validate({"x": 1}) - instance2 = Model2.model_validate({"y": "test"}) - _ = instance1.type_ - _ = instance2.type_ - - # Both should have unique import paths (after type_ access triggers registration) - assert Model1.__ccflow_import_path__ != Model2.__ccflow_import_path__ - - assert instance1.x == 1 - assert instance2.y == "test" - - def test_create_model_context_base(self): - """Test create_model with ContextBase as base class.""" - - def make_context(): - return pydantic_create_model("DynamicContext", name=(str, ...), value=(int, ...), __base__=ContextBase) - - Context = make_context() - instance = Context(name="test", value=42) - - # Access type_ to trigger registration - type_path = instance.type_ - assert type_path.object is Context - - # After type_ access, __ccflow_import_path__ should be set - assert hasattr(Context, "__ccflow_import_path__") - - def test_create_model_cloudpickle_same_process(self): - """Test that create_model models work with cloudpickle in the same process.""" - from ray.cloudpickle import dumps, loads - - def make_model(): - return pydantic_create_model("PickleModel", value=(int, ...), __base__=BaseModel) - - Model = make_model() - instance = Model(value=123) - - restored = loads(dumps(instance)) - - assert restored.value == 123 - assert type(restored) is Model - - def test_create_model_cloudpickle_cross_process(self): - """Test that create_model models work with cloudpickle across processes.""" - import os - import tempfile - - fd, pkl_path = tempfile.mkstemp(suffix=".pkl") - os.close(fd) - - try: - create_code = f''' -from ray.cloudpickle import dump -from pydantic import create_model -from ccflow import BaseModel - -Model = create_model("CrossProcessModel", value=(int, ...), __base__=BaseModel) - -instance = Model(value=42) -# Access type_ to trigger registration (lazy for create_model) -type_path = instance.type_ -print(f"type_: {{type_path}}") - -# Now __ccflow_import_path__ should be set -assert hasattr(Model, "__ccflow_import_path__"), "Expected __ccflow_import_path__ after type_ access" - -with open("{pkl_path}", "wb") as f: - dump(instance, f) -print("SUCCESS") -''' - create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) - assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" - - load_code = f''' -from ray.cloudpickle import load - -with open("{pkl_path}", "rb") as f: - obj = load(f) - -assert obj.value == 42 -type_path = obj.type_ -assert type_path.object is type(obj) -print("SUCCESS") -''' - load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) - assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" - - finally: - if os.path.exists(pkl_path): - os.unlink(pkl_path) - - -# ============================================================================= -# Comprehensive tests for local persistence and PyObjectPath integration +# Tests for local class registration via BaseModel # ============================================================================= @@ -472,7 +138,7 @@ class Inner(BaseModel): assert cls.__module__ != local_persistence.LOCAL_ARTIFACTS_MODULE_NAME def test_module_not_changed_to_local_artifacts(self): - """Verify that __module__ is NOT changed to ccflow.base.""" + """Verify that __module__ is NOT changed to ccflow.local_persistence.""" def create_class(): class Inner(ContextBase): @@ -481,7 +147,7 @@ class Inner(ContextBase): return Inner cls = create_class() - # __module__ should be this test module, not ccflow.base + # __module__ should be this test module, not ccflow.local_persistence assert cls.__module__ == "ccflow.tests.test_local_persistence" assert cls.__module__ != local_persistence.LOCAL_ARTIFACTS_MODULE_NAME @@ -499,7 +165,7 @@ class Inner(BaseModel): assert cls.__ccflow_import_path__.startswith(local_persistence.LOCAL_ARTIFACTS_MODULE_NAME + ".") def test_class_registered_in_base_module(self): - """Verify that the class is registered in ccflow.base under import path.""" + """Verify that the class is registered in ccflow.local_persistence under import path.""" def create_class(): class Inner(BaseModel): @@ -647,168 +313,132 @@ class LocalModel(BaseModel): assert restored_type_path == original_type_path +# ============================================================================= +# Cross-process cloudpickle tests +# ============================================================================= + + +@pytest.fixture +def pickle_file(tmp_path): + """Provide a temporary pickle file path with automatic cleanup.""" + pkl_path = tmp_path / "test.pkl" + yield str(pkl_path) + + class TestCloudpickleCrossProcess: """Tests for cross-process cloudpickle behavior (subprocess tests).""" - def test_context_base_cross_process(self): - """Test cross-process cloudpickle for ContextBase subclasses.""" - import os - import tempfile - - fd, pkl_path = tempfile.mkstemp(suffix=".pkl") - os.close(fd) + def test_local_basemodel_cloudpickle_cross_process(self, pickle_file): + """Test that local-scope BaseModel subclasses work with cloudpickle cross-process.""" + pkl_path = pickle_file - try: - create_code = f''' + # Create a local-scope BaseModel in subprocess 1 and pickle it + create_code = f''' from ray.cloudpickle import dump -from ccflow import ContextBase +from ccflow import BaseModel -def create_context(): - class LocalContext(ContextBase): - name: str +def create_local_model(): + class LocalModel(BaseModel): value: int - return LocalContext -LocalContext = create_context() -assert "" in LocalContext.__qualname__ -instance = LocalContext(name="test", value=42) -_ = instance.type_ # Verify type_ works before pickle + return LocalModel +LocalModel = create_local_model() + +# Verify __qualname__ has '' (enables cloudpickle serialization) +assert "" in LocalModel.__qualname__, f"Expected '' in qualname: {{LocalModel.__qualname__}}" + +# Verify __ccflow_import_path__ is set (enables PyObjectPath) +assert hasattr(LocalModel, "__ccflow_import_path__"), "Expected __ccflow_import_path__ to be set" + +# Create instance and verify type_ works (PyObjectPath validation) +instance = LocalModel(value=42) +type_path = instance.type_ +print(f"type_: {{type_path}}") + +# Pickle the instance with open("{pkl_path}", "wb") as f: dump(instance, f) -print("SUCCESS") + +print("SUCCESS: Created and pickled") ''' - create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) - assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" + create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) + assert create_result.returncode == 0, f"Create subprocess failed: {create_result.stderr}" + assert "SUCCESS" in create_result.stdout, f"Create subprocess output: {create_result.stdout}" - load_code = f''' + # Load in subprocess 2 (different process, class not pre-defined) + load_code = f''' from ray.cloudpickle import load with open("{pkl_path}", "rb") as f: obj = load(f) -assert obj.name == "test" -assert obj.value == 42 -type_path = obj.type_ # Verify type_ works after unpickle -assert type_path.object is type(obj) -print("SUCCESS") +# Verify the value was preserved +assert obj.value == 42, f"Expected value=42, got {{obj.value}}" + +# Verify type_ works after unpickling (class was re-registered) +type_path = obj.type_ +print(f"type_: {{type_path}}") + +# Verify the import path works +import importlib +path_parts = str(type_path).rsplit(".", 1) +module = importlib.import_module(path_parts[0]) +cls = getattr(module, path_parts[1]) +assert cls is type(obj), "Import path should resolve to the same class" + +print("SUCCESS: Loaded and verified") ''' - load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) - assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" - - finally: - if os.path.exists(pkl_path): - os.unlink(pkl_path) - - def test_sync_to_module_lazy_on_cross_process_unpickle(self): - """Test that _sync_to_module happens lazily when accessing type_ after cross-process unpickle. - - When cloudpickle recreates a class in a new process, __ccflow_import_path__ is preserved - but the class is NOT immediately registered on ccflow.base (cloudpickle sets attributes - AFTER __pydantic_init_subclass__ runs). Registration happens lazily when: - - type_ is accessed (calls PyObjectPath.validate) - - PyObjectPath.validate(cls) is called directly - - _register_local_subclass_if_needed(cls) is called - """ - import os - import tempfile + load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) + assert load_result.returncode == 0, f"Load subprocess failed: {load_result.stderr}" + assert "SUCCESS" in load_result.stdout, f"Load subprocess output: {load_result.stdout}" - fd, pkl_path = tempfile.mkstemp(suffix=".pkl") - os.close(fd) + def test_context_base_cross_process(self, pickle_file): + """Test cross-process cloudpickle for ContextBase subclasses.""" + pkl_path = pickle_file - try: - # Process A: Create a local class and pickle the CLASS itself (not just instance) - create_code = f''' -import sys + create_code = f''' from ray.cloudpickle import dump -from ccflow import BaseModel -from ccflow.local_persistence import LOCAL_ARTIFACTS_MODULE_NAME +from ccflow import ContextBase -def create_model(): - class LocalModel(BaseModel): +def create_context(): + class LocalContext(ContextBase): + name: str value: int - return LocalModel - -LocalModel = create_model() - -# Verify class is registered and has import path -assert hasattr(LocalModel, "__ccflow_import_path__"), "Should have import path" -import_path = LocalModel.__ccflow_import_path__ -assert import_path.startswith(LOCAL_ARTIFACTS_MODULE_NAME + "."), f"Bad path: {{import_path}}" + return LocalContext -# Extract registered name and verify it's on ccflow.base -registered_name = import_path.split(".")[-1] -base_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] -assert getattr(base_module, registered_name, None) is LocalModel, "Should be on ccflow.base" +LocalContext = create_context() +assert "" in LocalContext.__qualname__ +instance = LocalContext(name="test", value=42) +_ = instance.type_ # Verify type_ works before pickle -# Pickle the CLASS itself (cloudpickle will serialize the class definition) with open("{pkl_path}", "wb") as f: - dump(LocalModel, f) - -# Output the import path so we can verify it's the same in process B -print(import_path) + dump(instance, f) +print("SUCCESS") ''' - create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) - assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" - original_import_path = create_result.stdout.strip() + create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) + assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" - # Process B: Unpickle the class and verify lazy sync behavior - load_code = f''' -import sys + load_code = f''' from ray.cloudpickle import load -from ccflow.local_persistence import LOCAL_ARTIFACTS_MODULE_NAME -from ccflow import PyObjectPath with open("{pkl_path}", "rb") as f: - LocalModel = load(f) - -# Verify __ccflow_import_path__ is preserved from pickle -assert hasattr(LocalModel, "__ccflow_import_path__"), "Should have import path after unpickle" -import_path = LocalModel.__ccflow_import_path__ -assert import_path == "{original_import_path}", f"Path mismatch: {{import_path}} != {original_import_path}" - -# BEFORE accessing type_: class is NOT yet registered on ccflow.base -# (cloudpickle sets attributes AFTER class creation, so __pydantic_init_subclass__ doesn't sync) -registered_name = import_path.split(".")[-1] -base_module = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] -registered_cls_before = getattr(base_module, registered_name, None) -# Note: registered_cls_before may be None OR may be LocalModel (if already synced by another path) - -# Trigger lazy sync via PyObjectPath.validate -path = PyObjectPath.validate(LocalModel) -assert path == import_path, f"PyObjectPath should return the ccflow import path" - -# AFTER accessing PyObjectPath.validate: class IS registered on ccflow.base -registered_cls_after = getattr(base_module, registered_name, None) -assert registered_cls_after is LocalModel, "_sync_to_module should have registered class on ccflow.base" - -# Verify PyObjectPath.object resolves correctly -assert path.object is LocalModel, "PyObjectPath.object should resolve to the class" - -# Verify the class is functional -instance = LocalModel(value=123) -assert instance.value == 123 + obj = load(f) +assert obj.name == "test" +assert obj.value == 42 +type_path = obj.type_ # Verify type_ works after unpickle +assert type_path.object is type(obj) print("SUCCESS") ''' - load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) - assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" - assert "SUCCESS" in load_result.stdout - - finally: - if os.path.exists(pkl_path): - os.unlink(pkl_path) + load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) + assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" - def test_callable_model_cross_process(self): + def test_callable_model_cross_process(self, pickle_file): """Test cross-process cloudpickle for CallableModel subclasses.""" - import os - import tempfile + pkl_path = pickle_file - fd, pkl_path = tempfile.mkstemp(suffix=".pkl") - os.close(fd) - - try: - create_code = f''' + create_code = f''' from ray.cloudpickle import dump from ccflow import CallableModel, ContextBase, GenericResult, Flow @@ -837,10 +467,10 @@ def __call__(self, context: LocalContext) -> GenericResult: dump((instance, context), f) print("SUCCESS") ''' - create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) - assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" + create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) + assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" - load_code = f''' + load_code = f''' from ray.cloudpickle import load with open("{pkl_path}", "rb") as f: @@ -855,114 +485,13 @@ def __call__(self, context: LocalContext) -> GenericResult: assert context.type_.object is type(context) print("SUCCESS") ''' - load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) - assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" - - finally: - if os.path.exists(pkl_path): - os.unlink(pkl_path) + load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) + assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" - def test_nested_local_classes_cross_process(self): - """Test cross-process cloudpickle for multiply-nested local classes.""" - import os - import tempfile - fd, pkl_path = tempfile.mkstemp(suffix=".pkl") - os.close(fd) - - try: - create_code = f''' -from ray.cloudpickle import dump -from ccflow import BaseModel - -def outer(): - def inner(): - class DeeplyNested(BaseModel): - value: int - return DeeplyNested - return inner() - -cls = outer() -assert "" in cls.__qualname__ -assert cls.__qualname__.count("") == 2 # Two levels of nesting - -instance = cls(value=999) -with open("{pkl_path}", "wb") as f: - dump(instance, f) -print("SUCCESS") -''' - create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) - assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" - - load_code = f''' -from ray.cloudpickle import load - -with open("{pkl_path}", "rb") as f: - obj = load(f) - -assert obj.value == 999 -_ = obj.type_ # Verify type_ works -print("SUCCESS") -''' - load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) - assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" - - finally: - if os.path.exists(pkl_path): - os.unlink(pkl_path) - - def test_multiple_instances_same_local_class_cross_process(self): - """Test that multiple instances of the same local class work cross-process.""" - import os - import tempfile - - fd, pkl_path = tempfile.mkstemp(suffix=".pkl") - os.close(fd) - - try: - create_code = f''' -from ray.cloudpickle import dump -from ccflow import BaseModel - -def create_class(): - class LocalModel(BaseModel): - value: int - return LocalModel - -cls = create_class() -instances = [cls(value=i) for i in range(5)] - -with open("{pkl_path}", "wb") as f: - dump(instances, f) -print("SUCCESS") -''' - create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) - assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" - - load_code = f''' -from ray.cloudpickle import load - -with open("{pkl_path}", "rb") as f: - instances = load(f) - -# All instances should have the correct values -for i, instance in enumerate(instances): - assert instance.value == i - -# All instances should be of the same class -assert len(set(type(inst) for inst in instances)) == 1 - -# type_ should work for all -for instance in instances: - _ = instance.type_ -print("SUCCESS") -''' - load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) - assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" - - finally: - if os.path.exists(pkl_path): - os.unlink(pkl_path) +# ============================================================================= +# Module-level classes should not be affected +# ============================================================================= class TestModuleLevelClassesUnaffected: @@ -979,7 +508,7 @@ def test_module_level_class_type_path_uses_qualname(self): instance = ModuleLevelModel(value=1) type_path = str(instance.type_) - # Should use standard path, not ccflow.base._Local_... + # Should use standard path, not ccflow.local_persistence._Local_... assert type_path == "ccflow.tests.test_local_persistence.ModuleLevelModel" assert "_Local_" not in type_path @@ -993,22 +522,16 @@ def test_module_level_standard_pickle_works(self): assert type(restored) is ModuleLevelModel -class TestRayTaskWithLocalClasses: - """Tests for Ray task execution with locally-defined classes. +# ============================================================================= +# Ray task tests +# ============================================================================= - These tests verify that the local persistence mechanism works correctly - when classes are serialized and sent to Ray workers (different processes). - """ - def test_local_callable_model_ray_task(self): - """Test that locally-defined CallableModels can be sent to Ray tasks. +class TestRayTaskWithLocalClasses: + """Tests for Ray task execution with locally-defined classes.""" - This is the ultimate test of cross-process cloudpickle support: - - Local class defined in function (has in __qualname__) - - Sent to Ray worker (different process) - - Executed and returns correct result - - type_ property works after execution (PyObjectPath validation) - """ + def test_local_callable_model_ray_task(self): + """Test that locally-defined CallableModels can be sent to Ray tasks.""" def create_local_callable(): class LocalContext(ContextBase): @@ -1098,98 +621,44 @@ def process_model(m): with ray.init(num_cpus=1): result = ray.get(process_model.remote(model)) - assert result.startswith("hello|ccflow.base._Local_") - - def test_create_model_ray_task(self): - """Test that pydantic create_model models can be sent to Ray tasks.""" - - def make_model(): - return pydantic_create_model("RayModel", value=(int, ...), __base__=BaseModel) - - Model = make_model() - - @ray.remote - def process_model(m): - type_path = str(m.type_) - return f"{m.value}|{type_path}" - - model = Model(value=99) + assert result.startswith("hello|ccflow.local_persistence._Local_") - with ray.init(num_cpus=1): - result = ray.get(process_model.remote(model)) - assert result.startswith("99|ccflow.base._Local_") +# ============================================================================= +# UUID uniqueness tests +# ============================================================================= class TestUUIDUniqueness: - """Tests verifying UUID-based naming provides cross-process uniqueness.""" - - def test_different_processes_get_different_uuids(self): - """Test that different processes generate different UUIDs for same class name.""" - import os - import tempfile - - fd1, pkl_path1 = tempfile.mkstemp(suffix="_1.pkl") - fd2, pkl_path2 = tempfile.mkstemp(suffix="_2.pkl") - os.close(fd1) - os.close(fd2) - - try: - # Create same-named class in two different processes - # Note: For module-level classes, registration only happens when needed - # Access type_ to trigger registration before checking __ccflow_import_path__ - code = """ -import sys -from ray.cloudpickle import dump -from ccflow import BaseModel + """Tests verifying UUID-based naming provides uniqueness.""" -class SameName(BaseModel): - value: int - -instance = SameName(value={value}) -# Access type_ to trigger registration if needed -_ = instance.type_ -print(SameName.__ccflow_import_path__) + def test_multiple_local_classes_same_name_get_unique_paths(self): + """Test that multiple local classes with same name get unique import paths.""" -with open("{pkl_path}", "wb") as f: - dump(instance, f) -""" - result1 = subprocess.run([sys.executable, "-c", code.format(value=1, pkl_path=pkl_path1)], capture_output=True, text=True) - assert result1.returncode == 0, f"Process 1 failed: {result1.stderr}" - path1 = result1.stdout.strip() - - result2 = subprocess.run([sys.executable, "-c", code.format(value=2, pkl_path=pkl_path2)], capture_output=True, text=True) - assert result2.returncode == 0, f"Process 2 failed: {result2.stderr}" - path2 = result2.stdout.strip() + def create_model_a(): + class SameName(BaseModel): + value: int - # UUIDs should be different even though class names are the same - assert path1 != path2 - assert "_Local_SameName_" in path1 - assert "_Local_SameName_" in path2 + return SameName - # Both should be loadable in the same process - load_code = f''' -from ray.cloudpickle import load + def create_model_b(): + class SameName(BaseModel): + value: str -with open("{pkl_path1}", "rb") as f: - obj1 = load(f) + return SameName -with open("{pkl_path2}", "rb") as f: - obj2 = load(f) + ModelA = create_model_a() + ModelB = create_model_b() -assert obj1.value == 1 -assert obj2.value == 2 -# They should be different types -assert type(obj1) is not type(obj2) -print("SUCCESS") -''' - load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) - assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" + # Both have same class name but different import paths + assert ModelA.__name__ == ModelB.__name__ == "SameName" + assert ModelA.__ccflow_import_path__ != ModelB.__ccflow_import_path__ - finally: - for p in [pkl_path1, pkl_path2]: - if os.path.exists(p): - os.unlink(p) + # Both should be accessible + instance_a = ModelA(value=42) + instance_b = ModelB(value="hello") + assert instance_a.type_.object is ModelA + assert instance_b.type_.object is ModelB def test_uuid_format_is_valid(self): """Test that the UUID portion of names is valid hex.""" @@ -1213,7 +682,7 @@ class TestModel(BaseModel): # ============================================================================= -# Edge case tests +# Nested class and inheritance tests # ============================================================================= @@ -1226,65 +695,22 @@ class NestedModel(BaseModel): value: int -class TestImportString: - """Tests for the import_string function.""" - - def test_import_string_handles_nested_class_path(self): - """Verify our import_string handles nested class paths that pydantic's ImportString cannot. - - Pydantic's ImportString fails on paths like 'module.OuterClass.InnerClass' because - it tries to import the entire path as a module. Our import_string progressively - tries shorter module paths and uses getattr for the rest. - """ - import pytest - from pydantic import ImportString, TypeAdapter - - from ccflow.exttypes.pyobjectpath import import_string - - nested_path = "ccflow.tests.test_local_persistence.OuterClass.NestedModel" - - # Pydantic's ImportString fails on nested class paths - pydantic_adapter = TypeAdapter(ImportString) - with pytest.raises(Exception, match="No module named"): - pydantic_adapter.validate_python(nested_path) - - # Our import_string handles it correctly - result = import_string(nested_path) - assert result is OuterClass.NestedModel - - def test_import_string_still_works_for_simple_paths(self): - """Verify import_string still works for simple module.ClassName paths.""" - from ccflow.exttypes.pyobjectpath import import_string - - # Simple class path - result = import_string("ccflow.tests.test_local_persistence.ModuleLevelModel") - assert result is ModuleLevelModel - - # Built-in module - result = import_string("os.path.join") - import os.path - - assert result is os.path.join - - class TestNestedClasses: """Tests for classes nested inside other classes.""" - def test_nested_class_inside_module_level_class_is_importable(self): - """Verify that a nested class inside a module-level class IS importable. + def test_nested_class_inside_module_level_class_not_registered(self): + """Verify that a nested class inside a module-level class is NOT registered. Classes nested inside module-level classes (like OuterClass.NestedModel) - have qualnames like 'OuterClass.NestedModel' and ARE importable via - the standard module.qualname path. + have qualnames like 'OuterClass.NestedModel' without '' and ARE + importable via the standard module.qualname path. """ - # The qualname has a '.' indicating nested class + # The qualname has a '.' indicating nested class, but no '' assert "." in OuterClass.NestedModel.__qualname__ assert OuterClass.NestedModel.__qualname__ == "OuterClass.NestedModel" + assert "" not in OuterClass.NestedModel.__qualname__ - # Should be importable - it's in the module namespace - assert local_persistence._is_importable(OuterClass.NestedModel) - - # Should not have __ccflow_import_path__ + # Should NOT have __ccflow_import_path__ assert "__ccflow_import_path__" not in OuterClass.NestedModel.__dict__ # type_ should use standard path @@ -1294,8 +720,8 @@ def test_nested_class_inside_module_level_class_is_importable(self): assert "_Local_" not in type_path assert instance.type_.object is OuterClass.NestedModel - def test_nested_class_inside_function_not_importable(self): - """Verify that a class nested inside a function-defined class is not importable.""" + def test_nested_class_inside_function_is_registered(self): + """Verify that a class nested inside a function-defined class IS registered.""" def create_outer(): class Outer: @@ -1305,11 +731,10 @@ class Inner(BaseModel): return Outer Outer = create_outer() - # The inner class has in its qualname (from Outer) + # The inner class has in its qualname (from the function) assert "" in Outer.Inner.__qualname__ - assert not local_persistence._is_importable(Outer.Inner) - # Should get registered and have __ccflow_import_path__ + # Should be registered and have __ccflow_import_path__ assert hasattr(Outer.Inner, "__ccflow_import_path__") @@ -1348,7 +773,7 @@ class LocalDerived(base_cls): assert base_instance.type_.object is Base assert derived_instance.type_.object is Derived - def test_subclass_of_module_level_class_not_registered(self): + def test_subclass_of_module_level_class_is_registered(self): """Verify that subclassing a module-level class inside a function creates a local class.""" def create_subclass(): @@ -1367,91 +792,14 @@ class LocalSubclass(ModuleLevelModel): assert "__ccflow_import_path__" not in ModuleLevelModel.__dict__ -class TestCreateModelEdgeCases: - """Additional edge case tests for pydantic's create_model.""" - - def test_create_model_with_same_name_as_module_level_class(self): - """Test create_model with a name that matches an existing module-level class. - - The dynamically created class should get its own registration, not - conflict with the module-level class. - """ - - def make_model(): - # Create a model with the same name as ModuleLevelModel - return pydantic_create_model("ModuleLevelModel", x=(int, ...), __base__=BaseModel) - - DynamicModel = make_model() - - # The dynamic model should NOT be the same as the module-level one - assert DynamicModel is not ModuleLevelModel - - # Access type_ to trigger registration - instance = DynamicModel(x=123) - type_path = str(instance.type_) - - # Should get a _Local_ path since it's not actually importable - assert "_Local_" in type_path - assert "ModuleLevelModel" in type_path - - # The module-level class should still use its standard path - module_instance = ModuleLevelModel(value=456) - module_type_path = str(module_instance.type_) - assert module_type_path == "ccflow.tests.test_local_persistence.ModuleLevelModel" - - def test_create_model_result_assigned_to_different_name(self): - """Test that create_model models get registered even when assigned to a different name. - - create_model("Foo", ...) assigned to variable Bar should still work. - """ - DifferentName = pydantic_create_model("OriginalName", value=(int, ...), __base__=BaseModel) - - # The class __name__ is "OriginalName" but it's stored in variable DifferentName - assert DifferentName.__name__ == "OriginalName" - - # Should still work correctly - instance = DifferentName(value=42) - type_path = str(instance.type_) - - # The registration should use "OriginalName" (the __name__) - assert "OriginalName" in type_path - assert instance.type_.object is DifferentName +# ============================================================================= +# Generic types tests +# ============================================================================= class TestGenericTypes: """Tests for generic types and PyObjectPath.""" - def test_generic_basemodel_type_path(self): - """Test that generic BaseModel subclasses work with type_. - - When you parameterize a generic type like GenericModel[int], - the resulting class has __name__='GenericModel[int]'. - The registration sanitizes this to 'GenericModel_int_'. - """ - from typing import Generic, TypeVar - - T = TypeVar("T") - - def create_generic(): - class GenericModel(BaseModel, Generic[T]): - data: T - - return GenericModel - - GenericModel = create_generic() - - # Create a concrete (parameterized) instance - instance = GenericModel[int](data=42) - - # type_ should work - the class gets registered with sanitized name - type_path = str(instance.type_) - # The parameterized type has __name__='GenericModel[int]' - # which gets sanitized to 'GenericModel_int_' in the registration - assert "_Local_" in type_path - assert "GenericModel" in type_path - # Verify the path actually resolves to the correct class - assert instance.type_.object is type(instance) - def test_unparameterized_generic_type_path(self): """Test that unparameterized generic types work with type_.""" from typing import Generic, TypeVar @@ -1473,114 +821,81 @@ class GenericModel(BaseModel, Generic[T]): type_path = str(instance.type_) assert "_Local_" in type_path assert "GenericModel" in type_path - # No type parameter suffix since we used unparameterized version assert instance.type_.object is GenericModel - def test_build_standard_import_path_strips_brackets(self): - """Test that _build_standard_import_path strips generic type parameters from qualname.""" - from ccflow.exttypes.pyobjectpath import _build_standard_import_path - - class MockClass: - __module__ = "test.module" - __qualname__ = "MyClass[int, str]" - - path = _build_standard_import_path(MockClass) - assert "[" not in path - assert path == "test.module.MyClass" - - -class TestReduceTriggersRegistration: - """Tests verifying that __reduce__ triggers registration for consistent cross-process UUIDs.""" + def test_generic_base_class_is_registered(self): + """Test that the unparameterized generic class is correctly registered. - def test_pickle_triggers_registration_for_create_model(self): - """Verify that pickling a create_model instance triggers registration. - - Without __reduce__ triggering registration, create_model classes would get - different UUIDs in different processes if pickled before type_ is accessed. + Note: Parameterized generics (e.g., GenericModel[int]) create new classes + that lose the '' marker in their qualname due to Python/pydantic's + generic handling. Use unparameterized generics for local class scenarios, + or define concrete subclasses. """ - from ray.cloudpickle import dumps, loads + from typing import Generic, TypeVar - def factory(): - return pydantic_create_model("ReduceTestModel", value=(int, ...), __base__=BaseModel) + T = TypeVar("T") - DynamicModel = factory() - instance = DynamicModel(value=42) + def create_generic(): + class GenericModel(BaseModel, Generic[T]): + data: T - # Before pickle, no __ccflow_import_path__ (deferred registration) - assert "__ccflow_import_path__" not in DynamicModel.__dict__ + return GenericModel - # Pickle triggers __reduce__ which triggers registration - pickled = dumps(instance) + GenericModel = create_generic() - # After pickle, registration has happened - assert "__ccflow_import_path__" in DynamicModel.__dict__ + # The unparameterized class should be registered + assert "" in GenericModel.__qualname__ + assert hasattr(GenericModel, "__ccflow_import_path__") + assert GenericModel.__ccflow_import_path__.startswith(local_persistence.LOCAL_ARTIFACTS_MODULE_NAME) - # Unpickle and verify same UUID - restored = loads(pickled) - assert type(restored).__ccflow_import_path__ == DynamicModel.__ccflow_import_path__ - def test_pickle_cross_process_consistent_uuid(self): - """Verify that pickling before type_ access gives consistent UUIDs across processes.""" - import os - import tempfile +# ============================================================================= +# Import string tests +# ============================================================================= - fd, pkl_path = tempfile.mkstemp(suffix=".pkl") - os.close(fd) - try: - # Process 1: Create and pickle WITHOUT accessing type_ first - create_code = f''' -from ray.cloudpickle import dump -from pydantic import create_model -from ccflow import BaseModel +class TestImportString: + """Tests for the import_string function.""" -def factory(): - return create_model("CrossProcessReduceModel", value=(int, ...), __base__=BaseModel) + def test_import_string_handles_nested_class_path(self): + """Verify our import_string handles nested class paths that pydantic's ImportString cannot.""" + from pydantic import ImportString, TypeAdapter -Model = factory() -instance = Model(value=42) + from ccflow.exttypes.pyobjectpath import import_string -# Pickle WITHOUT accessing type_ - __reduce__ should trigger registration -with open("{pkl_path}", "wb") as f: - dump(instance, f) + nested_path = "ccflow.tests.test_local_persistence.OuterClass.NestedModel" -print(Model.__ccflow_import_path__) -''' - result1 = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) - assert result1.returncode == 0, f"Process 1 failed: {result1.stderr}" - path1 = result1.stdout.strip() + # Pydantic's ImportString fails on nested class paths + pydantic_adapter = TypeAdapter(ImportString) + with pytest.raises(Exception, match="No module named"): + pydantic_adapter.validate_python(nested_path) - # Process 2: Load and verify same UUID - load_code = f''' -from ray.cloudpickle import load + # Our import_string handles it correctly + result = import_string(nested_path) + assert result is OuterClass.NestedModel -with open("{pkl_path}", "rb") as f: - restored = load(f) + def test_import_string_still_works_for_simple_paths(self): + """Verify import_string still works for simple module.ClassName paths.""" + from ccflow.exttypes.pyobjectpath import import_string -print(type(restored).__ccflow_import_path__) -''' - result2 = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) - assert result2.returncode == 0, f"Process 2 failed: {result2.stderr}" - path2 = result2.stdout.strip() + # Simple class path + result = import_string("ccflow.tests.test_local_persistence.ModuleLevelModel") + assert result is ModuleLevelModel - # UUIDs must match - assert path1 == path2, f"UUID mismatch: {path1} != {path2}" + # Built-in module + result = import_string("os.path.join") + import os.path - finally: - if os.path.exists(pkl_path): - os.unlink(pkl_path) + assert result is os.path.join -class TestRegistrationStrategy: - """Tests verifying the registration strategy for different class types. +# ============================================================================= +# Registration strategy tests +# ============================================================================= - The strategy is: - 1. Module-level classes: No registration needed, they're importable via standard path - 2. Local classes ( in qualname): Register immediately during class definition - 3. Dynamic classes (create_model, no ): Registered during pickle via __reduce__ - This keeps import-time overhead minimal while still handling all cases correctly. - """ +class TestRegistrationStrategy: + """Tests verifying the registration strategy for different class types.""" def test_module_level_classes_not_registered(self): """Module-level classes should NOT have __ccflow_import_path__ set.""" @@ -1610,65 +925,3 @@ class LocalModel(BaseModel): mock_do_reg.assert_called_once() # Verify it has in qualname assert "" in LocalModel.__qualname__ - - def test_create_model_defers_registration_until_type_access(self): - """create_model classes should defer registration until type_ is accessed. - - This is the key optimization: create_model produces classes without - in their qualname, so we can't tell at definition time if they need registration. - We defer the _is_importable check until type_ is accessed. - """ - from unittest import mock - - with mock.patch.object(local_persistence, "_register") as mock_do_reg: - # Create a model via create_model - DynamicModel = pydantic_create_model("DeferredModel", x=(int, ...), __base__=BaseModel) - - # No in qualname - assert "" not in DynamicModel.__qualname__ - - # _register should NOT have been called yet - mock_do_reg.assert_not_called() - - # Now access type_ which triggers the deferred check - instance = DynamicModel(x=1) - _ = instance.type_ - - # NOW it should have __ccflow_import_path__ - assert "__ccflow_import_path__" in DynamicModel.__dict__ - - def test_is_importable_only_called_lazily(self): - """_is_importable should only be called when we need to check, not during import.""" - from unittest import mock - - # Create a model via create_model - DynamicModel = pydantic_create_model("LazyCheckModel", x=(int, ...), __base__=BaseModel) - - with mock.patch.object(local_persistence, "_is_importable", wraps=local_persistence._is_importable) as mock_is_imp: - # Access type_ which triggers lazy registration check - instance = DynamicModel(x=1) - _ = instance.type_ - - # _is_importable should have been called - assert mock_is_imp.call_count >= 1 - - -class TestIsImportableEdgeCases: - """Tests for edge cases in the _is_importable function.""" - - def test_class_with_module_not_in_sys_modules(self): - """Test that a class claiming to be from an unloaded module is not importable.""" - cls = type("NotImportable", (), {}) - cls.__module__ = "nonexistent.fake.module" - cls.__qualname__ = "NotImportable" - - assert not local_persistence._is_importable(cls) - - def test_class_in_wrong_module(self): - """Test that a class claiming to be from a module it's not in is not importable.""" - cls = type("FakeClass", (), {}) - # Claim to be from this module, but with a name that doesn't exist - cls.__module__ = "ccflow.tests.test_local_persistence" - cls.__qualname__ = "ThisClassDoesNotExist" - - assert not local_persistence._is_importable(cls) From b3644a1e6eb57a263bf6e2d833e05d4fef78921f Mon Sep 17 00:00:00 2001 From: Nijat Khanbabayev Date: Wed, 31 Dec 2025 03:37:11 -0500 Subject: [PATCH 11/15] Add wrapper around create_model from pydantic that also does registration Signed-off-by: Nijat Khanbabayev --- ccflow/__init__.py | 1 + ccflow/local_persistence.py | 32 +- ccflow/tests/test_local_persistence.py | 480 +++++++++++++++++++++++++ 3 files changed, 512 insertions(+), 1 deletion(-) diff --git a/ccflow/__init__.py b/ccflow/__init__.py index 30706d7..163f275 100644 --- a/ccflow/__init__.py +++ b/ccflow/__init__.py @@ -12,6 +12,7 @@ from .context import * from .enums import Enum from .global_state import * +from .local_persistence import * from .models import * from .object_config import * from .publisher import * diff --git a/ccflow/local_persistence.py b/ccflow/local_persistence.py index bc598fa..5b0981f 100644 --- a/ccflow/local_persistence.py +++ b/ccflow/local_persistence.py @@ -9,6 +9,7 @@ class definition. - _register(cls): Register a local class with a unique import path - _sync_to_module(cls): Ensure a class with __ccflow_import_path__ is on the module (used for cross-process unpickle scenarios) +- create_ccflow_model: Wrapper around pydantic.create_model that registers the created model """ import re @@ -16,7 +17,7 @@ class definition. import uuid from typing import Any, Type -__all__ = ("LOCAL_ARTIFACTS_MODULE_NAME",) +__all__ = ("LOCAL_ARTIFACTS_MODULE_NAME", "create_ccflow_model") LOCAL_ARTIFACTS_MODULE_NAME = "ccflow.local_persistence" @@ -50,3 +51,32 @@ def _sync_to_module(cls: Type[Any]) -> None: base = sys.modules[LOCAL_ARTIFACTS_MODULE_NAME] if getattr(base, name, None) is not cls: setattr(base, name, cls) + + +def create_ccflow_model(__model_name: str, *, __base__: Any = None, **field_definitions: Any) -> Type[Any]: + """Create a dynamic ccflow model and register it for PyObjectPath serialization. + + Wraps pydantic's create_model and registers the model so it can be serialized + via PyObjectPath, including across processes (e.g., with Ray). + + Example: + >>> from ccflow import ContextBase, create_ccflow_model + >>> MyContext = create_ccflow_model( + ... "MyContext", + ... __base__=ContextBase, + ... name=(str, ...), + ... value=(int, 0), + ... ) + >>> ctx = MyContext(name="test", value=42) + """ + from pydantic import create_model as pydantic_create_model + + model = pydantic_create_model(__model_name, __base__=__base__, **field_definitions) + + # Register if it's a ccflow BaseModel subclass + from ccflow.base import BaseModel + + if isinstance(model, type) and issubclass(model, BaseModel): + _register(model) + + return model diff --git a/ccflow/tests/test_local_persistence.py b/ccflow/tests/test_local_persistence.py index 78884d3..18dfed4 100644 --- a/ccflow/tests/test_local_persistence.py +++ b/ccflow/tests/test_local_persistence.py @@ -20,6 +20,7 @@ import ccflow.local_persistence as local_persistence from ccflow import BaseModel, CallableModel, ContextBase, Flow, GenericResult, NullContext +from ccflow.local_persistence import create_ccflow_model class ModuleLevelModel(BaseModel): @@ -925,3 +926,482 @@ class LocalModel(BaseModel): mock_do_reg.assert_called_once() # Verify it has in qualname assert "" in LocalModel.__qualname__ + + +# ============================================================================= +# Tests for create_ccflow_model wrapper +# ============================================================================= + + +class TestCreateCcflowModelWrapper: + """Tests for the create_ccflow_model wrapper function.""" + + def test_create_ccflow_model_basic(self): + """Test basic create_ccflow_model usage with ContextBase.""" + DynamicContext = create_ccflow_model( + "DynamicContext", + __base__=ContextBase, + name=(str, ...), + value=(int, 0), + ) + + # Should be a valid ContextBase subclass + assert issubclass(DynamicContext, ContextBase) + + # Should be registered + assert hasattr(DynamicContext, "__ccflow_import_path__") + assert DynamicContext.__ccflow_import_path__.startswith(local_persistence.LOCAL_ARTIFACTS_MODULE_NAME) + + # Should be usable + ctx = DynamicContext(name="test", value=42) + assert ctx.name == "test" + assert ctx.value == 42 + + def test_create_ccflow_model_with_base_model(self): + """Test create_ccflow_model with ccflow BaseModel as base.""" + DynamicModel = create_ccflow_model( + "DynamicModel", + __base__=BaseModel, + data=(str, "default"), + count=(int, 0), + ) + + assert issubclass(DynamicModel, BaseModel) + assert hasattr(DynamicModel, "__ccflow_import_path__") + + instance = DynamicModel(data="hello", count=5) + assert instance.data == "hello" + assert instance.count == 5 + + def test_create_ccflow_model_type_property_works(self): + """Test that type_ property works for dynamically created models.""" + DynamicContext = create_ccflow_model( + "DynamicContext", + __base__=ContextBase, + x=(int, ...), + ) + + ctx = DynamicContext(x=10) + type_path = str(ctx.type_) + + # type_ should use __ccflow_import_path__ + assert type_path == DynamicContext.__ccflow_import_path__ + assert "_Local_" in type_path + assert ctx.type_.object is DynamicContext + + def test_create_ccflow_model_can_be_imported(self): + """Test that dynamically created models can be imported via their path.""" + import importlib + + DynamicModel = create_ccflow_model( + "ImportableModel", + __base__=BaseModel, + value=(int, 0), + ) + + import_path = DynamicModel.__ccflow_import_path__ + parts = import_path.rsplit(".", 1) + module = importlib.import_module(parts[0]) + imported_cls = getattr(module, parts[1]) + + assert imported_cls is DynamicModel + + def test_create_ccflow_model_with_docstring(self): + """Test create_ccflow_model with custom docstring.""" + + DynamicModel = create_ccflow_model( + "DocumentedModel", + __base__=BaseModel, + __doc__="A dynamically created model for testing.", + value=(int, 0), + ) + + assert DynamicModel.__doc__ == "A dynamically created model for testing." + + def test_create_ccflow_model_with_complex_fields(self): + """Test create_ccflow_model with various field types.""" + from typing import List, Optional + + from pydantic import Field + + DynamicModel = create_ccflow_model( + "ComplexModel", + __base__=BaseModel, + name=(str, ...), + tags=(List[str], Field(default_factory=list)), + description=(Optional[str], None), + count=(int, 0), + ) + + instance = DynamicModel(name="test") + assert instance.name == "test" + assert instance.tags == [] + assert instance.description is None + assert instance.count == 0 + + instance2 = DynamicModel(name="test2", tags=["a", "b"], description="desc", count=5) + assert instance2.tags == ["a", "b"] + assert instance2.description == "desc" + assert instance2.count == 5 + + def test_create_ccflow_model_multiple_unique_names(self): + """Test that multiple models with same name get unique registration paths.""" + + Model1 = create_ccflow_model("SameName", __base__=BaseModel, value=(int, 0)) + Model2 = create_ccflow_model("SameName", __base__=BaseModel, value=(str, "")) + + # Both should be registered with different paths + assert Model1.__ccflow_import_path__ != Model2.__ccflow_import_path__ + + # Both should have the same __name__ + assert Model1.__name__ == Model2.__name__ == "SameName" + + # Both should be accessible via their own paths + assert Model1(value=42).type_.object is Model1 + assert Model2(value="test").type_.object is Model2 + + def test_create_ccflow_model_inherits_from_context_base(self): + """Test that models inheriting from ContextBase have frozen config.""" + DynamicContext = create_ccflow_model( + "FrozenContext", + __base__=ContextBase, + value=(int, 0), + ) + + ctx = DynamicContext(value=42) + + # ContextBase subclasses should be frozen + with pytest.raises(Exception): # ValidationError for frozen model + ctx.value = 100 + + +class TestCreateCcflowModelCloudpickleSameProcess: + """Tests for cloudpickle with dynamically created models in the same process.""" + + def test_cloudpickle_instance_roundtrip(self): + """Test cloudpickle roundtrip for instances of dynamically created models.""" + from ray.cloudpickle import dumps, loads + + DynamicModel = create_ccflow_model( + "PickleTestModel", + __base__=BaseModel, + value=(int, 0), + ) + + instance = DynamicModel(value=123) + restored = loads(dumps(instance)) + + assert restored.value == 123 + assert type(restored) is DynamicModel + + def test_cloudpickle_class_roundtrip(self): + """Test cloudpickle roundtrip for dynamically created model classes.""" + from ray.cloudpickle import dumps, loads + + DynamicModel = create_ccflow_model( + "ClassPickleModel", + __base__=BaseModel, + name=(str, ""), + ) + + restored_cls = loads(dumps(DynamicModel)) + assert restored_cls is DynamicModel + + def test_cloudpickle_preserves_type_path(self): + """Test that type_ path is preserved after cloudpickle roundtrip.""" + from ray.cloudpickle import dumps, loads + + DynamicModel = create_ccflow_model( + "TypePathModel", + __base__=BaseModel, + value=(int, 0), + ) + + instance = DynamicModel(value=42) + original_path = str(instance.type_) + + restored = loads(dumps(instance)) + restored_path = str(restored.type_) + + assert restored_path == original_path + + +class TestCreateCcflowModelCloudpickleCrossProcess: + """Tests for cross-process cloudpickle with dynamically created models.""" + + def test_create_ccflow_model_cross_process(self, pickle_file): + """Test that dynamically created models work across processes.""" + pkl_path = pickle_file + + create_code = f''' +from ray.cloudpickle import dump +from ccflow import ContextBase +from ccflow.local_persistence import create_ccflow_model + +# Create a dynamic context using the wrapper +DynamicContext = create_ccflow_model( + "CrossProcessContext", + __base__=ContextBase, + name=(str, ...), + value=(int, 0), +) + +# Verify registration +assert hasattr(DynamicContext, "__ccflow_import_path__") + +# Create instance and verify type_ works +ctx = DynamicContext(name="test", value=42) +type_path = str(ctx.type_) +print(f"type_: {{type_path}}") +assert "_Local_" in type_path + +# Pickle +with open("{pkl_path}", "wb") as f: + dump(ctx, f) + +print("SUCCESS: Created and pickled") +''' + create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) + assert create_result.returncode == 0, f"Create subprocess failed: {create_result.stderr}" + assert "SUCCESS" in create_result.stdout, f"Create subprocess output: {create_result.stdout}" + + load_code = f''' +from ray.cloudpickle import load + +with open("{pkl_path}", "rb") as f: + ctx = load(f) + +# Verify values preserved +assert ctx.name == "test", f"Expected name='test', got {{ctx.name}}" +assert ctx.value == 42, f"Expected value=42, got {{ctx.value}}" + +# Verify type_ works after unpickle +type_path = str(ctx.type_) +print(f"type_: {{type_path}}") +assert "_Local_" in type_path + +# Verify the class can be resolved +assert ctx.type_.object is type(ctx) + +print("SUCCESS: Loaded and verified") +''' + load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) + assert load_result.returncode == 0, f"Load subprocess failed: {load_result.stderr}" + assert "SUCCESS" in load_result.stdout, f"Load subprocess output: {load_result.stdout}" + + def test_create_ccflow_model_with_callable_model_cross_process(self, pickle_file): + """Test dynamically created contexts used with CallableModel across processes.""" + pkl_path = pickle_file + + create_code = f''' +from ray.cloudpickle import dump +from ccflow import CallableModel, ContextBase, GenericResult, Flow +from ccflow.local_persistence import create_ccflow_model + +# Create a dynamic context +DynamicContext = create_ccflow_model( + "CallableModelContext", + __base__=ContextBase, + x=(int, ...), + multiplier=(int, 2), +) + +# Create a callable model using the dynamic context (defined inside a function +# to get in qualname for automatic registration) +def create_callable(): + class DynamicCallable(CallableModel): + @Flow.call + def __call__(self, context: DynamicContext) -> GenericResult: + return GenericResult(value=context.x * context.multiplier) + return DynamicCallable + +DynamicCallable = create_callable() +model = DynamicCallable() +ctx = DynamicContext(x=10, multiplier=3) + +# Verify it works +result = model(ctx) +assert result.value == 30 + +with open("{pkl_path}", "wb") as f: + dump((model, ctx), f) + +print("SUCCESS") +''' + create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) + assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" + assert "SUCCESS" in create_result.stdout + + load_code = f''' +from ray.cloudpickle import load + +with open("{pkl_path}", "rb") as f: + model, ctx = load(f) + +# Verify the callable works +result = model(ctx) +assert result.value == 30 + +# Verify type_ works for the dynamically created context (our focus) +assert ctx.type_.object is type(ctx) + +# Verify callable model type_ works (it should since it was defined in a function) +assert model.type_.object is type(model) + +print("SUCCESS") +''' + load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) + assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" + assert "SUCCESS" in load_result.stdout + + +class TestCreateCcflowModelRayTask: + """Tests for Ray task execution with dynamically created models.""" + + def test_create_ccflow_model_ray_task(self): + """Test that dynamically created models work in Ray tasks.""" + + DynamicContext = create_ccflow_model( + "RayTaskContext", + __base__=ContextBase, + name=(str, ...), + value=(int, 0), + ) + + @ray.remote + def process_context(ctx): + # Access fields and type_ inside Ray task + _ = ctx.type_ + return f"{ctx.name}:{ctx.value}" + + ctx = DynamicContext(name="ray_test", value=99) + + with ray.init(num_cpus=1): + result = ray.get(process_context.remote(ctx)) + + assert result == "ray_test:99" + + def test_create_ccflow_model_callable_model_ray_task(self): + """Test CallableModel with dynamically created context in Ray tasks.""" + + DynamicContext = create_ccflow_model( + "RayCallableContext", + __base__=ContextBase, + x=(int, ...), + ) + + class RayCallable(CallableModel): + factor: int = 2 + + @Flow.call + def __call__(self, context: DynamicContext) -> GenericResult: + return GenericResult(value=context.x * self.factor) + + @ray.remote + def run_callable(model, ctx): + result = model(ctx) + # Verify type_ works in Ray worker + _ = model.type_ + _ = ctx.type_ + return result.value + + model = RayCallable(factor=5) + ctx = DynamicContext(x=10) + + with ray.init(num_cpus=1): + result = ray.get(run_callable.remote(model, ctx)) + + assert result == 50 + + +class TestCreateCcflowModelEdgeCases: + """Tests for edge cases in create_ccflow_model wrapper.""" + + def test_create_ccflow_model_no_fields(self): + """Test create_ccflow_model with no custom fields.""" + + EmptyModel = create_ccflow_model("EmptyModel", __base__=BaseModel) + + assert issubclass(EmptyModel, BaseModel) + assert hasattr(EmptyModel, "__ccflow_import_path__") + + instance = EmptyModel() + assert instance.type_.object is EmptyModel + + def test_create_ccflow_model_with_module_override(self): + """Test create_ccflow_model with __module__ parameter.""" + + CustomModuleModel = create_ccflow_model( + "CustomModuleModel", + __base__=BaseModel, + __module__="custom.module.path", + value=(int, 0), + ) + + # Module should be overridden + assert CustomModuleModel.__module__ == "custom.module.path" + + # But should still be registered since it's not actually importable + assert hasattr(CustomModuleModel, "__ccflow_import_path__") + + def test_create_ccflow_model_inheritance_from_custom_base(self): + """Test create_ccflow_model inheriting from a custom ccflow class.""" + + # First create a base class + class CustomBase(ContextBase): + base_field: str = "base" + + DerivedModel = create_ccflow_model( + "DerivedModel", + __base__=CustomBase, + derived_field=(int, 0), + ) + + assert issubclass(DerivedModel, CustomBase) + assert issubclass(DerivedModel, ContextBase) + + instance = DerivedModel(derived_field=42) + assert instance.base_field == "base" + assert instance.derived_field == 42 + + def test_create_ccflow_model_special_characters_in_name(self): + """Test create_ccflow_model handles special characters in name.""" + + # Names with special characters should still work + SpecialModel = create_ccflow_model( + "Model-With-Dashes", + __base__=BaseModel, + value=(int, 0), + ) + + assert hasattr(SpecialModel, "__ccflow_import_path__") + + # The registered name should be sanitized + import_path = SpecialModel.__ccflow_import_path__ + registered_name = import_path.split(".")[-1] + # Should have sanitized the dashes + assert "-" not in registered_name + + def test_create_ccflow_model_returns_correct_type(self): + """Test that create_ccflow_model returns the model class, not an instance.""" + + result = create_ccflow_model( + "TypeCheckModel", + __base__=BaseModel, + value=(int, 0), + ) + + assert isinstance(result, type) + assert issubclass(result, BaseModel) + + def test_create_ccflow_model_import_at_top_level(self): + """Test that create_ccflow_model can be imported from ccflow.""" + from ccflow import create_ccflow_model as ccflow_create_model + from ccflow.local_persistence import create_ccflow_model as lp_create_model + + # Both should be the same function + assert ccflow_create_model is lp_create_model + + # And both should work + Model = ccflow_create_model("TopLevelImportModel", __base__=BaseModel, value=(int, 0)) + assert hasattr(Model, "__ccflow_import_path__") From 137985d42120827ff4711cafefeeac50f707a463 Mon Sep 17 00:00:00 2001 From: Nijat Khanbabayev Date: Wed, 31 Dec 2025 04:17:40 -0500 Subject: [PATCH 12/15] Register classes with module that is __main__, add a test Signed-off-by: Nijat Khanbabayev --- ccflow/base.py | 10 +++--- ccflow/tests/test_local_persistence.py | 46 ++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/ccflow/base.py b/ccflow/base.py index eb7eef8..888a45e 100644 --- a/ccflow/base.py +++ b/ccflow/base.py @@ -160,12 +160,14 @@ class BaseModel(PydanticBaseModel, _RegistryMixin, metaclass=_SerializeAsAnyMeta @classmethod def __pydantic_init_subclass__(cls, **kwargs): super().__pydantic_init_subclass__(**kwargs) - # Register local-scope classes so they're importable via PyObjectPath. - # At definition time, we can only detect in qualname - full check deferred. + # Register local-scope classes and __main__ classes so they're importable via PyObjectPath. + # - Local classes ( in qualname) aren't importable via their qualname path + # - __main__ classes aren't importable cross-process (cloudpickle recreates them but + # doesn't add them to sys.modules["__main__"]) # Note: Cross-process unpickle sync (when __ccflow_import_path__ is already set) happens - # lazily via _register_local_subclass_if_needed, since cloudpickle sets class attributes + # lazily via _sync_to_module, since cloudpickle sets class attributes # AFTER __pydantic_init_subclass__ runs. - if "" in cls.__qualname__: + if "" in cls.__qualname__ or cls.__module__ == "__main__": _register(cls) @computed_field( diff --git a/ccflow/tests/test_local_persistence.py b/ccflow/tests/test_local_persistence.py index 18dfed4..4b814be 100644 --- a/ccflow/tests/test_local_persistence.py +++ b/ccflow/tests/test_local_persistence.py @@ -485,6 +485,52 @@ def __call__(self, context: LocalContext) -> GenericResult: assert instance.type_.object is type(instance) assert context.type_.object is type(context) print("SUCCESS") +''' + load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) + assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" + + def test_main_module_class_cross_process(self, pickle_file): + """Test that __main__ module classes work with cloudpickle cross-process. + + Classes defined in __main__ need registration because cloudpickle recreates + them but doesn't add them to sys.modules["__main__"] in the target process. + """ + pkl_path = pickle_file + + # Create a __main__ class and pickle it (class is at module level, not in a function) + create_code = f''' +from ray.cloudpickle import dump +from ccflow import ContextBase + +# This class is in __main__ (not inside a function) +class MainContext(ContextBase): + value: int + +# Verify it's registered (has __ccflow_import_path__) +assert hasattr(MainContext, "__ccflow_import_path__"), "Expected __main__ class to be registered" +assert MainContext.__module__ == "__main__" + +instance = MainContext(value=42) +type_path = instance.type_ # Verify type_ works before pickle + +with open("{pkl_path}", "wb") as f: + dump(instance, f) +print("SUCCESS") +''' + create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) + assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" + + # Load in a different process - type_ should work + load_code = f''' +from ray.cloudpickle import load + +with open("{pkl_path}", "rb") as f: + obj = load(f) + +assert obj.value == 42 +type_path = obj.type_ # This would fail without registration +assert type_path.object is type(obj) +print("SUCCESS") ''' load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" From 36f6ba20a4600b708d1254f24dd145d8b4f1f3b5 Mon Sep 17 00:00:00 2001 From: Nijat Khanbabayev Date: Wed, 31 Dec 2025 04:32:42 -0500 Subject: [PATCH 13/15] Utilize parametrization to reduce the subprocess tests Signed-off-by: Nijat Khanbabayev --- ccflow/tests/test_local_persistence.py | 318 +++++++++++-------------- 1 file changed, 134 insertions(+), 184 deletions(-) diff --git a/ccflow/tests/test_local_persistence.py b/ccflow/tests/test_local_persistence.py index 4b814be..ed99842 100644 --- a/ccflow/tests/test_local_persistence.py +++ b/ccflow/tests/test_local_persistence.py @@ -327,100 +327,73 @@ def pickle_file(tmp_path): class TestCloudpickleCrossProcess: - """Tests for cross-process cloudpickle behavior (subprocess tests).""" + """Tests for cross-process cloudpickle behavior (subprocess tests). - def test_local_basemodel_cloudpickle_cross_process(self, pickle_file): - """Test that local-scope BaseModel subclasses work with cloudpickle cross-process.""" - pkl_path = pickle_file + These tests verify that ccflow classes (BaseModel, ContextBase, CallableModel) + with local or __main__ scope can be pickled in one process and unpickled in another, + with .type_ (PyObjectPath) working correctly after unpickle. + """ - # Create a local-scope BaseModel in subprocess 1 and pickle it - create_code = f''' + @pytest.mark.parametrize( + "create_code,load_code", + [ + pytest.param( + # Local-scope BaseModel + """ from ray.cloudpickle import dump from ccflow import BaseModel -def create_local_model(): +def create_local(): class LocalModel(BaseModel): value: int - return LocalModel -LocalModel = create_local_model() - -# Verify __qualname__ has '' (enables cloudpickle serialization) -assert "" in LocalModel.__qualname__, f"Expected '' in qualname: {{LocalModel.__qualname__}}" - -# Verify __ccflow_import_path__ is set (enables PyObjectPath) -assert hasattr(LocalModel, "__ccflow_import_path__"), "Expected __ccflow_import_path__ to be set" +LocalModel = create_local() +assert "" in LocalModel.__qualname__ +assert hasattr(LocalModel, "__ccflow_import_path__") -# Create instance and verify type_ works (PyObjectPath validation) instance = LocalModel(value=42) -type_path = instance.type_ -print(f"type_: {{type_path}}") +_ = instance.type_ -# Pickle the instance with open("{pkl_path}", "wb") as f: dump(instance, f) - -print("SUCCESS: Created and pickled") -''' - create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) - assert create_result.returncode == 0, f"Create subprocess failed: {create_result.stderr}" - assert "SUCCESS" in create_result.stdout, f"Create subprocess output: {create_result.stdout}" - - # Load in subprocess 2 (different process, class not pre-defined) - load_code = f''' +print("SUCCESS") +""", + """ from ray.cloudpickle import load with open("{pkl_path}", "rb") as f: obj = load(f) -# Verify the value was preserved -assert obj.value == 42, f"Expected value=42, got {{obj.value}}" - -# Verify type_ works after unpickling (class was re-registered) -type_path = obj.type_ -print(f"type_: {{type_path}}") - -# Verify the import path works -import importlib -path_parts = str(type_path).rsplit(".", 1) -module = importlib.import_module(path_parts[0]) -cls = getattr(module, path_parts[1]) -assert cls is type(obj), "Import path should resolve to the same class" - -print("SUCCESS: Loaded and verified") -''' - load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) - assert load_result.returncode == 0, f"Load subprocess failed: {load_result.stderr}" - assert "SUCCESS" in load_result.stdout, f"Load subprocess output: {load_result.stdout}" - - def test_context_base_cross_process(self, pickle_file): - """Test cross-process cloudpickle for ContextBase subclasses.""" - pkl_path = pickle_file - - create_code = f''' +assert obj.value == 42 +assert obj.type_.object is type(obj) +print("SUCCESS") +""", + id="local_basemodel", + ), + pytest.param( + # Local-scope ContextBase + """ from ray.cloudpickle import dump from ccflow import ContextBase -def create_context(): +def create_local(): class LocalContext(ContextBase): name: str value: int return LocalContext -LocalContext = create_context() +LocalContext = create_local() assert "" in LocalContext.__qualname__ + instance = LocalContext(name="test", value=42) -_ = instance.type_ # Verify type_ works before pickle +_ = instance.type_ with open("{pkl_path}", "wb") as f: dump(instance, f) print("SUCCESS") -''' - create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) - assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" - - load_code = f''' +""", + """ from ray.cloudpickle import load with open("{pkl_path}", "rb") as f: @@ -428,22 +401,18 @@ class LocalContext(ContextBase): assert obj.name == "test" assert obj.value == 42 -type_path = obj.type_ # Verify type_ works after unpickle -assert type_path.object is type(obj) +assert obj.type_.object is type(obj) print("SUCCESS") -''' - load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) - assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" - - def test_callable_model_cross_process(self, pickle_file): - """Test cross-process cloudpickle for CallableModel subclasses.""" - pkl_path = pickle_file - - create_code = f''' +""", + id="local_context", + ), + pytest.param( + # Local-scope CallableModel (also tests callable execution) + """ from ray.cloudpickle import dump from ccflow import CallableModel, ContextBase, GenericResult, Flow -def create_callable(): +def create_local(): class LocalContext(ContextBase): x: int @@ -456,84 +425,83 @@ def __call__(self, context: LocalContext) -> GenericResult: return LocalContext, LocalCallable -LocalContext, LocalCallable = create_callable() -instance = LocalCallable(multiplier=3) -context = LocalContext(x=10) - -# Verify it works -result = instance(context) +LocalContext, LocalCallable = create_local() +model = LocalCallable(multiplier=3) +ctx = LocalContext(x=10) +result = model(ctx) assert result.value == 30 with open("{pkl_path}", "wb") as f: - dump((instance, context), f) + dump((model, ctx), f) print("SUCCESS") -''' - create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) - assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" - - load_code = f''' +""", + """ from ray.cloudpickle import load with open("{pkl_path}", "rb") as f: - instance, context = load(f) + model, ctx = load(f) -# Verify the callable works after unpickle -result = instance(context) +result = model(ctx) assert result.value == 30 - -# Verify type_ works -assert instance.type_.object is type(instance) -assert context.type_.object is type(context) +assert model.type_.object is type(model) +assert ctx.type_.object is type(ctx) print("SUCCESS") -''' - load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) - assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" - - def test_main_module_class_cross_process(self, pickle_file): - """Test that __main__ module classes work with cloudpickle cross-process. - - Classes defined in __main__ need registration because cloudpickle recreates - them but doesn't add them to sys.modules["__main__"] in the target process. - """ - pkl_path = pickle_file - - # Create a __main__ class and pickle it (class is at module level, not in a function) - create_code = f''' +""", + id="local_callable", + ), + pytest.param( + # __main__ module class (not inside a function) + # cloudpickle recreates but doesn't add to sys.modules["__main__"] + """ from ray.cloudpickle import dump from ccflow import ContextBase -# This class is in __main__ (not inside a function) class MainContext(ContextBase): value: int -# Verify it's registered (has __ccflow_import_path__) -assert hasattr(MainContext, "__ccflow_import_path__"), "Expected __main__ class to be registered" assert MainContext.__module__ == "__main__" +assert hasattr(MainContext, "__ccflow_import_path__") instance = MainContext(value=42) -type_path = instance.type_ # Verify type_ works before pickle +_ = instance.type_ with open("{pkl_path}", "wb") as f: dump(instance, f) print("SUCCESS") -''' - create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) - assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" - - # Load in a different process - type_ should work - load_code = f''' +""", + """ from ray.cloudpickle import load with open("{pkl_path}", "rb") as f: obj = load(f) assert obj.value == 42 -type_path = obj.type_ # This would fail without registration -assert type_path.object is type(obj) +assert obj.type_.object is type(obj) print("SUCCESS") -''' - load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) +""", + id="main_module", + ), + ], + ) + def test_cross_process_cloudpickle(self, pickle_file, create_code, load_code): + """Test that ccflow classes work with cloudpickle across processes.""" + pkl_path = pickle_file + + create_result = subprocess.run( + [sys.executable, "-c", create_code.format(pkl_path=pkl_path)], + capture_output=True, + text=True, + ) + assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" + assert "SUCCESS" in create_result.stdout + + load_result = subprocess.run( + [sys.executable, "-c", load_code.format(pkl_path=pkl_path)], + capture_output=True, + text=True, + ) assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" + assert "SUCCESS" in load_result.stdout # ============================================================================= @@ -1173,18 +1141,18 @@ def test_cloudpickle_preserves_type_path(self): class TestCreateCcflowModelCloudpickleCrossProcess: - """Tests for cross-process cloudpickle with dynamically created models.""" - - def test_create_ccflow_model_cross_process(self, pickle_file): - """Test that dynamically created models work across processes.""" - pkl_path = pickle_file - - create_code = f''' + """Tests for cross-process cloudpickle with dynamically created models (via create_ccflow_model).""" + + @pytest.mark.parametrize( + "create_code,load_code", + [ + pytest.param( + # Context only + """ from ray.cloudpickle import dump from ccflow import ContextBase from ccflow.local_persistence import create_ccflow_model -# Create a dynamic context using the wrapper DynamicContext = create_ccflow_model( "CrossProcessContext", __base__=ContextBase, @@ -1192,59 +1160,36 @@ def test_create_ccflow_model_cross_process(self, pickle_file): value=(int, 0), ) -# Verify registration assert hasattr(DynamicContext, "__ccflow_import_path__") -# Create instance and verify type_ works ctx = DynamicContext(name="test", value=42) -type_path = str(ctx.type_) -print(f"type_: {{type_path}}") -assert "_Local_" in type_path +assert "_Local_" in str(ctx.type_) -# Pickle with open("{pkl_path}", "wb") as f: dump(ctx, f) - -print("SUCCESS: Created and pickled") -''' - create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) - assert create_result.returncode == 0, f"Create subprocess failed: {create_result.stderr}" - assert "SUCCESS" in create_result.stdout, f"Create subprocess output: {create_result.stdout}" - - load_code = f''' +print("SUCCESS") +""", + """ from ray.cloudpickle import load with open("{pkl_path}", "rb") as f: ctx = load(f) -# Verify values preserved -assert ctx.name == "test", f"Expected name='test', got {{ctx.name}}" -assert ctx.value == 42, f"Expected value=42, got {{ctx.value}}" - -# Verify type_ works after unpickle -type_path = str(ctx.type_) -print(f"type_: {{type_path}}") -assert "_Local_" in type_path - -# Verify the class can be resolved +assert ctx.name == "test" +assert ctx.value == 42 +assert "_Local_" in str(ctx.type_) assert ctx.type_.object is type(ctx) - -print("SUCCESS: Loaded and verified") -''' - load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) - assert load_result.returncode == 0, f"Load subprocess failed: {load_result.stderr}" - assert "SUCCESS" in load_result.stdout, f"Load subprocess output: {load_result.stdout}" - - def test_create_ccflow_model_with_callable_model_cross_process(self, pickle_file): - """Test dynamically created contexts used with CallableModel across processes.""" - pkl_path = pickle_file - - create_code = f''' +print("SUCCESS") +""", + id="context_only", + ), + pytest.param( + # Dynamic context with CallableModel + """ from ray.cloudpickle import dump from ccflow import CallableModel, ContextBase, GenericResult, Flow from ccflow.local_persistence import create_ccflow_model -# Create a dynamic context DynamicContext = create_ccflow_model( "CallableModelContext", __base__=ContextBase, @@ -1252,8 +1197,6 @@ def test_create_ccflow_model_with_callable_model_cross_process(self, pickle_file multiplier=(int, 2), ) -# Create a callable model using the dynamic context (defined inside a function -# to get in qualname for automatic registration) def create_callable(): class DynamicCallable(CallableModel): @Flow.call @@ -1264,39 +1207,46 @@ def __call__(self, context: DynamicContext) -> GenericResult: DynamicCallable = create_callable() model = DynamicCallable() ctx = DynamicContext(x=10, multiplier=3) - -# Verify it works result = model(ctx) assert result.value == 30 with open("{pkl_path}", "wb") as f: dump((model, ctx), f) - print("SUCCESS") -''' - create_result = subprocess.run([sys.executable, "-c", create_code], capture_output=True, text=True) - assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" - assert "SUCCESS" in create_result.stdout - - load_code = f''' +""", + """ from ray.cloudpickle import load with open("{pkl_path}", "rb") as f: model, ctx = load(f) -# Verify the callable works result = model(ctx) assert result.value == 30 - -# Verify type_ works for the dynamically created context (our focus) assert ctx.type_.object is type(ctx) - -# Verify callable model type_ works (it should since it was defined in a function) assert model.type_.object is type(model) - print("SUCCESS") -''' - load_result = subprocess.run([sys.executable, "-c", load_code], capture_output=True, text=True) +""", + id="with_callable", + ), + ], + ) + def test_create_ccflow_model_cross_process(self, pickle_file, create_code, load_code): + """Test that dynamically created models work across processes.""" + pkl_path = pickle_file + + create_result = subprocess.run( + [sys.executable, "-c", create_code.format(pkl_path=pkl_path)], + capture_output=True, + text=True, + ) + assert create_result.returncode == 0, f"Create failed: {create_result.stderr}" + assert "SUCCESS" in create_result.stdout + + load_result = subprocess.run( + [sys.executable, "-c", load_code.format(pkl_path=pkl_path)], + capture_output=True, + text=True, + ) assert load_result.returncode == 0, f"Load failed: {load_result.stderr}" assert "SUCCESS" in load_result.stdout From 51534f27d3a00e7f9371b325e6e244f921842301 Mon Sep 17 00:00:00 2001 From: Nijat Khanbabayev Date: Wed, 31 Dec 2025 16:26:06 -0500 Subject: [PATCH 14/15] Use register_ccflow_import_path for local persistence Signed-off-by: Nijat Khanbabayev --- ccflow/base.py | 8 +-- ccflow/local_persistence.py | 29 +++++++--- ccflow/tests/test_base.py | 6 +-- ccflow/tests/test_local_persistence.py | 73 ++++++++++++++++++++++---- 4 files changed, 90 insertions(+), 26 deletions(-) diff --git a/ccflow/base.py b/ccflow/base.py index 888a45e..f718c21 100644 --- a/ccflow/base.py +++ b/ccflow/base.py @@ -30,7 +30,7 @@ from typing_extensions import Self from .exttypes.pyobjectpath import PyObjectPath -from .local_persistence import _register, _sync_to_module +from .local_persistence import register_ccflow_import_path, sync_to_module log = logging.getLogger(__name__) @@ -165,10 +165,10 @@ def __pydantic_init_subclass__(cls, **kwargs): # - __main__ classes aren't importable cross-process (cloudpickle recreates them but # doesn't add them to sys.modules["__main__"]) # Note: Cross-process unpickle sync (when __ccflow_import_path__ is already set) happens - # lazily via _sync_to_module, since cloudpickle sets class attributes + # lazily via sync_to_module, since cloudpickle sets class attributes # AFTER __pydantic_init_subclass__ runs. if "" in cls.__qualname__ or cls.__module__ == "__main__": - _register(cls) + register_ccflow_import_path(cls) @computed_field( alias="_target_", @@ -189,7 +189,7 @@ def type_(self) -> PyObjectPath: # Handle cross-process unpickle: cloudpickle sets __ccflow_import_path__ but # the class may not be on ccflow.local_persistence yet in this process if "__ccflow_import_path__" in cls.__dict__: - _sync_to_module(cls) + sync_to_module(cls) return PyObjectPath.validate(cls) # We want to track under what names a model has been registered diff --git a/ccflow/local_persistence.py b/ccflow/local_persistence.py index 5b0981f..39fb9c8 100644 --- a/ccflow/local_persistence.py +++ b/ccflow/local_persistence.py @@ -6,8 +6,8 @@ class definition. This module provides: -- _register(cls): Register a local class with a unique import path -- _sync_to_module(cls): Ensure a class with __ccflow_import_path__ is on the module +- register_ccflow_import_path(cls): Register a local class with a unique import path +- sync_to_module(cls): Ensure a class with __ccflow_import_path__ is on the module (used for cross-process unpickle scenarios) - create_ccflow_model: Wrapper around pydantic.create_model that registers the created model """ @@ -22,11 +22,15 @@ class definition. LOCAL_ARTIFACTS_MODULE_NAME = "ccflow.local_persistence" -def _register(cls: Type[Any]) -> None: - """Give cls a unique name and register it on the artifacts module. +def _register_on_module(cls: Type[Any], module_name: str) -> None: + """Register cls on the specified module with a unique name. This sets __ccflow_import_path__ on the class without modifying __module__ or __qualname__, preserving cloudpickle's ability to serialize the class definition. + + Args: + cls: The class to register. + module_name: The fully-qualified module name to register on (must be in sys.modules). """ # Sanitize the class name to be a valid Python identifier name = re.sub(r"[^0-9A-Za-z_]", "_", cls.__name__ or "Model").strip("_") or "Model" @@ -34,11 +38,20 @@ def _register(cls: Type[Any]) -> None: name = f"_{name}" unique = f"_Local_{name}_{uuid.uuid4().hex[:12]}" - setattr(sys.modules[LOCAL_ARTIFACTS_MODULE_NAME], unique, cls) - cls.__ccflow_import_path__ = f"{LOCAL_ARTIFACTS_MODULE_NAME}.{unique}" + setattr(sys.modules[module_name], unique, cls) + cls.__ccflow_import_path__ = f"{module_name}.{unique}" + + +def register_ccflow_import_path(cls: Type[Any]) -> None: + """Give cls a unique name and register it on ccflow.local_persistence. + + This sets __ccflow_import_path__ on the class without modifying __module__ or + __qualname__, preserving cloudpickle's ability to serialize the class definition. + """ + _register_on_module(cls, LOCAL_ARTIFACTS_MODULE_NAME) -def _sync_to_module(cls: Type[Any]) -> None: +def sync_to_module(cls: Type[Any]) -> None: """Ensure cls is registered on the artifacts module in this process. This handles cross-process unpickle scenarios where cloudpickle recreates the class @@ -77,6 +90,6 @@ def create_ccflow_model(__model_name: str, *, __base__: Any = None, **field_defi from ccflow.base import BaseModel if isinstance(model, type) and issubclass(model, BaseModel): - _register(model) + register_ccflow_import_path(model) return model diff --git a/ccflow/tests/test_base.py b/ccflow/tests/test_base.py index 84535f7..e53e0af 100644 --- a/ccflow/tests/test_base.py +++ b/ccflow/tests/test_base.py @@ -176,7 +176,7 @@ def test_widget(self): class TestLocalRegistration(TestCase): def test_local_class_registered_for_base_model(self): - with mock.patch("ccflow.base._register") as register: + with mock.patch("ccflow.base.register_ccflow_import_path") as register: class LocalModel(BaseModel): value: int @@ -186,7 +186,7 @@ class LocalModel(BaseModel): self.assertIn(LocalModel, calls) def test_local_class_registered_for_context(self): - with mock.patch("ccflow.base._register") as register: + with mock.patch("ccflow.base.register_ccflow_import_path") as register: class LocalContext(ContextBase): value: int @@ -195,7 +195,7 @@ class LocalContext(ContextBase): self.assertIn(LocalContext, calls) def test_local_class_registered_for_callable(self): - with mock.patch("ccflow.base._register") as register: + with mock.patch("ccflow.base.register_ccflow_import_path") as register: class LocalCallable(CallableModel): @Flow.call diff --git a/ccflow/tests/test_local_persistence.py b/ccflow/tests/test_local_persistence.py index ed99842..586b03f 100644 --- a/ccflow/tests/test_local_persistence.py +++ b/ccflow/tests/test_local_persistence.py @@ -7,7 +7,7 @@ Key behaviors tested: 1. Local classes get __ccflow_import_path__ set at definition time 2. Module-level classes are NOT registered (they're already importable) -3. Cross-process cloudpickle works via _sync_to_module +3. Cross-process cloudpickle works via sync_to_module 4. UUID-based naming provides uniqueness """ @@ -38,7 +38,7 @@ def __call__(self, context: NullContext) -> GenericResult: # ============================================================================= -# Tests for _register function +# Tests for register_ccflow_import_path function # ============================================================================= @@ -48,7 +48,7 @@ def test_base_module_available_after_import(): def test_register_preserves_module_qualname_and_sets_import_path(): - """Test that _register sets __ccflow_import_path__ without changing __module__ or __qualname__.""" + """Test that register_ccflow_import_path sets __ccflow_import_path__ without changing __module__ or __qualname__.""" def build(): class Foo: @@ -60,7 +60,7 @@ class Foo: original_module = Foo.__module__ original_qualname = Foo.__qualname__ - local_persistence._register(Foo) + local_persistence.register_ccflow_import_path(Foo) # __module__ and __qualname__ should NOT change (preserves cloudpickle) assert Foo.__module__ == original_module, "__module__ should not change" @@ -77,10 +77,10 @@ class Foo: def test_register_handles_class_name_starting_with_digit(): - """Test that _register handles class names starting with a digit by prefixing with underscore.""" + """Test that register_ccflow_import_path handles class names starting with a digit by prefixing with underscore.""" # Create a class with a name starting with a digit cls = type("3DModel", (), {}) - local_persistence._register(cls) + local_persistence.register_ccflow_import_path(cls) import_path = cls.__ccflow_import_path__ registered_name = import_path.split(".")[-1] @@ -95,7 +95,7 @@ def test_register_handles_class_name_starting_with_digit(): def test_sync_to_module_registers_class_not_yet_on_module(): - """Test that _sync_to_module registers a class that has __ccflow_import_path__ but isn't on the module yet. + """Test that sync_to_module registers a class that has __ccflow_import_path__ but isn't on the module yet. This happens in cross-process unpickle scenarios where cloudpickle recreates the class with __ccflow_import_path__ set, but the class isn't yet on ccflow.local_persistence. @@ -110,13 +110,64 @@ def test_sync_to_module_registers_class_not_yet_on_module(): module = sys.modules[local_persistence.LOCAL_ARTIFACTS_MODULE_NAME] assert getattr(module, unique_name, None) is None, "Class should NOT be on module before sync" - # Call _sync_to_module - local_persistence._sync_to_module(cls) + # Call sync_to_module + local_persistence.sync_to_module(cls) # Verify class IS now on ccflow.local_persistence assert getattr(module, unique_name, None) is cls, "Class should be on module after sync" +# ============================================================================= +# Tests for _register_on_module (internal function) +# ============================================================================= + + +def test_register_on_module_uses_specified_module(): + """Test that _register_on_module registers on the specified module, not just LOCAL_ARTIFACTS_MODULE_NAME.""" + # Use a different module that's already in sys.modules + target_module_name = "ccflow.tests.test_local_persistence" + target_module = sys.modules[target_module_name] + + cls = type("CustomModuleClass", (), {}) + local_persistence._register_on_module(cls, target_module_name) + + # Verify import path uses the specified module name + import_path = cls.__ccflow_import_path__ + assert import_path.startswith(f"{target_module_name}._Local_"), f"Import path should start with {target_module_name}" + + # Verify class is registered on the specified module + registered_name = import_path.split(".")[-1] + assert getattr(target_module, registered_name) is cls, "Class should be on specified module" + + # Verify class is NOT on LOCAL_ARTIFACTS_MODULE_NAME + artifacts_module = sys.modules[local_persistence.LOCAL_ARTIFACTS_MODULE_NAME] + assert getattr(artifacts_module, registered_name, None) is None, "Class should NOT be on artifacts module" + + # Cleanup + delattr(target_module, registered_name) + + +def test_register_on_module_import_path_format(): + """Test that _register_on_module produces correctly formatted import paths.""" + target_module_name = "ccflow.tests.test_local_persistence" + target_module = sys.modules[target_module_name] + + cls = type("FormatTestClass", (), {}) + local_persistence._register_on_module(cls, target_module_name) + + import_path = cls.__ccflow_import_path__ + + # Import path should be: module_name._Local_ClassName_uuid + parts = import_path.rsplit(".", 1) + assert len(parts) == 2, "Import path should have module.name format" + assert parts[0] == target_module_name, "Module part should match target" + assert parts[1].startswith("_Local_FormatTestClass_"), "Name should follow _Local_ClassName_uuid pattern" + assert len(parts[1].split("_")[-1]) == 12, "UUID suffix should be 12 hex chars" + + # Cleanup + delattr(target_module, parts[1]) + + # ============================================================================= # Tests for local class registration via BaseModel # ============================================================================= @@ -926,7 +977,7 @@ def test_local_class_registered_immediately(self): from unittest import mock # Must patch where it's used (base.py), not where it's defined (local_persistence) - with mock.patch("ccflow.base._register") as mock_do_reg: + with mock.patch("ccflow.base.register_ccflow_import_path") as mock_do_reg: def create_local(): class LocalModel(BaseModel): @@ -936,7 +987,7 @@ class LocalModel(BaseModel): LocalModel = create_local() - # _register SHOULD be called immediately for local classes + # register_ccflow_import_path SHOULD be called immediately for local classes mock_do_reg.assert_called_once() # Verify it has in qualname assert "" in LocalModel.__qualname__ From a56d526ce763497137b2e2cd0bfa427323dd5831 Mon Sep 17 00:00:00 2001 From: Nijat Khanbabayev Date: Wed, 31 Dec 2025 16:36:52 -0500 Subject: [PATCH 15/15] Update docstrings Signed-off-by: Nijat Khanbabayev --- ccflow/exttypes/pyobjectpath.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ccflow/exttypes/pyobjectpath.py b/ccflow/exttypes/pyobjectpath.py index 178c4cb..0612fa0 100644 --- a/ccflow/exttypes/pyobjectpath.py +++ b/ccflow/exttypes/pyobjectpath.py @@ -25,7 +25,7 @@ def import_string(dotted_path: str) -> Any: parts = dotted_path.split(".") # Try progressively shorter module paths - # e.g., for 'a.b.C.D', try 'a.b.C', then 'a.b', then 'a' + # e.g., for 'a.b.C.D', try 'a.b.C.D', then 'a.b.C', then 'a.b', then 'a' for i in range(len(parts), 0, -1): module_path = ".".join(parts[:i]) try: @@ -120,7 +120,7 @@ def _validate(cls, value: Any): def _path_from_object(cls, value: Any) -> "PyObjectPath": """Build import path from an object. - For ccflow BaseModel subclasses with __ccflow_import_path__ set (local classes), + For classes with __ccflow_import_path__ set (local classes), uses that path. Otherwise uses the standard module.qualname path. """ if isinstance(value, type):