diff --git a/README.md b/README.md index 2ef8e28..3c1542d 100644 --- a/README.md +++ b/README.md @@ -150,12 +150,23 @@ This package provides a complete Python implementation of CIRCE-BE with: - Measurement, Observation - Visit Occurrence/Detail - Device Exposure, Specimen - - Death, Location Region - - Observation Period, Payer Plan Period - - And more... -- **Full cohort expression validation** with comprehensive error checking -- **Markdown rendering** for human-readable cohort descriptions -- **Complete CLI interface** with 4 commands (validate, generate-sql, render-markdown, process) + - Specimen, Death + - Payer Plan Period, Location Region +- **Full Cohort Expression Validation** with 40+ checker implementations +- **Markdown Rendering** for human-readable descriptions +- **Complete CLI Interface** for validation, SQL, and rendering +- **Extension System** to support custom CDM domains + +## Extensions + +`circe_py` includes a powerful extension system that allows adding support for custom CDM domains. + +Included Extensions: + +- **OHDSI Waveform Extension**: Support for the OHDSI Waveform Extension specification (waveform_occurrence, waveform_registry, waveform_channel_metadata, waveform_feature). See [waveform_extension/README.md](waveform_extension/README.md). + +For information on how to implement your own extension, see the [Developer Guide for Extensions](docs/developer/extensions.rst). + - **Java interoperability** - supports both camelCase and snake_case field names for seamless Java CIRCE-BE compatibility ## ⚠️ Java Fidelity Requirement diff --git a/circe/cohortdefinition/builders/__init__.py b/circe/cohortdefinition/builders/__init__.py index a9b6b64..2a5b555 100644 --- a/circe/cohortdefinition/builders/__init__.py +++ b/circe/cohortdefinition/builders/__init__.py @@ -11,6 +11,7 @@ from .utils import BuilderUtils, BuilderOptions, CriteriaColumn from .base import CriteriaSqlBuilder +from circe.extensions import get_registry from .condition_occurrence import ConditionOccurrenceSqlBuilder from .drug_exposure import DrugExposureSqlBuilder from .procedure_occurrence import ProcedureOccurrenceSqlBuilder @@ -28,6 +29,12 @@ from .visit_detail import VisitDetailSqlBuilder from .location_region import LocationRegionSqlBuilder +# Extension support +def get_builder_for_criteria(criteria): + """Get a SQL builder for a criteria instance, checking extensions first.""" + registry = get_registry() + return registry.get_builder(criteria) + __all__ = [ # Utility classes "BuilderUtils", "BuilderOptions", "CriteriaColumn", @@ -51,5 +58,6 @@ "ObservationPeriodSqlBuilder", "PayerPlanPeriodSqlBuilder", "VisitDetailSqlBuilder", - "LocationRegionSqlBuilder" + "LocationRegionSqlBuilder", + "get_builder_for_criteria" ] diff --git a/circe/cohortdefinition/cohort_expression_query_builder.py b/circe/cohortdefinition/cohort_expression_query_builder.py index b635cbb..e30f831 100644 --- a/circe/cohortdefinition/cohort_expression_query_builder.py +++ b/circe/cohortdefinition/cohort_expression_query_builder.py @@ -26,9 +26,10 @@ ConditionOccurrenceSqlBuilder, DeathSqlBuilder, DeviceExposureSqlBuilder, MeasurementSqlBuilder, ObservationSqlBuilder, SpecimenSqlBuilder, VisitOccurrenceSqlBuilder, DrugExposureSqlBuilder, ProcedureOccurrenceSqlBuilder, - ConditionEraSqlBuilder, DrugEraSqlBuilder, DoseEraSqlBuilder, ObservationPeriodSqlBuilder, PayerPlanPeriodSqlBuilder, - VisitDetailSqlBuilder, LocationRegionSqlBuilder + ConditionEraSqlBuilder, DrugEraSqlBuilder, DoseEraSqlBuilder, ObservationPeriodSqlBuilder, PayerPlanPeriodSqlBuilder, + VisitDetailSqlBuilder, LocationRegionSqlBuilder, get_builder_for_criteria ) +from circe.extensions import get_registry from .interfaces import IGetCriteriaSqlDispatcher, IGetEndStrategySqlDispatcher from .concept_set_expression_query_builder import ConceptSetExpressionQueryBuilder @@ -1296,7 +1297,19 @@ def get_criteria_sql(self, criteria: Criteria, options: Optional[BuilderOptions] 'DoseEra': DoE, } - if criteria_type in criteria_class_map: + # Check if it's a registered extension criteria + registry = get_registry() + if criteria_type and criteria_type in registry._criteria_classes: + try: + criteria_data = dict(criteria_data) if criteria_data else {} + # Add defaults if needed + if 'first' not in criteria_data or criteria_data.get('first') is None: + criteria_data['first'] = False + + criteria = registry._criteria_classes[criteria_type].model_validate(criteria_data, strict=False) + except Exception as e: + raise ValueError(f"Failed to deserialize extension criteria: {criteria_type} - {e}") + elif criteria_type in criteria_class_map: try: # Make a mutable copy to add defaults criteria_data = dict(criteria_data) if criteria_data else {} @@ -1315,10 +1328,16 @@ def get_criteria_sql(self, criteria: Criteria, options: Optional[BuilderOptions] criteria = criteria_class_map[criteria_type].model_validate(criteria_data, strict=False) except Exception as e: raise ValueError(f"Failed to deserialize criteria from dict: {criteria_type} - {e}") - else: - raise ValueError(f"Unknown criteria type in dict: {criteria_type}") else: - raise ValueError(f"Invalid criteria dict structure: {criteria}") + raise ValueError(f"Unknown criteria type in dict: {criteria_type}") + else: + if isinstance(criteria, dict): + raise ValueError(f"Invalid criteria dict structure: {criteria}") + + # Check for extension builder first + extension_builder = get_builder_for_criteria(criteria) + if extension_builder: + return self._get_criteria_sql_from_builder(extension_builder, criteria, options) # Import here to avoid circular dependency - use the already imported names if isinstance(criteria, ConditionOccurrence): diff --git a/circe/cohortdefinition/criteria.py b/circe/cohortdefinition/criteria.py index f0f58de..d3b0fd2 100644 --- a/circe/cohortdefinition/criteria.py +++ b/circe/cohortdefinition/criteria.py @@ -8,8 +8,8 @@ Reference: JAVA_CLASS_MAPPINGS.md for Java equivalents. """ -from typing import List, Optional, Any, ClassVar, Union, TYPE_CHECKING -from pydantic import BaseModel, Field, ConfigDict, model_serializer, AliasChoices, field_validator +from typing import List, Optional, Any, ClassVar, Union, TYPE_CHECKING, Annotated +from pydantic import BaseModel, Field, ConfigDict, model_serializer, AliasChoices, field_validator, BeforeValidator from enum import Enum from ..vocabulary.concept import Concept from .core import ( @@ -218,13 +218,26 @@ class Criteria(CirceBaseModel): @model_serializer(mode='wrap') def _serialize_polymorphic(self, serializer, info): """Serialize with polymorphic type wrapper for Java compatibility.""" - # Get the serialized data using default serialization - data = serializer(self) - # Wrap in class name for polymorphic deserialization in Java - # Only wrap if this is a subclass (not the base Criteria class) - if self.__class__.__name__ != 'Criteria': - return {self.__class__.__name__: data} - return data + if self.__class__.__name__ == 'Criteria': + return serializer(self) + + # For subclasses (extensions), we want to ensure all fields are included + # even if serialized via a base class Union link. + # We manually build the dict to avoid infinite recursion with model_dump() + data = {} + for field_name, field_info in self.model_fields.items(): + value = getattr(self, field_name) + if value is not None: + # Use serialization_alias if it exists, otherwise use field name + # Note: alias_generator (PascalCase) is handled via serialization_alias + # effectively if we use the right property. + # In Pydantic V2, serialization_alias is often the PascalCase version if configured. + alias = field_info.serialization_alias or field_name + # If it's a generic field without explicit alias, it might need PascalCase + # but most CIRCE fields have explicit aliases. + data[alias] = value + + return {self.__class__.__name__: data} def accept(self, dispatcher: Any, options: Optional[Any] = None) -> str: """Accept method for visitor pattern.""" @@ -1204,14 +1217,32 @@ def normalize_window(window_dict: dict) -> dict: # Define CriteriaType Union for strict typing -CriteriaType = Union[ +# Define CriteriaType Union for strict typing +_CriteriaTypeUnion = Union[ ConditionOccurrence, DrugExposure, ProcedureOccurrence, VisitOccurrence, Observation, Measurement, DeviceExposure, Specimen, Death, VisitDetail, ObservationPeriod, PayerPlanPeriod, LocationRegion, ConditionEra, - DrugEra, DoseEra + DrugEra, DoseEra, Criteria ] +def _validate_criteria_extension(v: Any) -> Any: + """Validate criteria checking extensions registry for custom types.""" + if isinstance(v, dict) and len(v) == 1: + key = next(iter(v)) + try: + from circe.extensions import get_registry + registry = get_registry() + cls = registry.get_criteria_class(key) + if cls: + # Found registered criteria class, deserialize it + return cls.model_validate(v[key]) + except ImportError: + pass + return v + +CriteriaType = Annotated[_CriteriaTypeUnion, BeforeValidator(_validate_criteria_extension)] + # Map for dynamic lookup NAMES_TO_CLASSES = { 'ConditionOccurrence': ConditionOccurrence, diff --git a/circe/cohortdefinition/printfriendly/markdown_render.py b/circe/cohortdefinition/printfriendly/markdown_render.py index 5b3d2a9..81fd823 100644 --- a/circe/cohortdefinition/printfriendly/markdown_render.py +++ b/circe/cohortdefinition/printfriendly/markdown_render.py @@ -31,20 +31,36 @@ class MarkdownRender: subdirectory and mirror the structure of Java's .ftl files. """ - def __init__(self, concept_sets: Optional[List[ConceptSet]] = None, include_concept_sets: bool = False): + def __init__(self, concept_sets: Optional[List[ConceptSet]] = None, include_concept_sets: bool = False, template_paths: Optional[List[Path]] = None): """Initialize the markdown renderer. Args: concept_sets: Optional list of concept sets for resolving codeset IDs to names include_concept_sets: Whether to include concept set tables in the output (default: False) + template_paths: Optional list of additional template directories to search """ self._concept_sets = concept_sets or [] self._include_concept_sets = include_concept_sets - # Initialize Jinja2 environment - template_dir = Path(__file__).parent / 'templates' + # Initialize Jinja2 environment with multiple loaders + built_in_template_dir = Path(__file__).parent / 'templates' + + # Start with built-in templates + loaders = [jinja2.FileSystemLoader(str(built_in_template_dir))] + + # Add user provided paths + if template_paths: + for path in template_paths: + loaders.append(jinja2.FileSystemLoader(str(path))) + + # Add registry paths + from circe.extensions import get_registry + registry = get_registry() + for path in registry.template_paths: + loaders.append(jinja2.FileSystemLoader(str(path))) + self._env = jinja2.Environment( - loader=jinja2.FileSystemLoader(str(template_dir)), + loader=jinja2.ChoiceLoader(loaders), trim_blocks=True, lstrip_blocks=True, autoescape=False # We're generating markdown, not HTML @@ -54,6 +70,12 @@ def __init__(self, concept_sets: Optional[List[ConceptSet]] = None, include_conc self._env.filters['format_date'] = self._format_date self._env.filters['format_number'] = self._format_number + # Add extension helper to look up template name for a criteria instance + def get_template_for_criteria(criteria): + return registry.get_template(criteria) + + self._env.globals['get_template_for_criteria'] = get_template_for_criteria + # Register global functions self._env.globals['codeset_name'] = self._codeset_name self._env.globals['format_date'] = self._format_date diff --git a/circe/cohortdefinition/printfriendly/templates/criteria_types.j2 b/circe/cohortdefinition/printfriendly/templates/criteria_types.j2 index 2e7c9ce..927cf6d 100644 --- a/circe/cohortdefinition/printfriendly/templates/criteria_types.j2 +++ b/circe/cohortdefinition/printfriendly/templates/criteria_types.j2 @@ -11,7 +11,12 @@ ============================================ #} {%- macro Criteria(c, level=0, isPlural=true, countCriteria={}, indexLabel="cohort entry") -%} {%- set type_name = c.__class__.__name__ -%} - {%- if type_name == "ConditionEra" -%}{{ ConditionEra(c, level, isPlural, countCriteria, indexLabel) }} + {%- set custom_template = get_template_for_criteria(c) -%} + {%- if custom_template -%} + {%- with criteria=c, level=level, isPlural=isPlural, countCriteria=countCriteria, indexLabel=indexLabel -%} + {%- include custom_template -%} + {%- endwith -%} + {%- elif type_name == "ConditionEra" -%}{{ ConditionEra(c, level, isPlural, countCriteria, indexLabel) }} {%- elif type_name == "ConditionOccurrence" -%}{{ ConditionOccurrence(c, level, isPlural, countCriteria, indexLabel) }} {%- elif type_name == "Death" -%}{{ Death(c, level, isPlural, countCriteria, indexLabel) }} {%- elif type_name == "DeviceExposure" -%}{{ DeviceExposure(c, level, isPlural, countCriteria, indexLabel) }} diff --git a/circe/extensions.py b/circe/extensions.py new file mode 100644 index 0000000..ce26a4d --- /dev/null +++ b/circe/extensions.py @@ -0,0 +1,115 @@ +""" +Extension Registry for OMOP CDM. + +This module provides the central registry for managing extensions to circe-py, +allowing external projects to register custom criteria classes, SQL builders, +and markdown renderers. +""" +from typing import Dict, List, Optional, Type, Set, Union +from pathlib import Path + +# Forward references to avoid circular imports +# Actual imports happen inside methods or with TYPE_CHECKING +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .cohortdefinition.criteria import Criteria + from .cohortdefinition.builders.base import CriteriaSqlBuilder + +class ExtensionRegistry: + """Central registry for OMOP CDM extensions.""" + + def __init__(self): + # Maps criteria names to criteria classes (for JSON deserialization) + self._criteria_classes: Dict[str, Type['Criteria']] = {} + + # Maps criteria types to SQL builder classes + self._sql_builders: Dict[Type['Criteria'], Type['CriteriaSqlBuilder']] = {} + + # Maps criteria types to markdown template names + self._markdown_templates: Dict[Type['Criteria'], str] = {} + + # List of paths to search for Jinja2 templates + self._template_paths: List[Path] = [] + + def register_criteria_class(self, name: str, cls: Type['Criteria']) -> None: + """Register a new criteria class for JSON deserialization. + + Args: + name: The name of the criteria type (e.g. "WaveformOccurrence") + cls: The Criteria subclass + """ + self._criteria_classes[name] = cls + + def register_sql_builder(self, criteria_cls: Type['Criteria'], builder_cls: Type['CriteriaSqlBuilder']) -> None: + """Register a SQL builder for a criteria type. + + Args: + criteria_cls: The Criteria subclass + builder_cls: The CriteriaSqlBuilder subclass + """ + self._sql_builders[criteria_cls] = builder_cls + + def register_markdown_template(self, criteria_cls: Type['Criteria'], template_name: str) -> None: + """Register a Jinja2 template for markdown rendering. + + Args: + criteria_cls: The Criteria subclass + template_name: The name of the template file (e.g. "waveform_occurrence.j2") + """ + self._markdown_templates[criteria_cls] = template_name + + def add_template_path(self, path: Path) -> None: + """Add a path to search for Jinja2 templates. + + Args: + path: Path to a directory containing Jinja2 templates + """ + if path not in self._template_paths: + self._template_paths.append(path) + + def get_builder(self, criteria: 'Criteria') -> Optional['CriteriaSqlBuilder']: + """Get the SQL builder for a criteria instance. + + Args: + criteria: The criteria instance + + Returns: + An instance of the registered SQL builder, or None if not found + """ + builder_cls = self._sql_builders.get(type(criteria)) + return builder_cls() if builder_cls else None + + def get_template(self, criteria: 'Criteria') -> Optional[str]: + """Get the markdown template name for a criteria instance. + + Args: + criteria: The criteria instance + + Returns: + The template name, or None if not found + """ + return self._markdown_templates.get(type(criteria)) + + def get_criteria_class(self, name: str) -> Optional[Type['Criteria']]: + """Get a registered criteria class by name. + + Args: + name: The name of the criteria type + + Returns: + The Criteria subclass, or None if not found + """ + return self._criteria_classes.get(name) + + @property + def template_paths(self) -> List[Path]: + """Get all registered template paths.""" + return list(self._template_paths) + +# Global registry instance +_registry = ExtensionRegistry() + +def get_registry() -> ExtensionRegistry: + """Get the global extension registry instance.""" + return _registry diff --git a/docs/developer/extensions.rst b/docs/developer/extensions.rst new file mode 100644 index 0000000..69904f5 --- /dev/null +++ b/docs/developer/extensions.rst @@ -0,0 +1,155 @@ +Extending circe_py +=================== + +This guide explains how to extend `circe_py` with custom criteria types. This is useful when you have data in your CDM that isn't part of the standard OMOP domains (e.g., weather data, genomic features, or specialized clinical registries). + +Architecture Overview +--------------------- + +The extension system consists of three main components: + +1. **Criteria Class**: A Pydantic model that defines the fields available in your new criteria. +2. **SQL Builder**: A class that translates your criteria into SQL. +3. **Markdown Template**: A Jinja2 template that generates a human-readable description. + +Registration is handled by the `ExtensionRegistry`. + +Example: Weather Conditions +--------------------------- + +Imagine you want to create a cohort based on weather conditions (e.g., "Patients diagnosed with asthma during extreme cold"). + +Step 1: Define the Criteria Class +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Your class must inherit from `circe.cohortdefinition.criteria.Criteria`. Use Pydantic's `Field` and `AliasChoices` to maintain compatibility with both Pythonic (`snake_case`) and Java-style (`PascalCase`) field names. + +.. code-block:: python + + from typing import Optional, List + from pydantic import Field, AliasChoices + from circe.cohortdefinition.criteria import Criteria, CriteriaGroup + from circe.vocabulary.concept import Concept + + class WeatherCondition(Criteria): + """Criteria for weather data linked to persons.""" + weather_concept_id: Optional[List[Concept]] = Field( + default=None, + validation_alias=AliasChoices("WeatherConceptId", "weatherConceptId"), + serialization_alias="WeatherConceptId" + ) + temperature_celsius: Optional[float] = Field( + default=None, + validation_alias=AliasChoices("TemperatureCelsius", "temperatureCelsius"), + serialization_alias="TemperatureCelsius" + ) + + # Resolve forward references (required for complex criteria types) + WeatherCondition.model_rebuild() + +Step 2: Implement the SQL Builder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The SQL builder must inherit from `circe.cohortdefinition.builders.base.CriteriaSqlBuilder`. + +.. code-block:: python + + from typing import Set + from circe.cohortdefinition.builders.base import CriteriaSqlBuilder + from circe.cohortdefinition.builders.utils import CriteriaColumn, BuilderOptions + + class WeatherConditionSqlBuilder(CriteriaSqlBuilder[WeatherCondition]): + def get_query_template(self) -> str: + return """ + SELECT C.person_id, C.weather_id as event_id, C.observation_date as start_date, C.observation_date as end_date, + NULL as visit_occurrence_id, C.observation_date as sort_date + FROM @cdm_database_schema.weather_data C + WHERE @whereClause + """ + + def get_default_columns(self) -> Set[CriteriaColumn]: + return {CriteriaColumn.START_DATE, CriteriaColumn.END_DATE} + + def get_table_column_for_criteria_column(self, column: CriteriaColumn) -> str: + if column == CriteriaColumn.START_DATE: + return "C.observation_date" + elif column == CriteriaColumn.END_DATE: + return "C.observation_date" + raise ValueError(f"Unsupported column: {column}") + + def get_criteria_sql_with_options(self, criteria: WeatherCondition, options: BuilderOptions) -> str: + query = self.get_query_template() + where_clauses = ["1=1"] + + if criteria.weather_concept_id: + ids = [str(c.concept_id) for c in criteria.weather_concept_id if c.concept_id] + if ids: + where_clauses.append(f"C.weather_concept_id IN ({','.join(ids)})") + + if criteria.temperature_celsius is not None: + where_clauses.append(f"C.temp_c >= {criteria.temperature_celsius}") + + query = query.replace("@cdm_database_schema", options.cdm_database_schema) + query = query.replace("@whereClause", " AND ".join(where_clauses)) + return query + +Step 3: Register the Extension +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use the extension registry to link your classes and templates. + +.. code-block:: python + + from circe.extensions import get_registry + from pathlib import Path + + def register_weather_extension(): + registry = get_registry() + + # 1. Register the Criteria Class + registry.register_criteria_class("WeatherCondition", WeatherCondition) + + # 2. Register the SQL Builder + registry.register_sql_builder(WeatherCondition, WeatherConditionSqlBuilder) + + # 3. Register Markdown Template + # Ensure templates/weather_condition.j2 exists + template_path = Path(__file__).parent / "templates" + registry.add_template_path(template_path) + registry.register_markdown_template(WeatherCondition, "weather_condition.j2") + +Step 4: Create a Markdown Template +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Create a file named `weather_condition.j2`: + +.. code-block:: jinja + + Weather condition: {{ criteria.weather_concept_id[0].concept_name if criteria.weather_concept_id else 'Any' }} + {% if criteria.temperature_celsius %} with temperature >= {{ criteria.temperature_celsius }}°C{% endif %}. + +Full End-to-End Usage +--------------------- + +Once registered, you can use your custom criteria just like any built-in type. + +.. code-block:: python + + from circe.cohortdefinition import CohortExpression, PrimaryCriteria + from circe.vocabulary.concept import Concept + + # Setup + register_weather_extension() + + # Define cohort + weather_criteria = WeatherCondition( + weather_concept_id=[Concept(concept_id=123, concept_name="Snowing")], + temperature_celsius=-5.0 + ) + + cohort = CohortExpression( + primary_criteria=PrimaryCriteria(criteria_list=[weather_criteria]) + ) + + # generate SQL or Markdown as usual + # ... diff --git a/docs/index.rst b/docs/index.rst index 8a6e6af..5740564 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -51,6 +51,7 @@ A Python implementation of the OHDSI CIRCE-BE (Cohort Inclusion and Restriction developer/contributing developer/architecture + developer/extensions developer/testing developer/release diff --git a/tests/test_extension_system.py b/tests/test_extension_system.py new file mode 100644 index 0000000..7d5a1ef --- /dev/null +++ b/tests/test_extension_system.py @@ -0,0 +1,179 @@ +import pytest +import json +from typing import Optional, List, Set +from pydantic import Field, AliasChoices + +from circe.cohortdefinition import CohortExpression, PrimaryCriteria, CriteriaGroup +from circe.cohortdefinition.criteria import Criteria +from circe.cohortdefinition.builders.base import CriteriaSqlBuilder +from circe.cohortdefinition.builders.utils import CriteriaColumn, BuilderUtils, BuilderOptions +from circe.cohortdefinition.cohort_expression_query_builder import CohortExpressionQueryBuilder, BuildExpressionQueryOptions +from circe.cohortdefinition.printfriendly.markdown_render import MarkdownRender +from circe.vocabulary.concept import Concept +from circe.extensions import get_registry + +# ----------------------------------------------------------------------------- +# 1. Define the Extension Components +# ----------------------------------------------------------------------------- + +class WeatherCondition(Criteria): + """ + Example extension criteria for 'Weather Conditions'. + Imagine a CDM extension where weather data is linked to persons. + """ + weather_concept_id: Optional[List[Concept]] = Field( + default=None, + validation_alias=AliasChoices("WeatherConceptId", "weatherConceptId"), + serialization_alias="WeatherConceptId" + ) + temperature_celsius: Optional[float] = Field( + default=None, + validation_alias=AliasChoices("TemperatureCelsius", "temperatureCelsius"), + serialization_alias="TemperatureCelsius" + ) + +# Important: Rebuild models to resolve forward references inherited from Criteria +WeatherCondition.model_rebuild() + +class WeatherConditionSqlBuilder(CriteriaSqlBuilder[WeatherCondition]): + """ + SQL Builder for WeatherCondition. + """ + def get_query_template(self) -> str: + return """ +SELECT C.person_id, C.weather_id as event_id, C.observation_date as start_date, C.observation_date as end_date, + NULL as visit_occurrence_id, C.observation_date as sort_date +FROM @cdm_database_schema.weather_data C +WHERE @whereClause +""" + + def get_default_columns(self) -> Set[CriteriaColumn]: + return {CriteriaColumn.START_DATE, CriteriaColumn.END_DATE} + + def get_table_column_for_criteria_column(self, column: CriteriaColumn) -> str: + if column == CriteriaColumn.START_DATE: + return "C.observation_date" + elif column == CriteriaColumn.END_DATE: + return "C.observation_date" + else: + raise ValueError(f"Unsupported column: {column}") + + def get_criteria_sql_with_options(self, criteria: WeatherCondition, options: BuilderOptions) -> str: + query = self.get_query_template() + where_clauses = ["1=1"] + + if criteria.weather_concept_id: + ids = [str(c.concept_id) for c in criteria.weather_concept_id if c.concept_id] + if ids: + where_clauses.append(f"C.weather_concept_id IN ({','.join(ids)})") + + if criteria.temperature_celsius is not None: + where_clauses.append(f"C.temp_c >= {criteria.temperature_celsius}") + + query = query.replace("@cdm_database_schema", options.cdm_database_schema if options else "@cdm_database_schema") + query = query.replace("@whereClause", " AND ".join(where_clauses)) + return query + +# ----------------------------------------------------------------------------- +# 2. Test Cases +# ----------------------------------------------------------------------------- + +def test_simple_extension_integration(tmp_path): + """ + Full end-to-end test of the extension system. + """ + registry = get_registry() + + # Register the extension + registry.register_criteria_class("WeatherCondition", WeatherCondition) + registry.register_sql_builder(WeatherCondition, WeatherConditionSqlBuilder) + + # Create a dummy template file + template_dir = tmp_path / "templates" + template_dir.mkdir() + template_file = template_dir / "weather_condition.j2" + template_file.write_text(""" +Weather condition: {{ criteria.weather_concept_id[0].concept_name if criteria.weather_concept_id else 'Any' }} +{% if criteria.temperature_celsius %} with temperature >= {{ criteria.temperature_celsius }}°C{% endif %}. +""") + + registry.add_template_path(template_dir) + registry.register_markdown_template(WeatherCondition, "weather_condition.j2") + + # Construct a cohort using the extension + weather_concept = Concept(concept_id=123, concept_name="Snowing", standard_concept="S", concept_code="SNOW") + weather_criteria = WeatherCondition( + weather_concept_id=[weather_concept], + temperature_celsius=-5.0 + ) + + expression = CohortExpression( + primary_criteria=PrimaryCriteria( + criteria_list=[weather_criteria], + observation_window={"priorDays": 0, "postDays": 0}, + primary_limit={"type": "First"} + ), + concept_sets=[], + inclusion_rules=[], + qualified_limit={"type": "First"} + ) + + # 1. Test SQL Generation + builder = CohortExpressionQueryBuilder() + sql_options = BuildExpressionQueryOptions() + sql_options.cdm_schema = "my_cdm" + sql = builder.build_expression_query(expression, sql_options) + + assert "weather_data" in sql + assert "weather_concept_id IN (123)" in sql + assert "temp_c >= -5.0" in sql + + # 2. Test Markdown Rendering + renderer = MarkdownRender() + markdown = renderer.render_cohort_expression(expression) + + assert "Weather condition: Snowing" in markdown + assert "temperature >= -5.0°C" in markdown + + # 3. Test JSON Serialization/Deserialization (Round-trip) + # This verifies that Pydantic uses the registry to find the class + json_str = expression.model_dump_json(by_alias=True) + loaded_expression = CohortExpression.model_validate_json(json_str) + + # Check that it loaded as a WeatherCondition object, not a generic Criteria or dict + loaded_criteria = loaded_expression.primary_criteria.criteria_list[0] + assert isinstance(loaded_criteria, WeatherCondition) + assert loaded_criteria.temperature_celsius == -5.0 + assert loaded_criteria.weather_concept_id[0].concept_name == "Snowing" + +def test_unregistered_extension_fails(): + """ + Verifies that using an unregistered extension key in JSON doesn't result + in a custom extension object. It will instead fall back to a standard + Criteria type (like ConditionOccurrence) because all of them have optional + fields and ignore extra fields. + """ + # Using a key that is NOT registered + bad_json_str = json.dumps({ + "PrimaryCriteria": { + "CriteriaList": [ + { + "UnregisteredKey": { + "SomeSpecificField": "Value" + } + } + ], + "ObservationWindow": {"PriorDays": 0, "PostDays": 0}, + "PrimaryLimit": {"Type": "First"} + } + }) + + loaded = CohortExpression.model_validate_json(bad_json_str) + item = loaded.primary_criteria.criteria_list[0] + + # It should NOT be a WeatherCondition (because it's not registered) + assert not isinstance(item, WeatherCondition) + + # It will likely be a ConditionOccurrence because it's first in the Union + # and all fields are optional with extra='ignore'. + assert not hasattr(item, "SomeSpecificField") diff --git a/waveform_extension/README.md b/waveform_extension/README.md new file mode 100644 index 0000000..5b300bb --- /dev/null +++ b/waveform_extension/README.md @@ -0,0 +1,128 @@ +# OHDSI Waveform Extension for circe_py + +This extension implements the full [OHDSI Waveform Extension specification](https://ohdsi.github.io/WaveformWG/waveform-tables.html) for cohort definition and SQL generation in circe_py. + +## Tables Implemented + +The extension provides criteria classes and SQL builders for all 4 waveform tables: + +1. **waveform_occurrence** - Clinical and temporal context for recording sessions +2. **waveform_registry** - File metadata (format, storage, temporal bounds) +3. **waveform_channel_metadata** - Signal parameters (sampling rates, gains, calibration) +4. **waveform_feature** - Derived measurements (heart rate, SpO2, arrhythmias, AI features) + +## Installation + +The waveform extension is included with circe_py in the `waveform_extension/` directory. To use it: + +```python +import waveform_extension +waveform_extension.register() +``` + +## Usage Examples + +### Example 1: ICU Monitoring Sessions with Multiple Files + +```python +from waveform_extension.criteria import WaveformOccurrence +from circe.cohortdefinition.core import NumericRange, DateRange + +criteria = WaveformOccurrence( + waveform_occurrence_concept_id=[create_concept(2000000001, "ICU Continuous Monitoring")], + occurrence_start_datetime=DateRange(value="2025-01-01", op="gte"), + num_of_files=NumericRange(value=10, op="gte") +) +``` + +**Generated SQL**: Queries `waveform_occurrence` table for ICU monitoring sessions with ≥10 files starting after 2025-01-01. + +### Example 2: High-Quality ECG Channels + +```python +from waveform_extension.criteria import WaveformChannelMetadata + +criteria = WaveformChannelMetadata( + channel_concept_id=[create_concept(2000000020, "ECG Lead II")], + metadata_concept_id=[create_concept(2000000030, "Sampling Rate")], + value_as_number=NumericRange(value=500, op="gte"), # ≥500 Hz + unit_concept_id=[create_concept(8504, "Hz")] +) +``` + +**Use Case**: Ensure high-quality signals for QRS detection. + +### Example 3: Derived Heart Rate (Most Clinically Valuable) + +```python +from waveform_extension.criteria import WaveformFeature + +criteria = WaveformFeature( + feature_concept_id=[create_concept(3027018, "Heart Rate")], + algorithm_concept_id=[create_concept(2000000040, "Pan-Tompkins QRS Detection")], + value_as_number=NumericRange(value=60, op="gte", extent=100), # 60-100 bpm + unit_concept_id=[create_concept(8541, "beats/min")] +) +``` + +**Use Case**: Identify patients with normal cardiac rhythm derived from waveform data. + +### Example 4: EDF File Format Filter + +```python +from waveform_extension.criteria import WaveformRegistry + +criteria = WaveformRegistry( + file_extension_concept_id=[create_concept(2000000010, "EDF")] +) +``` + +**Use Case**: Filter cohorts to only include patients with EDF waveform files. + +## Architecture + +The extension demonstrates the full circe_py extension capabilities: + +- **Criteria Classes** (`criteria.py`): 4 Pydantic models matching OHDSI spec +- **SQL Builders** (`builders/*.py`): 4 builders generating OHDSI-compliant SQL +- **Markdown Templates** (`templates/*.j2`): 4 Jinja2 templates for human-readable output +- **Registration** (`__init__.py`): Single function to register all components + +## Running the Examples + +```bash +cd /path/to/circe_py +export PYTHONPATH=. +python3 waveform_extension/example_usage.py +``` + +This will verify all 4 tables are correctly implemented with proper SQL generation and markdown rendering. + +## Clinical Use Cases + +### Waveform Occurrence +- ICU telemetry sessions +- Operating room monitoring +- Ambulatory ECG studies +- Sleep studies + +### Waveform Registry +- Filter by file format (EDF, WFDB, etc.) +- Temporal file-level queries +- Storage location audits + +### Waveform Channel Metadata +- Signal quality assurance +- Sampling rate requirements +- Device/procedure linkage +- Calibration verification + +### Waveform Feature +- Continuous vital signs (HR, RR, SpO2, BP) +- Arrhythmia detection +- AI-derived embeddings +- Physiological event detection + +## Reference + +Full specification: https://ohdsi.github.io/WaveformWG/waveform-tables.html diff --git a/waveform_extension/__init__.py b/waveform_extension/__init__.py new file mode 100644 index 0000000..e06b5a9 --- /dev/null +++ b/waveform_extension/__init__.py @@ -0,0 +1,32 @@ +from pathlib import Path +from circe.extensions import get_registry + +from .criteria import WaveformOccurrence, WaveformRegistry, WaveformChannelMetadata, WaveformFeature +from .builders.waveform_occurrence import WaveformOccurrenceSqlBuilder +from .builders.waveform_registry import WaveformRegistrySqlBuilder +from .builders.waveform_channel_metadata import WaveformChannelMetadataSqlBuilder +from .builders.waveform_feature import WaveformFeatureSqlBuilder + +def register(): + """Register the OHDSI Waveform Extension with circe_py.""" + registry = get_registry() + + # 1. Register Criteria Classes + registry.register_criteria_class("WaveformOccurrence", WaveformOccurrence) + registry.register_criteria_class("WaveformRegistry", WaveformRegistry) + registry.register_criteria_class("WaveformChannelMetadata", WaveformChannelMetadata) + registry.register_criteria_class("WaveformFeature", WaveformFeature) + + # 2. Register SQL Builders + registry.register_sql_builder(WaveformOccurrence, WaveformOccurrenceSqlBuilder) + registry.register_sql_builder(WaveformRegistry, WaveformRegistrySqlBuilder) + registry.register_sql_builder(WaveformChannelMetadata, WaveformChannelMetadataSqlBuilder) + registry.register_sql_builder(WaveformFeature, WaveformFeatureSqlBuilder) + + # 3. Register Markdown Templates + template_path = Path(__file__).parent / "templates" + registry.add_template_path(template_path) + registry.register_markdown_template(WaveformOccurrence, "waveform_occurrence.j2") + registry.register_markdown_template(WaveformRegistry, "waveform_registry.j2") + registry.register_markdown_template(WaveformChannelMetadata, "waveform_channel_metadata.j2") + registry.register_markdown_template(WaveformFeature, "waveform_feature.j2") diff --git a/waveform_extension/builders/waveform_channel_metadata.py b/waveform_extension/builders/waveform_channel_metadata.py new file mode 100644 index 0000000..f5abc4e --- /dev/null +++ b/waveform_extension/builders/waveform_channel_metadata.py @@ -0,0 +1,106 @@ +from typing import Set + +from circe.cohortdefinition.builders.base import CriteriaSqlBuilder +from circe.cohortdefinition.builders.utils import CriteriaColumn, BuilderUtils, BuilderOptions +from ..criteria import WaveformChannelMetadata + +class WaveformChannelMetadataSqlBuilder(CriteriaSqlBuilder[WaveformChannelMetadata]): + """ + SQL Builder for Waveform Channel Metadata criteria. + + Maps to the waveform_channel_metadata table in the OHDSI Waveform Extension. + Reference: https://ohdsi.github.io/WaveformWG/waveform-tables.html + """ + + def get_query_template(self) -> str: + return """ +SELECT C.person_id, C.waveform_channel_metadata_id as event_id, + NULL as start_date, NULL as end_date, + NULL as visit_occurrence_id, + NULL as sort_date +FROM @cdm_database_schema.waveform_channel_metadata C +LEFT JOIN @cdm_database_schema.waveform_registry WR ON C.waveform_registry_id = WR.waveform_registry_id +@codesetClause +@joinClause +WHERE @whereClause +""" + + def get_default_columns(self) -> Set[CriteriaColumn]: + return set() # Metadata doesn't have standard event columns + + def get_table_column_for_criteria_column(self, column: CriteriaColumn) -> str: + # Channel metadata doesn't map to standard event columns + raise ValueError(f"Invalid CriteriaColumn for Waveform Channel Metadata: {column}") + + def get_criteria_sql_with_options(self, criteria: WaveformChannelMetadata, options: BuilderOptions) -> str: + query = self.get_query_template() + + where_clauses = [] + join_clauses = [] + codeset_clause = "" + + # Link to registry file + if criteria.waveform_registry_id: + where_clauses.append( + BuilderUtils.build_numeric_range_clause("C.waveform_registry_id", criteria.waveform_registry_id) + ) + + # Channel identification + if criteria.channel_concept_id: + ids = [str(c.concept_id) for c in criteria.channel_concept_id if c.concept_id] + if ids: + where_clauses.append(f"C.channel_concept_id IN ({','.join(ids)})") + if criteria.waveform_channel_source_value: + where_clauses.append( + BuilderUtils.build_text_filter_clause("C.waveform_channel_source_value", criteria.waveform_channel_source_value) + ) + + # Metadata type + if criteria.metadata_concept_id: + ids = [str(c.concept_id) for c in criteria.metadata_concept_id if c.concept_id] + if ids: + where_clauses.append(f"C.metadata_concept_id IN ({','.join(ids)})") + if criteria.metadata_source_value: + where_clauses.append( + BuilderUtils.build_text_filter_clause("C.metadata_source_value", criteria.metadata_source_value) + ) + + # Metadata values + if criteria.value_as_number: + where_clauses.append( + BuilderUtils.build_numeric_range_clause("C.value_as_number", criteria.value_as_number) + ) + if criteria.value_as_concept_id: + ids = [str(c.concept_id) for c in criteria.value_as_concept_id if c.concept_id] + if ids: + where_clauses.append(f"C.value_as_concept_id IN ({','.join(ids)})") + + # Units + if criteria.unit_concept_id: + ids = [str(c.concept_id) for c in criteria.unit_concept_id if c.concept_id] + if ids: + where_clauses.append(f"C.unit_concept_id IN ({','.join(ids)})") + + # Device/procedure linkage + if criteria.device_exposure_id: + where_clauses.append( + BuilderUtils.build_numeric_range_clause("C.device_exposure_id", criteria.device_exposure_id) + ) + if criteria.procedure_occurrence_id: + where_clauses.append( + BuilderUtils.build_numeric_range_clause("C.procedure_occurrence_id", criteria.procedure_occurrence_id) + ) + + # Get person_id from registry since it's not in channel_metadata + where_clauses.append("WR.person_id IS NOT NULL") + + # Apply replacements + query = query.replace("@cdm_database_schema", options.cdm_database_schema if options else "@cdm_database_schema") + query = query.replace("@codesetClause", codeset_clause) + query = query.replace("@joinClause", "\n".join(join_clauses)) + query = query.replace("@whereClause", " AND ".join(where_clauses) if where_clauses else "1=1") + + # Fix person_id in SELECT - need to pull from registry + query = query.replace("C.person_id", "WR.person_id") + + return query diff --git a/waveform_extension/builders/waveform_feature.py b/waveform_extension/builders/waveform_feature.py new file mode 100644 index 0000000..8512939 --- /dev/null +++ b/waveform_extension/builders/waveform_feature.py @@ -0,0 +1,134 @@ +from typing import Set + +from circe.cohortdefinition.builders.base import CriteriaSqlBuilder +from circe.cohortdefinition.builders.utils import CriteriaColumn, BuilderUtils, BuilderOptions +from ..criteria import WaveformFeature + +class WaveformFeatureSqlBuilder(CriteriaSqlBuilder[WaveformFeature]): + """ + SQL Builder for Waveform Feature criteria. + + Maps to the waveform_feature table in the OHDSI Waveform Extension. + This is the most clinically valuable table for cohort selection, containing + derived measurements like heart rate, SpO2, arrhythmia detections, etc. + + Reference: https://ohdsi.github.io/WaveformWG/waveform-tables.html + """ + + def get_query_template(self) -> str: + return """ +SELECT C.person_id, C.waveform_feature_id as event_id, + C.waveform_feature_start_timestamp as start_date, + C.waveform_feature_end_timestamp as end_date, + WO.visit_occurrence_id, + C.waveform_feature_start_timestamp as sort_date +FROM @cdm_database_schema.waveform_feature C +LEFT JOIN @cdm_database_schema.waveform_occurrence WO ON C.waveform_occurrence_id = WO.waveform_occurrence_id +@codesetClause +@joinClause +WHERE @whereClause +""" + + def get_default_columns(self) -> Set[CriteriaColumn]: + return { + CriteriaColumn.START_DATE, + CriteriaColumn.END_DATE, + CriteriaColumn.VISIT_ID + } + + def get_table_column_for_criteria_column(self, column: CriteriaColumn) -> str: + if column == CriteriaColumn.START_DATE: + return "C.waveform_feature_start_timestamp" + elif column == CriteriaColumn.END_DATE: + return "C.waveform_feature_end_timestamp" + elif column == CriteriaColumn.VISIT_ID: + return "WO.visit_occurrence_id" + else: + raise ValueError(f"Invalid CriteriaColumn for Waveform Feature: {column}") + + def get_criteria_sql_with_options(self, criteria: WaveformFeature, options: BuilderOptions) -> str: + query = self.get_query_template() + + where_clauses = [] + join_clauses = [] + codeset_clause = "" + + # Parent links + if criteria.waveform_occurrence_id: + where_clauses.append( + BuilderUtils.build_numeric_range_clause("C.waveform_occurrence_id", criteria.waveform_occurrence_id) + ) + if criteria.waveform_registry_id: + where_clauses.append( + BuilderUtils.build_numeric_range_clause("C.waveform_registry_id", criteria.waveform_registry_id) + ) + if criteria.waveform_channel_metadata_id: + where_clauses.append( + BuilderUtils.build_numeric_range_clause("C.waveform_channel_metadata_id", criteria.waveform_channel_metadata_id) + ) + + # Feature type (e.g., heart rate, SpO2) + if criteria.feature_concept_id: + ids = [str(c.concept_id) for c in criteria.feature_concept_id if c.concept_id] + if ids: + where_clauses.append(f"C.feature_concept_id IN ({','.join(ids)})") + + # Algorithm used + if criteria.algorithm_concept_id: + ids = [str(c.concept_id) for c in criteria.algorithm_concept_id if c.concept_id] + if ids: + where_clauses.append(f"C.algorithm_concept_id IN ({','.join(ids)})") + if criteria.algorithm_source_value: + where_clauses.append( + BuilderUtils.build_text_filter_clause("C.algorithm_source_value", criteria.algorithm_source_value) + ) + + # Temporal window + if criteria.feature_start_timestamp: + where_clauses.append( + BuilderUtils.build_date_range_clause("C.waveform_feature_start_timestamp", criteria.feature_start_timestamp) + ) + if criteria.feature_end_timestamp: + where_clauses.append( + BuilderUtils.build_date_range_clause("C.waveform_feature_end_timestamp", criteria.feature_end_timestamp) + ) + + # Feature values + if criteria.value_as_number: + where_clauses.append( + BuilderUtils.build_numeric_range_clause("C.value_as_number", criteria.value_as_number) + ) + if criteria.value_as_concept_id: + ids = [str(c.concept_id) for c in criteria.value_as_concept_id if c.concept_id] + if ids: + where_clauses.append(f"C.value_as_concept_id IN ({','.join(ids)})") + + # Units + if criteria.unit_concept_id: + ids = [str(c.concept_id) for c in criteria.unit_concept_id if c.concept_id] + if ids: + where_clauses.append(f"C.unit_concept_id IN ({','.join(ids)})") + + # Links to standard OMOP tables + if criteria.measurement_id: + where_clauses.append( + BuilderUtils.build_numeric_range_clause("C.measurement_id", criteria.measurement_id) + ) + if criteria.observation_id: + where_clauses.append( + BuilderUtils.build_numeric_range_clause("C.observation_id", criteria.observation_id) + ) + + # Get person_id from occurrence + where_clauses.append("WO.person_id IS NOT NULL") + + # Apply replacements + query = query.replace("@cdm_database_schema", options.cdm_database_schema if options else "@cdm_database_schema") + query = query.replace("@codesetClause", codeset_clause) + query = query.replace("@joinClause", "\n".join(join_clauses)) + query = query.replace("@whereClause", " AND ".join(where_clauses) if where_clauses else "1=1") + + # Fix person_id in SELECT - need to pull from occurrence + query = query.replace("C.person_id", "WO.person_id") + + return query diff --git a/waveform_extension/builders/waveform_occurrence.py b/waveform_extension/builders/waveform_occurrence.py new file mode 100644 index 0000000..2e5ba94 --- /dev/null +++ b/waveform_extension/builders/waveform_occurrence.py @@ -0,0 +1,105 @@ +from typing import Set + +from circe.cohortdefinition.builders.base import CriteriaSqlBuilder +from circe.cohortdefinition.builders.utils import CriteriaColumn, BuilderUtils, BuilderOptions +from ..criteria import WaveformOccurrence + +class WaveformOccurrenceSqlBuilder(CriteriaSqlBuilder[WaveformOccurrence]): + """ + SQL Builder for Waveform Occurrence criteria. + + Maps to the waveform_occurrence table in the OHDSI Waveform Extension. + Reference: https://ohdsi.github.io/WaveformWG/waveform-tables.html + """ + + def get_query_template(self) -> str: + return """ +SELECT C.person_id, C.waveform_occurrence_id as event_id, + C.waveform_occurrence_start_datetime as start_date, + C.waveform_occurrence_end_datetime as end_date, + C.visit_occurrence_id, + C.waveform_occurrence_start_datetime as sort_date +FROM @cdm_database_schema.waveform_occurrence C +@codesetClause +@joinClause +WHERE @whereClause +""" + + def get_default_columns(self) -> Set[CriteriaColumn]: + return { + CriteriaColumn.START_DATE, + CriteriaColumn.END_DATE, + CriteriaColumn.VISIT_ID, + CriteriaColumn.DOMAIN_CONCEPT + } + + def get_table_column_for_criteria_column(self, column: CriteriaColumn) -> str: + if column == CriteriaColumn.START_DATE: + return "C.waveform_occurrence_start_datetime" + elif column == CriteriaColumn.END_DATE: + return "C.waveform_occurrence_end_datetime" + elif column == CriteriaColumn.VISIT_ID: + return "C.visit_occurrence_id" + elif column == CriteriaColumn.DOMAIN_CONCEPT: + return "C.waveform_occurrence_concept_id" + else: + raise ValueError(f"Invalid CriteriaColumn for Waveform Occurrence: {column}") + + def get_criteria_sql_with_options(self, criteria: WaveformOccurrence, options: BuilderOptions) -> str: + query = self.get_query_template() + + where_clauses = [] + join_clauses = [] + codeset_clause = "" + + # Filter by waveform occurrence concept + if criteria.waveform_occurrence_concept_id: + ids = [str(c.concept_id) for c in criteria.waveform_occurrence_concept_id if c.concept_id] + if ids: + where_clauses.append(f"C.waveform_occurrence_concept_id IN ({','.join(ids)})") + + # Date filters + if criteria.occurrence_start_datetime: + where_clauses.append( + BuilderUtils.build_date_range_clause("C.waveform_occurrence_start_datetime", criteria.occurrence_start_datetime) + ) + if criteria.occurrence_end_datetime: + where_clauses.append( + BuilderUtils.build_date_range_clause("C.waveform_occurrence_end_datetime", criteria.occurrence_end_datetime) + ) + + # Visit context + if criteria.visit_occurrence_id: + where_clauses.append( + BuilderUtils.build_numeric_range_clause("C.visit_occurrence_id", criteria.visit_occurrence_id) + ) + if criteria.visit_detail_id: + where_clauses.append( + BuilderUtils.build_numeric_range_clause("C.visit_detail_id", criteria.visit_detail_id) + ) + + # File metadata + if criteria.num_of_files: + where_clauses.append( + BuilderUtils.build_numeric_range_clause("C.num_of_files", criteria.num_of_files) + ) + + # Source value text filter + if criteria.waveform_occurrence_source_value: + where_clauses.append( + BuilderUtils.build_text_filter_clause("C.waveform_occurrence_source_value", criteria.waveform_occurrence_source_value) + ) + + # Sequence/chain filtering + if criteria.preceding_waveform_occurrence_id: + where_clauses.append( + BuilderUtils.build_numeric_range_clause("C.preceding_waveform_occurrence_id", criteria.preceding_waveform_occurrence_id) + ) + + # Apply replacements + query = query.replace("@cdm_database_schema", options.cdm_database_schema if options else "@cdm_database_schema") + query = query.replace("@codesetClause", codeset_clause) + query = query.replace("@joinClause", "\n".join(join_clauses)) + query = query.replace("@whereClause", " AND ".join(where_clauses) if where_clauses else "1=1") + + return query diff --git a/waveform_extension/builders/waveform_registry.py b/waveform_extension/builders/waveform_registry.py new file mode 100644 index 0000000..02dc081 --- /dev/null +++ b/waveform_extension/builders/waveform_registry.py @@ -0,0 +1,94 @@ +from typing import Set + +from circe.cohortdefinition.builders.base import CriteriaSqlBuilder +from circe.cohortdefinition.builders.utils import CriteriaColumn, BuilderUtils, BuilderOptions +from ..criteria import WaveformRegistry + +class WaveformRegistrySqlBuilder(CriteriaSqlBuilder[WaveformRegistry]): + """ + SQL Builder for Waveform Registry criteria. + + Maps to the waveform_registry table in the OHDSI Waveform Extension. + Reference: https://ohdsi.github.io/WaveformWG/waveform-tables.html + """ + + def get_query_template(self) -> str: + return """ +SELECT C.person_id, C.waveform_registry_id as event_id, + C.waveform_file_start_datetime as start_date, + C.waveform_file_end_datetime as end_date, + C.visit_occurrence_id, + C.waveform_file_start_datetime as sort_date +FROM @cdm_database_schema.waveform_registry C +@codesetClause +@joinClause +WHERE @whereClause +""" + + def get_default_columns(self) -> Set[CriteriaColumn]: + return { + CriteriaColumn.START_DATE, + CriteriaColumn.END_DATE, + CriteriaColumn.VISIT_ID + } + + def get_table_column_for_criteria_column(self, column: CriteriaColumn) -> str: + if column == CriteriaColumn.START_DATE: + return "C.waveform_file_start_datetime" + elif column == CriteriaColumn.END_DATE: + return "C.waveform_file_end_datetime" + elif column == CriteriaColumn.VISIT_ID: + return "C.visit_occurrence_id" + else: + raise ValueError(f"Invalid CriteriaColumn for Waveform Registry: {column}") + + def get_criteria_sql_with_options(self, criteria: WaveformRegistry, options: BuilderOptions) -> str: + query = self.get_query_template() + + where_clauses = [] + join_clauses = [] + codeset_clause = "" + + # Link to parent occurrence + if criteria.waveform_occurrence_id: + where_clauses.append( + BuilderUtils.build_numeric_range_clause("C.waveform_occurrence_id", criteria.waveform_occurrence_id) + ) + + # File temporal bounds + if criteria.file_start_datetime: + where_clauses.append( + BuilderUtils.build_date_range_clause("C.waveform_file_start_datetime", criteria.file_start_datetime) + ) + if criteria.file_end_datetime: + where_clauses.append( + BuilderUtils.build_date_range_clause("C.waveform_file_end_datetime", criteria.file_end_datetime) + ) + + # File format + if criteria.file_extension_concept_id: + ids = [str(c.concept_id) for c in criteria.file_extension_concept_id if c.concept_id] + if ids: + where_clauses.append(f"C.file_extension_concept_id IN ({','.join(ids)})") + if criteria.file_extension_source_value: + where_clauses.append( + BuilderUtils.build_text_filter_clause("C.file_extension_source_value", criteria.file_extension_source_value) + ) + + # Visit context + if criteria.visit_occurrence_id: + where_clauses.append( + BuilderUtils.build_numeric_range_clause("C.visit_occurrence_id", criteria.visit_occurrence_id) + ) + if criteria.visit_detail_id: + where_clauses.append( + BuilderUtils.build_numeric_range_clause("C.visit_detail_id", criteria.visit_detail_id) + ) + + # Apply replacements + query = query.replace("@cdm_database_schema", options.cdm_database_schema if options else "@cdm_database_schema") + query = query.replace("@codesetClause", codeset_clause) + query = query.replace("@joinClause", "\n".join(join_clauses)) + query = query.replace("@whereClause", " AND ".join(where_clauses) if where_clauses else "1=1") + + return query diff --git a/waveform_extension/criteria.py b/waveform_extension/criteria.py new file mode 100644 index 0000000..545401f --- /dev/null +++ b/waveform_extension/criteria.py @@ -0,0 +1,284 @@ +from typing import Optional, List +from pydantic import Field, AliasChoices + +from circe.cohortdefinition.criteria import Criteria, CriteriaGroup +from circe.cohortdefinition.core import NumericRange, DateRange, TextFilter +from circe.vocabulary.concept import Concept + +class WaveformOccurrence(Criteria): + """ + Criteria for Waveform Occurrence. + + Represents the clinical and temporal context for a waveform recording session. + Maps to the waveform_occurrence table in the OHDSI Waveform Extension. + + Reference: https://ohdsi.github.io/WaveformWG/waveform-tables.html + """ + # Core concept - type of waveform recording + waveform_occurrence_concept_id: Optional[List[Concept]] = Field( + default=None, + validation_alias=AliasChoices("WaveformOccurrenceConceptId", "waveformOccurrenceConceptId"), + serialization_alias="WaveformOccurrenceConceptId" + ) + + # Temporal bounds + occurrence_start_datetime: Optional[DateRange] = Field( + default=None, + validation_alias=AliasChoices("OccurrenceStartDatetime", "occurrenceStartDatetime"), + serialization_alias="OccurrenceStartDatetime" + ) + occurrence_end_datetime: Optional[DateRange] = Field( + default=None, + validation_alias=AliasChoices("OccurrenceEndDatetime", "occurrenceEndDatetime"), + serialization_alias="OccurrenceEndDatetime" + ) + + # Visit context + visit_occurrence_id: Optional[NumericRange] = Field( + default=None, + validation_alias=AliasChoices("VisitOccurrenceId", "visitOccurrenceId"), + serialization_alias="VisitOccurrenceId" + ) + visit_detail_id: Optional[NumericRange] = Field( + default=None, + validation_alias=AliasChoices("VisitDetailId", "visitDetailId"), + serialization_alias="VisitDetailId" + ) + + # File metadata + num_of_files: Optional[NumericRange] = Field( + default=None, + validation_alias=AliasChoices("NumOfFiles", "numOfFiles"), + serialization_alias="NumOfFiles" + ) + + # Source identifiers + waveform_occurrence_source_value: Optional[TextFilter] = Field( + default=None, + validation_alias=AliasChoices("WaveformOccurrenceSourceValue", "waveformOccurrenceSourceValue"), + serialization_alias="WaveformOccurrenceSourceValue" + ) + + # Sequence/chain filtering + preceding_waveform_occurrence_id: Optional[NumericRange] = Field( + default=None, + validation_alias=AliasChoices("PrecedingWaveformOccurrenceId", "precedingWaveformOccurrenceId"), + serialization_alias="PrecedingWaveformOccurrenceId" + ) + +class WaveformRegistry(Criteria): + """ + Criteria for Waveform Registry. + + Registers individual waveform files with their storage locations, formats, and temporal boundaries. + Maps to the waveform_registry table in the OHDSI Waveform Extension. + + Reference: https://ohdsi.github.io/WaveformWG/waveform-tables.html + """ + # Link to parent occurrence + waveform_occurrence_id: Optional[NumericRange] = Field( + default=None, + validation_alias=AliasChoices("WaveformOccurrenceId", "waveformOccurrenceId"), + serialization_alias="WaveformOccurrenceId" + ) + + # File temporal bounds + file_start_datetime: Optional[DateRange] = Field( + default=None, + validation_alias=AliasChoices("FileStartDatetime", "fileStartDatetime"), + serialization_alias="FileStartDatetime" + ) + file_end_datetime: Optional[DateRange] = Field( + default=None, + validation_alias=AliasChoices("FileEndDatetime", "fileEndDatetime"), + serialization_alias="FileEndDatetime" + ) + + # File format + file_extension_concept_id: Optional[List[Concept]] = Field( + default=None, + validation_alias=AliasChoices("FileExtensionConceptId", "fileExtensionConceptId"), + serialization_alias="FileExtensionConceptId" + ) + file_extension_source_value: Optional[TextFilter] = Field( + default=None, + validation_alias=AliasChoices("FileExtensionSourceValue", "fileExtensionSourceValue"), + serialization_alias="FileExtensionSourceValue" + ) + + # Visit context (denormalized for easier querying) + visit_occurrence_id: Optional[NumericRange] = Field( + default=None, + validation_alias=AliasChoices("VisitOccurrenceId", "visitOccurrenceId"), + serialization_alias="VisitOccurrenceId" + ) + visit_detail_id: Optional[NumericRange] = Field( + default=None, + validation_alias=AliasChoices("VisitDetailId", "visitDetailId"), + serialization_alias="VisitDetailId" + ) + + +class WaveformChannelMetadata(Criteria): + """ + Criteria for Waveform Channel Metadata. + + Describes per-signal-channel metadata including sampling rates, gains, calibration factors, + and signal quality indicators. + Maps to the waveform_channel_metadata table in the OHDSI Waveform Extension. + + Reference: https://ohdsi.github.io/WaveformWG/waveform-tables.html + """ + # Link to registry file + waveform_registry_id: Optional[NumericRange] = Field( + default=None, + validation_alias=AliasChoices("WaveformRegistryId", "waveformRegistryId"), + serialization_alias="WaveformRegistryId" + ) + + # Channel identification + channel_concept_id: Optional[List[Concept]] = Field( + default=None, + validation_alias=AliasChoices("ChannelConceptId", "channelConceptId"), + serialization_alias="ChannelConceptId" + ) + waveform_channel_source_value: Optional[TextFilter] = Field( + default=None, + validation_alias=AliasChoices("WaveformChannelSourceValue", "waveformChannelSourceValue"), + serialization_alias="WaveformChannelSourceValue" + ) + + # Metadata type (e.g., sampling rate, gain, offset) + metadata_concept_id: Optional[List[Concept]] = Field( + default=None, + validation_alias=AliasChoices("MetadataConceptId", "metadataConceptId"), + serialization_alias="MetadataConceptId" + ) + metadata_source_value: Optional[TextFilter] = Field( + default=None, + validation_alias=AliasChoices("MetadataSourceValue", "metadataSourceValue"), + serialization_alias="MetadataSourceValue" + ) + + # Metadata values (at least one must be populated) + value_as_number: Optional[NumericRange] = Field( + default=None, + validation_alias=AliasChoices("ValueAsNumber", "valueAsNumber"), + serialization_alias="ValueAsNumber" + ) + value_as_concept_id: Optional[List[Concept]] = Field( + default=None, + validation_alias=AliasChoices("ValueAsConceptId", "valueAsConceptId"), + serialization_alias="ValueAsConceptId" + ) + + # Units for numeric values + unit_concept_id: Optional[List[Concept]] = Field( + default=None, + validation_alias=AliasChoices("UnitConceptId", "unitConceptId"), + serialization_alias="UnitConceptId" + ) + + # Device/procedure linkage + device_exposure_id: Optional[NumericRange] = Field( + default=None, + validation_alias=AliasChoices("DeviceExposureId", "deviceExposureId"), + serialization_alias="DeviceExposureId" + ) + procedure_occurrence_id: Optional[NumericRange] = Field( + default=None, + validation_alias=AliasChoices("ProcedureOccurrenceId", "procedureOccurrenceId"), + serialization_alias="ProcedureOccurrenceId" + ) + + +class WaveformFeature(Criteria): + """ + Criteria for Waveform Feature. + + Stores measurements and features derived from waveform signals. + Supports both traditional signal processing features and AI-derived embeddings. + Maps to the waveform_feature table in the OHDSI Waveform Extension. + + Reference: https://ohdsi.github.io/WaveformWG/waveform-tables.html + """ + # Parent links + waveform_occurrence_id: Optional[NumericRange] = Field( + default=None, + validation_alias=AliasChoices("WaveformOccurrenceId", "waveformOccurrenceId"), + serialization_alias="WaveformOccurrenceId" + ) + waveform_registry_id: Optional[NumericRange] = Field( + default=None, + validation_alias=AliasChoices("WaveformRegistryId", "waveformRegistryId"), + serialization_alias="WaveformRegistryId" + ) + waveform_channel_metadata_id: Optional[NumericRange] = Field( + default=None, + validation_alias=AliasChoices("WaveformChannelMetadataId", "waveformChannelMetadataId"), + serialization_alias="WaveformChannelMetadataId" + ) + + # Feature type (e.g., heart rate, SpO2, QRS detection) + feature_concept_id: Optional[List[Concept]] = Field( + default=None, + validation_alias=AliasChoices("FeatureConceptId", "featureConceptId"), + serialization_alias="FeatureConceptId" + ) + + # Algorithm used to derive feature + algorithm_concept_id: Optional[List[Concept]] = Field( + default=None, + validation_alias=AliasChoices("AlgorithmConceptId", "algorithmConceptId"), + serialization_alias="AlgorithmConceptId" + ) + algorithm_source_value: Optional[TextFilter] = Field( + default=None, + validation_alias=AliasChoices("AlgorithmSourceValue", "algorithmSourceValue"), + serialization_alias="AlgorithmSourceValue" + ) + + # Temporal window for feature + feature_start_timestamp: Optional[DateRange] = Field( + default=None, + validation_alias=AliasChoices("FeatureStartTimestamp", "featureStartTimestamp"), + serialization_alias="FeatureStartTimestamp" + ) + feature_end_timestamp: Optional[DateRange] = Field( + default=None, + validation_alias=AliasChoices("FeatureEndTimestamp", "featureEndTimestamp"), + serialization_alias="FeatureEndTimestamp" + ) + + # Feature values (at least one must be populated) + value_as_number: Optional[NumericRange] = Field( + default=None, + validation_alias=AliasChoices("ValueAsNumber", "valueAsNumber"), + serialization_alias="ValueAsNumber" + ) + value_as_concept_id: Optional[List[Concept]] = Field( + default=None, + validation_alias=AliasChoices("ValueAsConceptId", "valueAsConceptId"), + serialization_alias="ValueAsConceptId" + ) + + # Units for numeric values + unit_concept_id: Optional[List[Concept]] = Field( + default=None, + validation_alias=AliasChoices("UnitConceptId", "unitConceptId"), + serialization_alias="UnitConceptId" + ) + + # Links to standard OMOP tables + measurement_id: Optional[NumericRange] = Field( + default=None, + validation_alias=AliasChoices("MeasurementId", "measurementId"), + serialization_alias="MeasurementId" + ) + observation_id: Optional[NumericRange] = Field( + default=None, + validation_alias=AliasChoices("ObservationId", "observationId"), + serialization_alias="ObservationId" + ) + +# Rebuild models to resolve forward references diff --git a/waveform_extension/example_usage.py b/waveform_extension/example_usage.py new file mode 100644 index 0000000..66f9577 --- /dev/null +++ b/waveform_extension/example_usage.py @@ -0,0 +1,210 @@ +""" +Comprehensive example demonstrating the full OHDSI Waveform Extension. + +This example showcases all 4 waveform tables: +1. waveform_occurrence - Clinical context for recording sessions +2. waveform_registry - File metadata +3. waveform_channel_metadata - Signal parameters (sampling rates, etc.) +4. waveform_feature - Derived measurements (heart rate, SpO2, etc.) + +Reference: https://ohdsi.github.io/WaveformWG/waveform-tables.html +""" + +import json +from circe.cohortdefinition import CohortExpression, PrimaryCriteria +from circe.cohortdefinition.cohort_expression_query_builder import CohortExpressionQueryBuilder, BuildExpressionQueryOptions +from circe.cohortdefinition.printfriendly.markdown_render import MarkdownRender +from circe.vocabulary.concept import Concept +from circe.cohortdefinition.core import NumericRange, DateRange + +# Register the extension +import waveform_extension +waveform_extension.register() + +# Import criteria classes +from waveform_extension.criteria import ( + WaveformOccurrence, WaveformRegistry, + WaveformChannelMetadata, WaveformFeature +) + +def create_concept(concept_id, name): + """Helper to create a concept.""" + return Concept( + concept_id=concept_id, + concept_name=name, + invalid_reason="", + domain_id="Waveform", + vocabulary_id="Custom", + concept_class_id="Waveform", + standard_concept="S", + concept_code=str(concept_id) + ) + +# ============================================================================= +# Example 1: ICU monitoring session with multiple files +# ============================================================================= +print("=" * 80) +print("Example 1: ICU Telemetry Session with ≥10 Files") +print("=" * 80) + +waveform_occ_example = WaveformOccurrence( + waveform_occurrence_concept_id=[create_concept(2000000001, "ICU Continuous Monitoring")], + occurrence_start_datetime=DateRange(value="2025-01-01", op="gte"), + num_of_files=NumericRange(value=10, op="gte") +) + +expression1 = CohortExpression( + primary_criteria=PrimaryCriteria( + criteria_list=[waveform_occ_example], + observation_window={"priorDays": 0, "postDays": 0}, + primary_limit={"type": "First"} + ), + concept_sets=[], + inclusion_rules=[], + qualified_limit={"type": "First"}, + expression_limit={"type": "First"} +) + +builder = CohortExpressionQueryBuilder() +options = BuildExpressionQueryOptions() +options.cdm_schema = "cdm" +options.result_schema = "results" +options.cohort_id = 1 + +sql1 = builder.build_expression_query(expression1, options) +print("\n--- SQL Snippet ---") +print(sql1[sql1.find("FROM"):sql1.find("FROM")+200] + "...") +print("\n✓ Table: waveform_occurrence") +print("✓ Filters: ICU monitoring, ≥10 files, starting after 2025-01-01") + +md1 = MarkdownRender().render_cohort_expression(expression1) +print("\n--- Markdown ---") +print(md1.split("\n")[4:7]) # Print relevant lines + +# ============================================================================= +# Example 2: EDF files from emergency department +# ============================================================================= +print("\n" + "=" * 80) +print("Example 2: EDF Waveform Files") +print("=" * 80) + +waveform_reg_example = WaveformRegistry( + file_extension_concept_id=[create_concept(2000000010, "EDF")] +) + +expression2 = CohortExpression( + primary_criteria=PrimaryCriteria( + criteria_list=[waveform_reg_example], + observation_window={"priorDays": 0, "postDays": 0}, + primary_limit={"type": "First"} + ), + concept_sets=[], + inclusion_rules=[], + qualified_limit={"type": "First"} +) + +sql2 = builder.build_expression_query(expression2, options) +print("\n--- SQL Snippet ---") +print(sql2[sql2.find("FROM"):sql2.find("FROM")+200] + "...") +print("\n✓ Table: waveform_registry") +print("✓ Filters: EDF file format only") + +# ============================================================================= +# Example 3: High-quality ECG Lead II at ≥500Hz +# ============================================================================= +print("\n" + "=" * 80) +print("Example 3: High-Quality ECG Lead II (≥500 Hz)") +print("=" * 80) + +waveform_chan_example = WaveformChannelMetadata( + channel_concept_id=[create_concept(2000000020, "ECG Lead II")], + metadata_concept_id=[create_concept(2000000030, "Sampling Rate")], + value_as_number=NumericRange(value=500, op="gte"), # ≥500 Hz + unit_concept_id=[create_concept(8504, "Hz")] +) + +expression3 = CohortExpression( + primary_criteria=PrimaryCriteria( + criteria_list=[waveform_chan_example], + observation_window={"priorDays": 0, "postDays": 0}, + primary_limit={"type": "First"} + ), + concept_sets=[], + inclusion_rules=[], + qualified_limit={"type": "First"} +) + +sql3 = builder.build_expression_query(expression3, options) +print("\n--- SQL Snippet ---") +print(sql3[sql3.find("FROM"):sql3.find("FROM")+250] + "...") +print("\n✓ Table: waveform_channel_metadata") +print("✓ Filters: ECG Lead II, sampling rate ≥500 Hz") +print("✓ Use Case: Ensure high-quality signals for QRS detection") + +# ============================================================================= +# Example 4: Derived Heart Rate 60-100 bpm (MOST CLINICALLY VALUABLE) +# ============================================================================= +print("\n" + "=" * 80) +print("Example 4: Derived Heart Rate 60-100 bpm (Normal Range)") +print("=" * 80) + +waveform_feat_example = WaveformFeature( + feature_concept_id=[create_concept(3027018, "Heart Rate")], + algorithm_concept_id=[create_concept(2000000040, "Pan-Tompkins QRS Detection")], + value_as_number=NumericRange(value=60, op="gte", extent=100), # 60-100 bpm + unit_concept_id=[create_concept(8541, "beats/min")] +) + +expression4 = CohortExpression( + primary_criteria=PrimaryCriteria( + criteria_list=[waveform_feat_example], + observation_window={"priorDays": 0, "postDays": 0}, + primary_limit={"type": "First"} + ), + concept_sets=[], + inclusion_rules=[], + qualified_limit={"type": "First"} +) + +sql4 = builder.build_expression_query(expression4, options) +print("\n--- SQL Snippet ---") +print(sql4[sql4.find("FROM"):sql4.find("FROM")+250] + "...") +print("\n✓ Table: waveform_feature") +print("✓ Filters: Heart Rate 60-100 bpm derived by Pan-Tompkins algorithm") +print("✓ Use Case: Identify patients with normal cardiac rhythm") + +md4 = MarkdownRender().render_cohort_expression(expression4) +print("\n--- Markdown ---") +print(md4.split("\n")[4:7]) # Print relevant lines + +# ============================================================================= +# Verification Summary +# ============================================================================= +print("\n" + "=" * 80) +print("VERIFICATION SUMMARY") +print("=" * 80) + +checks = [ + ("waveform_occurrence table used", "waveform_occurrence" in sql1), + ("waveform_registry table used", "waveform_registry" in sql2), + ("waveform_channel_metadata table used", "waveform_channel_metadata" in sql3), + ("waveform_feature table used", "waveform_feature" in sql4), + ("Correct column: waveform_occurrence_concept_id", "waveform_occurrence_concept_id" in sql1), + ("Correct column: waveform_occurrence_start_datetime", "waveform_occurrence_start_datetime" in sql1), + ("Correct column: file_extension_concept_id", "file_extension_concept_id" in sql2), + ("Correct column: channel_concept_id", "channel_concept_id" in sql3), + ("Correct column: feature_concept_id", "feature_concept_id" in sql4), + ("Markdown rendering works", "waveform-derived feature" in md4.lower()) +] + +for check_name, result in checks: + status = "✓" if result else "✗" + print(f"{status} {check_name}") + +all_passed = all(r for _, r in checks) +print("\n" + ("="*80)) +if all_passed: + print("SUCCESS: All 4 OHDSI Waveform Extension tables implemented correctly!") +else: + print("FAILURE: Some checks failed") +print("=" * 80) diff --git a/waveform_extension/templates/waveform_channel_metadata.j2 b/waveform_extension/templates/waveform_channel_metadata.j2 new file mode 100644 index 0000000..de54b86 --- /dev/null +++ b/waveform_extension/templates/waveform_channel_metadata.j2 @@ -0,0 +1,32 @@ +{%- import 'input_types.j2' as inputTypes -%} + +{%- macro WaveformChannelMetadata(c, level, isPlural=true, countCriteria={}, indexLabel="cohort entry") -%} + {%- set attrs = [] -%} + + {%- if c.channel_concept_id -%} + {%- set temp -%}channel type: {{ inputTypes.ConceptList(c.channel_concept_id) }}{%- endset -%} + {%- set _ = attrs.append(temp) -%} + {%- endif -%} + + {%- if c.metadata_concept_id -%} + {%- set temp -%}metadata type: {{ inputTypes.ConceptList(c.metadata_concept_id) }}{%- endset -%} + {%- set _ = attrs.append(temp) -%} + {%- endif -%} + + {%- if c.value_as_number -%} + {%- set temp -%}value {{ inputTypes.NumericRange(c.value_as_number) }}{%- endset -%} + {%- set _ = attrs.append(temp) -%} + {%- endif -%} + + {%- if c.unit_concept_id -%} + {%- set temp -%}units: {{ inputTypes.ConceptList(c.unit_concept_id) }}{%- endset -%} + {%- set _ = attrs.append(temp) -%} + {%- endif -%} + + waveform channel metadata record{% if isPlural and not c.first %}s{% endif %} + {%- if c.first %} for the first time in the person's history{% endif -%} + {%- if attrs|length > 0 -%}, {{ attrs|join("; ") }}{%- endif -%} + . +{%- endmacro -%} + +{{ WaveformChannelMetadata(criteria, level, isPlural, countCriteria, indexLabel) }} diff --git a/waveform_extension/templates/waveform_feature.j2 b/waveform_extension/templates/waveform_feature.j2 new file mode 100644 index 0000000..efecb76 --- /dev/null +++ b/waveform_extension/templates/waveform_feature.j2 @@ -0,0 +1,42 @@ +{%- import 'input_types.j2' as inputTypes -%} + +{%- macro WaveformFeature(c, level, isPlural=true, countCriteria={}, indexLabel="cohort entry") -%} + {%- set attrs = [] -%} + + {%- if c.feature_concept_id -%} + {%- set temp -%}feature type: {{ inputTypes.ConceptList(c.feature_concept_id) }}{%- endset -%} + {%- set _ = attrs.append(temp) -%} + {%- endif -%} + + {%- if c.algorithm_concept_id -%} + {%- set temp -%}detected by {{ inputTypes.ConceptList(c.algorithm_concept_id) }}{%- endset -%} + {%- set _ = attrs.append(temp) -%} + {%- endif -%} + + {%- if c.value_as_number -%} + {%- set temp -%}value {{ inputTypes.NumericRange(c.value_as_number) }}{%- endset -%} + {%- set _ = attrs.append(temp) -%} + {%- endif -%} + + {%- if c.unit_concept_id -%} + {%- set temp -%}units: {{ inputTypes.ConceptList(c.unit_concept_id) }}{%- endset -%} + {%- set _ = attrs.append(temp) -%} + {%- endif -%} + + {%- if c.feature_start_timestamp -%} + {%- set temp -%}starting {{ inputTypes.DateRange(c.feature_start_timestamp) }}{%- endset -%} + {%- set _ = attrs.append(temp) -%} + {%- endif -%} + + {%- if c.feature_end_timestamp -%} + {%- set temp -%}ending {{ inputTypes.DateRange(c.feature_end_timestamp) }}{%- endset -%} + {%- set _ = attrs.append(temp) -%} + {%- endif -%} + + waveform-derived feature{% if isPlural and not c.first %}s{% endif %} + {%- if c.first %} for the first time in the person's history{% endif -%} + {%- if attrs|length > 0 -%}, {{ attrs|join("; ") }}{%- endif -%} + . +{%- endmacro -%} + +{{ WaveformFeature(criteria, level, isPlural, countCriteria, indexLabel) }} diff --git a/waveform_extension/templates/waveform_occurrence.j2 b/waveform_extension/templates/waveform_occurrence.j2 new file mode 100644 index 0000000..db06653 --- /dev/null +++ b/waveform_extension/templates/waveform_occurrence.j2 @@ -0,0 +1,39 @@ +{%- import 'input_types.j2' as inputTypes -%} + +{%- macro WaveformOccurrence(c, level, isPlural=true, countCriteria={}, indexLabel="cohort entry") -%} + {%- set attrs = [] -%} + + {# Reuse core WindowCriteria logic if possible, or reimplement #} + {%- if countCriteria and countCriteria.occurrence and countCriteria.occurrence.count_window -%} + {# Simplifying for example #} + {%- set temp -%}occurring relative to {{ indexLabel }}{%- endset -%} + {%- set _ = attrs.append(temp) -%} + {%- endif -%} + + {%- if c.occurrence_start_datetime -%} + {%- set temp -%}starting {{ inputTypes.DateRange(c.occurrence_start_datetime) }}{%- endset -%} + {%- set _ = attrs.append(temp) -%} + {%- endif -%} + + {%- if c.occurrence_end_datetime -%} + {%- set temp -%}ending {{ inputTypes.DateRange(c.occurrence_end_datetime) }}{%- endset -%} + {%- set _ = attrs.append(temp) -%} + {%- endif -%} + + {%- if c.waveform_occurrence_concept_id -%} + {%- set temp -%}waveform type: {{ inputTypes.ConceptList(c.waveform_occurrence_concept_id) }}{%- endset -%} + {%- set _ = attrs.append(temp) -%} + {%- endif -%} + + {%- if c.num_of_files -%} + {%- set temp -%}with {{ inputTypes.NumericRange(c.num_of_files) }} files{%- endset -%} + {%- set _ = attrs.append(temp) -%} + {%- endif -%} + + waveform occurrence{% if isPlural and not c.first %}s{% endif %} + {%- if c.first %} for the first time in the person's history{% endif -%} + {%- if attrs|length > 0 -%}, {{ attrs|join("; ") }}{%- endif -%} + . +{%- endmacro -%} + +{{ WaveformOccurrence(criteria, level, isPlural, countCriteria, indexLabel) }} diff --git a/waveform_extension/templates/waveform_registry.j2 b/waveform_extension/templates/waveform_registry.j2 new file mode 100644 index 0000000..9e92956 --- /dev/null +++ b/waveform_extension/templates/waveform_registry.j2 @@ -0,0 +1,27 @@ +{%- import 'input_types.j2' as inputTypes -%} + +{%- macro WaveformRegistry(c, level, isPlural=true, countCriteria={}, indexLabel="cohort entry") -%} + {%- set attrs = [] -%} + + {%- if c.file_start_datetime -%} + {%- set temp -%}file starting {{ inputTypes.DateRange(c.file_start_datetime) }}{%- endset -%} + {%- set _ = attrs.append(temp) -%} + {%- endif -%} + + {%- if c.file_end_datetime -%} + {%- set temp -%}file ending {{ inputTypes.DateRange(c.file_end_datetime) }}{%- endset -%} + {%- set _ = attrs.append(temp) -%} + {%- endif -%} + + {%- if c.file_extension_concept_id -%} + {%- set temp -%}file format: {{ inputTypes.ConceptList(c.file_extension_concept_id) }}{%- endset -%} + {%- set _ = attrs.append(temp) -%} + {%- endif -%} + + waveform file{% if isPlural and not c.first %}s{% endif %} + {%- if c.first %} for the first time in the person's history{% endif -%} + {%- if attrs|length > 0 -%}, {{ attrs|join("; ") }}{%- endif -%} + . +{%- endmacro -%} + +{{ WaveformRegistry(criteria, level, isPlural, countCriteria, indexLabel) }}