From 735bc5fe3aa9511b88558b0382db9440155ed51e Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Mon, 12 May 2025 10:28:50 -0500 Subject: [PATCH 1/8] identify and flag naming conflicts in 2d vs 3d skims --- activitysim/core/skim_dataset.py | 63 +++++++++++++++++++++++++- activitysim/core/skim_dict_factory.py | 65 ++++++++++++++++++++++++--- 2 files changed, 119 insertions(+), 9 deletions(-) diff --git a/activitysim/core/skim_dataset.py b/activitysim/core/skim_dataset.py index 1ed871ec0f..49ce02d906 100644 --- a/activitysim/core/skim_dataset.py +++ b/activitysim/core/skim_dataset.py @@ -3,6 +3,7 @@ import glob import logging import os +import re import time from functools import partial from pathlib import Path @@ -499,8 +500,6 @@ def _apply_digital_encoding(dataset, digital_encodings): As modified """ if digital_encodings: - import re - # apply once, before saving to zarr, will stick around in cache for encoding in digital_encodings: logger.info(f"applying zarr digital-encoding: {encoding}") @@ -753,6 +752,66 @@ def load_skim_dataset_to_shared_memory(state, skim_tag="taz") -> xr.Dataset: d = None # skims are not stored in shared memory, so we need to load them do_not_save_zarr = False + potential_conflicts = network_los_preload.skims_info.get(skim_tag).skim_conflicts + if potential_conflicts: + # There are some conflicts in the skims, where both time-dependent + # and time-agnostic skim with the same name are present. We need + # to check we have sufficient ignore rules in place to correct this + # condition. + + def _should_ignore(ignore, x): + if isinstance(ignore, str): + ignore = [ignore] + if ignore is not None: + for i in ignore: + if re.match(i, x): + return True + return False + + ignore = state.settings.omx_ignore_patterns + problems = [] + for time_agnostic, time_dependent in potential_conflicts.items(): + # option 1, ignore all the time-dependent skims + # if this is fulfilled, we are ok and can proceed + if all((_should_ignore(ignore, i)) for i in time_dependent): + continue + # option 2, ignore the time-agnostic skim + # if this is fulfilled, we are ok and can proceed + if _should_ignore(ignore, time_agnostic): + continue + # otherwise, we have a problem. collect all the problems + # and raise an error at the end listing all of them + problems.append(time_agnostic) + if problems: + solution_1 = "__.+'\n - '^".join(problems) + solution_2 = "$'\n - '^".join(problems) + # we have a problem, raise an error + logger.error( + f"skims {problems} are present in both time-dependent and time-agnostic formats.\n" + "Please add ignore rules to the omx_ignore_patterns setting to resolve this issue.\n" + "To ignore the time dependent skims, add the following to your settings file:\n" + "\n" + "omx_ignore_patterns:\n" + f" - '^{solution_1}__.+'\n" + "\n" + "To ignore the time agnostic skims, add the following to your settings file:\n" + "\n" + "omx_ignore_patterns:\n" + f" - '^{solution_2}$'\n" + "\n" + "You can also do some variation or combination of the two, as long as you resolve\n" + "the conflict(s). In addition, note that minor edits to model spec files may be\n" + "needed to accommodate these changes in how skim data is represented (e.g. changing\n" + "`odt_skims` to `od_skims`, or similar modifications). Alternatively, you can\n" + "modify the skim data in the source files to remove the naming conflicts, which\n" + "is typically done upstream of ActivitySim in whatever tool you are using to create" + "the skims in the first place.\n" + ) + # raise an error to stop the run + raise ValueError( + f"skims {problems} are present in both time-dependent and time-agnostic formats" + ) + if d is None: time_periods = _dedupe_time_periods(network_los_preload) if zarr_file: diff --git a/activitysim/core/skim_dict_factory.py b/activitysim/core/skim_dict_factory.py index 78c7053583..e0bd23ef3e 100644 --- a/activitysim/core/skim_dict_factory.py +++ b/activitysim/core/skim_dict_factory.py @@ -8,6 +8,7 @@ import os import warnings from abc import ABC +from collections import defaultdict import numpy as np import openmatrix as omx @@ -86,6 +87,7 @@ def __init__(self, state, skim_tag, network_los): self.offset_map_name = None self.offset_map = None self.omx_keys = None + self.skim_conflicts = None self.base_keys = None self.block_offsets = None @@ -117,7 +119,12 @@ def load_skim_info(self, state, skim_tag): logger.debug(f"load_skim_info {skim_tag} reading {omx_file_path}") with omx.open_file(omx_file_path, mode="r") as omx_file: - # fixme call to omx_file.shape() failing in windows p3.5 + + # Check the shape of the skims. All skim files loaded within this + # loop need to have the same shape. For the first file, the + # shape is set to the omx_shape attribute, so we know what shape + # to expect. For subsequent files, we check that the shape is the + # same as the first file. If not, we raise an error. if self.omx_shape is None: self.omx_shape = tuple( int(i) for i in omx_file.shape() @@ -127,6 +134,12 @@ def load_skim_info(self, state, skim_tag): int(i) for i in omx_file.shape() ), f"Mismatch shape {self.omx_shape} != {omx_file.shape()}" + # Check that all the matrix names are unique across all the + # omx files. This check is only looking at the name as stored in + # the file, and is not processing any time period transformations. + # If duplicate names are found, a warning is issued. This is not + # a fatal error, but it is inefficient and may be symptom of a + # deeper problem. for skim_name in omx_file.listMatrices(): if skim_name in self.omx_manifest: warnings.warn( @@ -134,6 +147,11 @@ def load_skim_info(self, state, skim_tag): ) self.omx_manifest[skim_name] = omx_file_path + # We load the offset map if it exists. This is expected to be + # a 1D array of integers that that gives ID values for each TAZ + # in the skims. ActivitySim expects there to be only one such + # mapping, although it can appear multiple times (e.g. once in + # each file). for m in omx_file.listMappings(): if self.offset_map is None: self.offset_map_name = m @@ -146,21 +164,54 @@ def load_skim_info(self, state, skim_tag): f"Multiple mappings in omx file: {self.offset_map_name} != {m}" ) - # - omx_keys dict maps skim key to omx_key - # DISTWALK: DISTWALK - # ('DRV_COM_WLK_BOARDS', 'AM'): DRV_COM_WLK_BOARDS__AM, ... + # Create the `omx_keys` mapping, which connects skim key to omx_key. + # The skim key is either a single string that names a skim that is not + # time-dependent, or a 2-tuple of strings which names a skim and a time + # period. The omx_key is the original name of the skim in the omx file. + # For non-time-dependent skims, the omx_key is the same as the skim key, + # e.g. DISTWALK: DISTWALK. For time-dependent skims, the omx_key is the + # skim key with the time period appended, + # e.g. ('DRV_COM_WLK_BOARDS', 'AM'): DRV_COM_WLK_BOARDS__AM. self.omx_keys = dict() for skim_name in self.omx_manifest.keys(): key1, sep, key2 = skim_name.partition("__") - # - ignore composite tags not in dim3_tags_to_load if dim3_tags_to_load and sep and key2 not in dim3_tags_to_load: + # If a skim is found that has a time period that is not one of + # the known named time periods, a warning is issued, and that + # skim is ignored. This is not a fatal error, but it may be a + # symptom of a deeper problem. + warnings.warn(f"skim '{key1}' has unknown time period '{key2}'") continue - skim_key = (key1, key2) if sep else key1 - self.omx_keys[skim_key] = skim_name + # Create a skim_conflicts set, which identifies any skims that have both + # time-dependent and time-agnostic versions. This condition in and of + # itself is not a fatal error, as it is possible to have both types of skims + # in the same data when using the legacy codebase. When using skim_dataset + # instead of skim_dictionary (the former is required when using sharrow) this + # condition is no longer allowed, although we can potentially recover if + # the user has specified instructions that certain skim variables are not + # to be loaded. The recovery option is checked later, in the skim_dataset + # module, as that is where the skim variables are actually loaded. + time_dependent_skims = defaultdict(set) + time_agnostic_skims = set() + for k, v in self.omx_keys.items(): + if isinstance(k, tuple): + time_dependent_skims[k[0]].add(v) + else: + time_agnostic_skims.add(k) + self.skim_conflicts = { + k: v for k, v in time_dependent_skims.items() if k in time_agnostic_skims + } + if self.skim_conflicts: + msg = "some skims have both time-dependent and time-agnostic versions:" + for k in self.skim_conflicts: + msg += f"\n- {k}" + warnings.warn(msg) + + # Count the number of skims in the omx file self.num_skims = len(self.omx_keys) # - key1_subkeys dict maps key1 to dict of subkeys with that key1 From 459aed7e2c61e1c248e20ed2b147d2a98beaa285 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Mon, 12 May 2025 14:24:14 -0500 Subject: [PATCH 2/8] add skims doc --- docs/users-guide/model_anatomy.rst | 71 ++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/docs/users-guide/model_anatomy.rst b/docs/users-guide/model_anatomy.rst index 461991a2e4..eee14aa576 100644 --- a/docs/users-guide/model_anatomy.rst +++ b/docs/users-guide/model_anatomy.rst @@ -21,21 +21,21 @@ file and the ``configs\network_los.yaml`` file. The following tables are currently implemented: - * households - household attributes for each household being simulated. Index: ``household_id`` (see ``activitysim.abm.tables.households.py``) - * landuse - zonal land use (such as population and employment) attributes. Index: ``zone_id`` (see ``activitysim.abm.tables.landuse.py``) - * persons - person attributes for each person being simulated. Index: ``person_id`` (see ``activitysim.abm.tables.persons.py``) - * time windows - manages person time windows throughout the simulation. See :ref:`time_windows`. Index: ``person_id`` (see the person_windows table create decorator in ``activitysim.abm.tables.time_windows.py``) - * tours - tour attributes for each tour (mandatory, non-mandatory, joint, and atwork-subtour) being simulated. Index: ``tour_id`` (see ``activitysim.abm.models.util.tour_frequency.py``) - * trips - trip attributes for each trip being simulated. Index: ``trip_id`` (see ``activitysim.abm.models.stop_frequency.py``) +* households - household attributes for each household being simulated. Index: ``household_id`` (see ``activitysim.abm.tables.households.py``) +* landuse - zonal land use (such as population and employment) attributes. Index: ``zone_id`` (see ``activitysim.abm.tables.landuse.py``) +* persons - person attributes for each person being simulated. Index: ``person_id`` (see ``activitysim.abm.tables.persons.py``) +* time windows - manages person time windows throughout the simulation. See :ref:`time_windows`. Index: ``person_id`` (see the person_windows table create decorator in ``activitysim.abm.tables.time_windows.py``) +* tours - tour attributes for each tour (mandatory, non-mandatory, joint, and atwork-subtour) being simulated. Index: ``tour_id`` (see ``activitysim.abm.models.util.tour_frequency.py``) +* trips - trip attributes for each trip being simulated. Index: ``trip_id`` (see ``activitysim.abm.models.stop_frequency.py``) A few additional tables are also used, which are not really tables, but classes: - * input store - reads input data tables from the input data store - * constants - various constants used throughout the model system, such as person type codes - * shadow pricing - shadow price calculator and associated utility methods, see :ref:`shadow_pricing` - * size terms - created by reading the ``destination_choice_size_terms.csv`` input file. Index - ``segment`` (see ``activitysim.abm.tables.size_terms.py``) - * skims - each model runs requires skims, but how the skims are defined can vary significantly depending on the ActivitySim implementation. The skims class defines Inject injectables to access the skim matrices. The skims class reads the skims from the omx_file on disk. - * table dictionary - stores which tables should be registered as random number generator channels for restartability of the pipeline +* input store - reads input data tables from the input data store +* constants - various constants used throughout the model system, such as person type codes +* shadow pricing - shadow price calculator and associated utility methods, see :ref:`shadow_pricing` +* size terms - created by reading the ``destination_choice_size_terms.csv`` input file. Index - ``segment`` (see ``activitysim.abm.tables.size_terms.py``) +* skims - each model runs requires skims, but how the skims are defined can vary significantly depending on the ActivitySim implementation. The skims class defines Inject injectables to access the skim matrices. The skims class reads the skims from the omx_file on disk. +* table dictionary - stores which tables should be registered as random number generator channels for restartability of the pipeline @@ -54,10 +54,10 @@ system for non-motorized travel, and optionally a transit access points (TAPs) z The three versions of multiple zone systems are one-zone, two-zone, and three-zone. - * **One-zone**: This version is based on TM1 and supports only TAZs. All origins and +* **One-zone**: This version is based on TM1 and supports only TAZs. All origins and destinations are represented at the TAZ level, and all skims including auto, transit, and non-motorized times and costs are also represented at the TAZ level. - * **Two-zone**: This version is similar to many DaySim models. It uses microzones (MAZs) +* **Two-zone**: This version is similar to many DaySim models. It uses microzones (MAZs) for origins and destinations, and TAZs for specification of auto and transit times and costs. Impedance for walk or bike all-the-way from the origin to the destination can be specified at the MAZ level for close together origins and destinations, and at @@ -65,7 +65,7 @@ The three versions of multiple zone systems are one-zone, two-zone, and three-zo walk access and egress times with times specified in the MAZ file by transit mode. Careful pre-calculation of the assumed transit walk access and egress time by MAZ and transit mode is required depending on the network scenario. - * **Three-zone**: This version is based on the SANDAG generation of CT-RAMP models. +* **Three-zone**: This version is based on the SANDAG generation of CT-RAMP models. Origins and destinations are represented at the MAZ level. Impedance for walk or bike all-the-way from the origin to the destination can be specified at the MAZ level for close together origins and destinations, and at the TAZ level for further @@ -84,8 +84,15 @@ The three versions of multiple zone systems are one-zone, two-zone, and three-zo combinations of nearby boarding and alighting TAPs for each origin destination MAZ pair. -Regions that have an interest in more precise transit forecasts may wish to adopt the -three-zone approach, while other regions may adopt the one or two-zone approach. The +.. caution:: + The ActivitySim consortium is moving away from the three-zone approach, in favor of + to the one- or two-zone approaches. The code for the three-zone approach remains + available for users who have already implemented it, but it is recommended that + users consider the one- or two-zone approaches for new implementations. + The three-zone system may be formally deprecated and removed in the future. + +Regions that have an interest in more precise transit and non-motorized forecasts +may wish to adopt the two-zone approach, while other regions may adopt the one or two-zone approach. The microzone version requires coding households and land use at the microzone level. Typically an all-streets network is used for representation of non-motorized impedances. This requires a routable all-streets network, with centroids and connectors for @@ -103,10 +110,38 @@ transit modeling. initial development, these examples were insufficient for validation and performance testing of the new software. As a result, the :ref:`prototype_marin` example was created. -Example simple test configurations and inputs for two and three-zone system models are described below. + +.. _omx_skims : + +Skims +~~~~~ + +The basic level-of-service data that represents the tranportation system is +made available to ActivitySim via one or more sets of "skims". Skims are +essentially matrices of travel times, costs, and other level of service attributes, +calculated between various zones in the system. This skim data is made available +to ActivitySim using files in the`openmatrix `__ +(OMX) format. All of the skim data can be provided in a single OMX file, or +multiple OMX files can be used (this is typical for larger models, to keep file +sizes manageable). If multiple files are used, the content of those files is +simply concatenated together into a single notional bucket of skim data when +the model is run. Within that bucket, each skim variable is identified by a unique name. +For skim variables that vary across model time periods, the time period is +appended to the skim name, separated by a double underscore (e.g. ``BUS_IVT__AM``). + +.. caution:: + When using "legacy" mode for ActivitySim, it is possible (but not recommended) + to have a skim variable that has both a time period agnostic value as well as + a set of time period dependent values, e.g. "WALK_TIME" and "WALK_TIME__AM". + This is not supported when using "sharrow" mode, and will result in an error. + Instead, each variable should have a unique name, which can be easily achieved by + changing the name on the time period agnostic value, i.e. instead of "WALK_TIME" + use ""WALK_TIME_BASE". + Examples ~~~~~~~~ +Example simple test configurations and inputs for two and three-zone system models are described below. To run the two zone and three zone system examples, do the following: From 90ebdb4ed87168d22b13acb3caa3e74d11ee3529 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Mon, 12 May 2025 14:30:18 -0500 Subject: [PATCH 3/8] run on workflow_dispatch --- .github/workflows/branch-docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/branch-docs.yml b/.github/workflows/branch-docs.yml index dde4f03b73..3f2b8d549d 100644 --- a/.github/workflows/branch-docs.yml +++ b/.github/workflows/branch-docs.yml @@ -7,7 +7,7 @@ on: jobs: docbuild: - if: "contains(github.event.head_commit.message, '[makedocs]') && (github.repository_owner != 'ActivitySim') && (github.ref_name != 'develop')" + if: "github.event_name == 'workflow_dispatch' || (contains(github.event.head_commit.message, '[makedocs]') && (github.repository_owner != 'ActivitySim') && (github.ref_name != 'develop'))" # develop branch docs are built at the end of the core test workflow, regardless of repository owner or commit message flags name: ubuntu-latest py3.10 runs-on: ubuntu-latest From 5957ee1ee3e6b529829bbb2beab2793ef6a4bc8c Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Mon, 12 May 2025 16:22:16 -0500 Subject: [PATCH 4/8] add link to docs --- activitysim/core/skim_dataset.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/activitysim/core/skim_dataset.py b/activitysim/core/skim_dataset.py index 49ce02d906..ad3d2c0aeb 100644 --- a/activitysim/core/skim_dataset.py +++ b/activitysim/core/skim_dataset.py @@ -806,6 +806,8 @@ def _should_ignore(ignore, x): "modify the skim data in the source files to remove the naming conflicts, which\n" "is typically done upstream of ActivitySim in whatever tool you are using to create" "the skims in the first place.\n" + "\n" + "See [https://activitysim.github.io/?q=skims] for more information.\n" ) # raise an error to stop the run raise ValueError( From 80c649e0b26cc87d218788b2f687f4d6baae913f Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Mon, 12 May 2025 16:34:54 -0500 Subject: [PATCH 5/8] write error message in exception also --- activitysim/core/skim_dataset.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/activitysim/core/skim_dataset.py b/activitysim/core/skim_dataset.py index ad3d2c0aeb..1e359af3d3 100644 --- a/activitysim/core/skim_dataset.py +++ b/activitysim/core/skim_dataset.py @@ -786,7 +786,7 @@ def _should_ignore(ignore, x): solution_1 = "__.+'\n - '^".join(problems) solution_2 = "$'\n - '^".join(problems) # we have a problem, raise an error - logger.error( + error_message = ( f"skims {problems} are present in both time-dependent and time-agnostic formats.\n" "Please add ignore rules to the omx_ignore_patterns setting to resolve this issue.\n" "To ignore the time dependent skims, add the following to your settings file:\n" @@ -802,17 +802,17 @@ def _should_ignore(ignore, x): "You can also do some variation or combination of the two, as long as you resolve\n" "the conflict(s). In addition, note that minor edits to model spec files may be\n" "needed to accommodate these changes in how skim data is represented (e.g. changing\n" - "`odt_skims` to `od_skims`, or similar modifications). Alternatively, you can\n" - "modify the skim data in the source files to remove the naming conflicts, which\n" - "is typically done upstream of ActivitySim in whatever tool you are using to create" - "the skims in the first place.\n" + "`odt_skims` to `od_skims`, or similar modifications wherever the offending variable\n" + "names are used). Alternatively, you can modify the skim data in the source files to\n" + "remove the naming conflicts, which is typically done upstream of ActivitySim in\n" + "whatever tool you are using to create the skims in the first place.\n" "\n" "See [https://activitysim.github.io/?q=skims] for more information.\n" ) - # raise an error to stop the run - raise ValueError( - f"skims {problems} are present in both time-dependent and time-agnostic formats" - ) + # write the error message to the log + logger.error(error_message) + # raise an error to stop the run, put the entire error message there also + raise ValueError(error_message) if d is None: time_periods = _dedupe_time_periods(network_los_preload) From f74c8d4db831ded3290c2fd1b9021d8d3726ea45 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Tue, 13 May 2025 11:56:43 -0500 Subject: [PATCH 6/8] add error message example --- docs/users-guide/model_anatomy.rst | 44 ++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/docs/users-guide/model_anatomy.rst b/docs/users-guide/model_anatomy.rst index eee14aa576..f5408e7b28 100644 --- a/docs/users-guide/model_anatomy.rst +++ b/docs/users-guide/model_anatomy.rst @@ -116,7 +116,7 @@ transit modeling. Skims ~~~~~ -The basic level-of-service data that represents the tranportation system is +The basic level-of-service data that represents the transportation system is made available to ActivitySim via one or more sets of "skims". Skims are essentially matrices of travel times, costs, and other level of service attributes, calculated between various zones in the system. This skim data is made available @@ -133,10 +133,44 @@ appended to the skim name, separated by a double underscore (e.g. ``BUS_IVT__AM` When using "legacy" mode for ActivitySim, it is possible (but not recommended) to have a skim variable that has both a time period agnostic value as well as a set of time period dependent values, e.g. "WALK_TIME" and "WALK_TIME__AM". - This is not supported when using "sharrow" mode, and will result in an error. - Instead, each variable should have a unique name, which can be easily achieved by - changing the name on the time period agnostic value, i.e. instead of "WALK_TIME" - use ""WALK_TIME_BASE". + If you have conflicting names like this, a warning message will be issued, which + will look like this in an ActivitySim log file: + + WARNING: activitysim/core/skim_dict_factory.py:212: + UserWarning: some skims have both time-dependent and time-agnostic versions: + - BIKE_LOGSUM + - BIKE_TIME + + This is a warning, not an error, and the model will run if not using sharrow. + However, if "sharrow" mode is activated, this will result in an error once the + skims are actually loaded, unless instructions are included in the settings file + to resolve the conflict. The error message will look like this: + + ERROR: skims ['BIKE_TIME'] are present in both time-dependent and time-agnostic formats. + Please add ignore rules to the omx_ignore_patterns setting to resolve this issue. + To ignore the time dependent skims, add the following to your settings file: + + omx_ignore_patterns: + - '^BIKE_TIME__.+' + + To ignore the time agnostic skims, add the following to your settings file: + + omx_ignore_patterns: + - '^BIKE_TIME$' + + You can also do some variation or combination of the two, as long as you resolve + the conflict(s). In addition, note that minor edits to model spec files may be + needed to accommodate these changes in how skim data is represented (e.g. changing + `odt_skims` to `od_skims`, or similar modifications wherever the offending variable + names are used). Alternatively, you can modify the skim data in the source files to + remove the naming conflicts, which is typically done upstream of ActivitySim in + whatever tool you are using to create the skims in the first place. + + It should be relatively simple to resolve the conflict by following the instructions + in the error message. The cleaner and more reliable solution is to ensure each skim + variable has a unique name, e.g. by changing the name on the time period agnostic + value, so that instead of "BIKE_TIME" it is "BIKE_TIME_BASE". This may also require + minor edits to the model spec files to accommodate the new skim name. Examples From 2d6c59fe6e13e8043e126f2603b7c9c51990a665 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Tue, 13 May 2025 12:56:18 -0500 Subject: [PATCH 7/8] code block formatting --- docs/users-guide/model_anatomy.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/users-guide/model_anatomy.rst b/docs/users-guide/model_anatomy.rst index f5408e7b28..ad93a7a771 100644 --- a/docs/users-guide/model_anatomy.rst +++ b/docs/users-guide/model_anatomy.rst @@ -136,6 +136,8 @@ appended to the skim name, separated by a double underscore (e.g. ``BUS_IVT__AM` If you have conflicting names like this, a warning message will be issued, which will look like this in an ActivitySim log file: + .. code-block:: text + WARNING: activitysim/core/skim_dict_factory.py:212: UserWarning: some skims have both time-dependent and time-agnostic versions: - BIKE_LOGSUM @@ -146,6 +148,8 @@ appended to the skim name, separated by a double underscore (e.g. ``BUS_IVT__AM` skims are actually loaded, unless instructions are included in the settings file to resolve the conflict. The error message will look like this: + .. code-block:: text + ERROR: skims ['BIKE_TIME'] are present in both time-dependent and time-agnostic formats. Please add ignore rules to the omx_ignore_patterns setting to resolve this issue. To ignore the time dependent skims, add the following to your settings file: From f21c4be267856fcf18574ccc6104f4a3c83f3244 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Tue, 29 Jul 2025 11:18:47 -0500 Subject: [PATCH 8/8] unit tests --- .github/workflows/core_tests.yml | 12 +-- test/test_skim_name_conflicts.py | 125 +++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+), 5 deletions(-) create mode 100644 test/test_skim_name_conflicts.py diff --git a/.github/workflows/core_tests.yml b/.github/workflows/core_tests.yml index 2542ffd546..4811282783 100644 --- a/.github/workflows/core_tests.yml +++ b/.github/workflows/core_tests.yml @@ -36,7 +36,7 @@ jobs: uses: actions/setup-python@v5 with: python-version-file: ".python-version" - + - name: Install activitysim run: | uv sync --locked --only-group github-action @@ -128,6 +128,8 @@ jobs: run: | uv run pytest --pyargs activitysim.cli + - run: uv run pytest test/test_skim_name_conflicts.py + - run: uv run pytest test/random_seed/test_random_seed.py builtin_regional_models: needs: foundation @@ -166,7 +168,7 @@ jobs: uses: actions/setup-python@v5 with: python-version-file: ".python-version" - + - name: Install activitysim run: | uv sync --locked --only-group github-action @@ -235,7 +237,7 @@ jobs: uses: actions/setup-python@v5 with: python-version-file: ".python-version" - + - name: Install activitysim run: | uv sync --locked --only-group github-action @@ -277,7 +279,7 @@ jobs: uses: actions/setup-python@v5 with: python-version-file: ".python-version" - + - name: Install activitysim run: | uv sync --locked --only-group github-action @@ -310,7 +312,7 @@ jobs: uses: actions/setup-python@v5 with: python-version-file: ".python-version" - + - name: Install activitysim run: | uv sync --locked --only-group github-action diff --git a/test/test_skim_name_conflicts.py b/test/test_skim_name_conflicts.py new file mode 100644 index 0000000000..eafd5ee426 --- /dev/null +++ b/test/test_skim_name_conflicts.py @@ -0,0 +1,125 @@ +from __future__ import annotations + +import os +import shutil +from importlib.resources import files +from pathlib import Path + +import openmatrix +import pytest + +import activitysim.abm # noqa: F401 +from activitysim.core import workflow + + +def example_path(dirname): + resource = files("activitysim.examples.placeholder_sandag").joinpath(dirname) + return str(resource) + + +def mtc_example_path(dirname): + resource = files("activitysim.examples.prototype_mtc").joinpath(dirname) + return str(resource) + + +def psrc_example_path(dirname): + resource = files("activitysim.examples.placeholder_psrc").joinpath(dirname) + return str(resource) + + +@pytest.fixture(scope="session") +def example_data_dir(tmp_path_factory) -> Path: + """Fixture to provide the path to the example data directory.""" + td = tmp_path_factory.mktemp("skim-conflict-data") + shutil.copytree(example_path("data_2"), td.joinpath("data_2")) + shutil.copy( + example_path(os.path.join("data_3", "maz_to_maz_bike.csv")), + td.joinpath("data_2"), + ) + + # add extra skims to OMX to create a conflict + + with openmatrix.open_file(td.joinpath("data_2").joinpath("skims1.omx"), "a") as omx: + for t in ["EA", "AM", "MD", "PM", "EV"]: + # Create a new matrix for each time period + omx.createMatrix(f"DISTBIKE__{t}", obj=omx["DISTBIKE"][:]) + + return td.joinpath("data_2") + + +def test_skim_name_conflicts(example_data_dir, tmp_path_factory): + # when sharrow is required, the run should fail due to conflicting skim names + state = workflow.State.make_default( + data_dir=example_data_dir, + configs_dir=( + example_path("configs_2_zone"), + psrc_example_path("configs"), + ), + output_dir=tmp_path_factory.mktemp("out-fail"), + settings={ + "households_sample_size": 20, + "sharrow": "require", + "disable_zarr": True, + }, + ) + with pytest.raises(ValueError): + state.run( + [ + "initialize_landuse", + "initialize_households", + ] + ) + + +def test_skim_name_conflicts_no_sharrow(example_data_dir, tmp_path_factory): + # when sharrow is disabled, the run should warn about conflicting skim names but not fail + state = workflow.State.make_default( + data_dir=example_data_dir, + configs_dir=( + example_path("configs_2_zone"), + psrc_example_path("configs"), + ), + output_dir=tmp_path_factory.mktemp("out-pass"), + settings={ + "households_sample_size": 20, + "sharrow": False, + "disable_zarr": True, + }, + ) + # Run the beginning workflow with the modified settings, should only warn about the conflict + with pytest.warns( + UserWarning, + match="some skims have both time-dependent and time-agnostic versions", + ): + state.run( + [ + "initialize_landuse", + "initialize_households", + ] + ) + + +@pytest.mark.parametrize("solution", ["^DISTBIKE$", "^DISTBIKE__.+"]) +def test_skim_name_conflicts_ok(example_data_dir, tmp_path_factory, solution): + # when sharrow is required, and omx_ignore_patterns is set correctly, + # the run should work without raising an error + state = workflow.State.make_default( + data_dir=example_data_dir, + configs_dir=( + example_path("configs_2_zone"), + psrc_example_path("configs"), + ), + output_dir=tmp_path_factory.mktemp("out-solved"), + settings={ + "households_sample_size": 20, + "sharrow": "require", + "disable_zarr": True, + "omx_ignore_patterns": [solution], + }, + ) + state.run( + [ + "initialize_landuse", + "initialize_households", + ] + )