From ef735e034e08a93bbae16e9065e9de9112b28688 Mon Sep 17 00:00:00 2001 From: Farid Yagubbayli Date: Thu, 12 Feb 2026 15:19:56 +0000 Subject: [PATCH] Collect & Compare Example Inputs/Outputs --- Makefile | 19 +- examples/checkpointing/checkpoint.py | 59 ++++-- .../sd_directivity_modelling_2D.py | 7 +- .../sd_focussed_detector_2D.py | 6 +- .../sd_focussed_detector_3D.py | 6 +- .../us_bmode_linear_transducer.py | 2 +- .../us_bmode_phased_array.py | 6 +- .../save_to_disk_func.py | 11 +- kwave/kspaceFirstOrder2D.py | 6 +- kwave/kspaceFirstOrder3D.py | 6 +- kwave/kspaceFirstOrderAS.py | 6 +- kwave/options/simulation_options.py | 139 +++++++++++- scripts/compare_example_outputs.py | 198 ++++++++++++++++++ tests/test_examples_rng_policy.py | 90 ++++++++ tests/test_simulation_options_output_paths.py | 103 +++++++++ 15 files changed, 612 insertions(+), 52 deletions(-) create mode 100644 scripts/compare_example_outputs.py create mode 100644 tests/test_examples_rng_policy.py create mode 100644 tests/test_simulation_options_output_paths.py diff --git a/Makefile b/Makefile index eea052dc4..981e4a828 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,9 @@ KWAVE_MATLAB_PATH = $(abspath ../k-wave) # Get absolute path of k-wave directory # Define the artifact directory COLLECTED_VALUES_DIR = $(abspath tests/matlab_test_data_collectors/python_testers/collectedValues) +EXAMPLE_OUTPUT_DIR = /tmp/example_runs +COMPARE_OUTPUT_DIR_A ?= /tmp/example_runs +COMPARE_OUTPUT_DIR_B ?= /tmp/example_runs_1 # Default target all: run-examples test @@ -17,7 +20,19 @@ all: run-examples test # Target to run all examples run-examples: @echo "Running all examples..." - @MPLBACKEND=Agg $(PYTHON) run_examples.py + @PYTHONPATH="$(CURDIR):$$PYTHONPATH" MPLBACKEND=Agg $(PYTHON) run_examples.py + +# Target to run all examples and collect non-dated HDF5 inputs/outputs +run-examples-no-dates: + @echo "Running all examples with non-dated HDF5 filenames..." 
+ @rm -rf $(EXAMPLE_OUTPUT_DIR) + @PYTHONPATH="$(CURDIR):$$PYTHONPATH" KWAVE_USE_DATED_FILENAMES=0 MPLBACKEND=Agg $(PYTHON) run_examples.py + @echo "Collected example files in $(EXAMPLE_OUTPUT_DIR)" + +# Target to compare two example output roots +compare-example-outputs: + @echo "Comparing HDF outputs: $(COMPARE_OUTPUT_DIR_A) vs $(COMPARE_OUTPUT_DIR_B)" + @PYTHONPATH="$(CURDIR):$$PYTHONPATH" $(PYTHON) scripts/compare_example_outputs.py $(COMPARE_OUTPUT_DIR_A) $(COMPARE_OUTPUT_DIR_B) # Target to run pytest, which depends on running the MATLAB script first test: $(COLLECTED_VALUES_DIR) @@ -41,4 +56,4 @@ clean-collected_values: @echo "Cleaning collected values directory..." @rm -rf $(COLLECTED_VALUES_DIR) -.PHONY: all run-examples run-tests clean-python clean-collected_values clean +.PHONY: all run-examples run-examples-no-dates compare-example-outputs run-tests clean-python clean-collected_values clean diff --git a/examples/checkpointing/checkpoint.py b/examples/checkpointing/checkpoint.py index 1f4590dfd..1be3b7616 100644 --- a/examples/checkpointing/checkpoint.py +++ b/examples/checkpointing/checkpoint.py @@ -1,6 +1,5 @@ -from copy import copy from pathlib import Path -from tempfile import TemporaryDirectory +from shutil import copy2 import numpy as np @@ -15,6 +14,8 @@ from kwave.utils.filters import smooth from kwave.utils.mapgen import make_ball +EXAMPLE_OUTPUT_DIR = Path("/tmp/example_runs/checkpointing") + # This script demonstrates how to use the checkpointing feature of k-Wave. # It runs the same simulation twice, with the second run starting from a # checkpoint file created during the first run. @@ -30,6 +31,17 @@ # Note: checkpoint timesteps and checkpoint interval must be integers. 
+def _next_available_path(path: Path) -> Path: + if not path.exists(): + return path + suffix = 1 + while True: + candidate = path.with_name(f"{path.stem}_{suffix}{path.suffix}") + if not candidate.exists(): + return candidate + suffix += 1 + + def make_simulation_parameters(directory: Path, checkpoint_timesteps: int): """ See the 3D FFT Reconstruction For A Planar Sensor example for context. @@ -71,6 +83,7 @@ def make_simulation_parameters(directory: Path, checkpoint_timesteps: int): pml_inside=False, smooth_p0=False, data_cast="single", + allow_file_overwrite=True, input_filename=input_filename, output_filename=output_filename, ) @@ -84,23 +97,31 @@ def main(): # the halfway point. checkpoint_timesteps = 106 - with TemporaryDirectory() as tmpdir: - tmpdir = Path(tmpdir) - # create the simulation parameters for the 1st run - kgrid, medium, source, sensor, simulation_options, execution_options = make_simulation_parameters( - directory=tmpdir, checkpoint_timesteps=checkpoint_timesteps - ) - kspaceFirstOrder3D(kgrid, source, sensor, medium, simulation_options, execution_options) - print("Temporary directory contains 3 files (including checkpoint):") - print("\t-", "\n\t- ".join([f.name for f in tmpdir.glob("*.h5")])) - - # create the simulation parameters for the 2nd run - kgrid, medium, source, sensor, simulation_options, execution_options = make_simulation_parameters( - directory=tmpdir, checkpoint_timesteps=checkpoint_timesteps - ) - kspaceFirstOrder3D(kgrid, source, sensor, medium, simulation_options, execution_options) - print("Temporary directory contains 2 files (checkpoint has been deleted):") - print("\t-", "\n\t- ".join([f.name for f in tmpdir.glob("*.h5")])) + EXAMPLE_OUTPUT_DIR.mkdir(parents=True, exist_ok=True) + for filename in ("kwave_input.h5", "kwave_output.h5", "kwave_checkpoint.h5"): + (EXAMPLE_OUTPUT_DIR / filename).unlink(missing_ok=True) + + # create the simulation parameters for the 1st run + kgrid, medium, source, sensor, simulation_options, 
execution_options = make_simulation_parameters( + directory=EXAMPLE_OUTPUT_DIR, + checkpoint_timesteps=checkpoint_timesteps, + ) + kspaceFirstOrder3D(kgrid, source, sensor, medium, simulation_options, execution_options) + copy2(simulation_options.input_filename, _next_available_path(EXAMPLE_OUTPUT_DIR / "run_1_kwave_input.h5")) + copy2(simulation_options.output_filename, _next_available_path(EXAMPLE_OUTPUT_DIR / "run_1_kwave_output.h5")) + print("Checkpoint output directory after run_1:") + print("\t-", "\n\t- ".join([f.name for f in EXAMPLE_OUTPUT_DIR.glob("*.h5")])) + + # create the simulation parameters for the 2nd run + kgrid, medium, source, sensor, simulation_options, execution_options = make_simulation_parameters( + directory=EXAMPLE_OUTPUT_DIR, + checkpoint_timesteps=checkpoint_timesteps, + ) + kspaceFirstOrder3D(kgrid, source, sensor, medium, simulation_options, execution_options) + copy2(simulation_options.input_filename, _next_available_path(EXAMPLE_OUTPUT_DIR / "run_2_kwave_input.h5")) + copy2(simulation_options.output_filename, _next_available_path(EXAMPLE_OUTPUT_DIR / "run_2_kwave_output.h5")) + print("Checkpoint output directory after run_2:") + print("\t-", "\n\t- ".join([f.name for f in EXAMPLE_OUTPUT_DIR.glob("*.h5")])) if __name__ == "__main__": diff --git a/examples/sd_directivity_modelling_2D/sd_directivity_modelling_2D.py b/examples/sd_directivity_modelling_2D/sd_directivity_modelling_2D.py index d7b21e8f7..8f9699582 100644 --- a/examples/sd_directivity_modelling_2D/sd_directivity_modelling_2D.py +++ b/examples/sd_directivity_modelling_2D/sd_directivity_modelling_2D.py @@ -1,6 +1,4 @@ -import os from copy import deepcopy -from tempfile import gettempdir import matplotlib.pyplot as plt import numpy as np @@ -73,9 +71,8 @@ # run the simulation input_filename = f"example_input_{source_loop + 1}_input.h5" - pathname = gettempdir() - input_file_full_path = os.path.join(pathname, input_filename) - simulation_options = 
SimulationOptions(save_to_disk=True, input_filename=input_filename, data_path=pathname) + output_filename = f"example_output_{source_loop + 1}_output.h5" + simulation_options = SimulationOptions(save_to_disk=True, input_filename=input_filename, output_filename=output_filename) # run the simulation sensor_data = kspaceFirstOrder2DC( medium=medium, diff --git a/examples/sd_focussed_detector_2D/sd_focussed_detector_2D.py b/examples/sd_focussed_detector_2D/sd_focussed_detector_2D.py index 38091b941..7c7131ba5 100644 --- a/examples/sd_focussed_detector_2D/sd_focussed_detector_2D.py +++ b/examples/sd_focussed_detector_2D/sd_focussed_detector_2D.py @@ -1,9 +1,7 @@ # # Focussed Detector In 2D Example # This example shows how k-Wave-python can be used to model the output of a focused semicircular detector, where the directionality arises from spatially averaging across the detector surface. Unlike the original example in k-Wave, this example does not visualize the simulation, as this functionality is not intrinsically supported by the accelerated binaries. 
-import os from copy import deepcopy -from tempfile import gettempdir import matplotlib.pyplot as plt import numpy as np @@ -43,9 +41,7 @@ # ## Define simulation parameters input_filename = "example_sd_focused_2d_input.h5" -pathname = gettempdir() -input_file_full_path = os.path.join(pathname, input_filename) -simulation_options = SimulationOptions(save_to_disk=True, input_filename=input_filename, data_path=pathname) +simulation_options = SimulationOptions(save_to_disk=True, input_filename=input_filename) # ## Run simulation with first source diff --git a/examples/sd_focussed_detector_3D/sd_focussed_detector_3D.py b/examples/sd_focussed_detector_3D/sd_focussed_detector_3D.py index 03e5fd6e5..ecb5c9afb 100644 --- a/examples/sd_focussed_detector_3D/sd_focussed_detector_3D.py +++ b/examples/sd_focussed_detector_3D/sd_focussed_detector_3D.py @@ -1,9 +1,7 @@ # Focussed Detector In 3D Example # This example shows how k-Wave can be used to model the output of a focussed bowl detector where the directionality arises from spatially averaging across the detector surface. 
-import os from copy import deepcopy -from tempfile import gettempdir import matplotlib.pyplot as plt import numpy as np @@ -33,10 +31,8 @@ _ = kgrid.makeTime(medium.sound_speed) input_filename = "example_sd_focused_3d_input.h5" -pathname = gettempdir() -input_file_full_path = os.path.join(pathname, input_filename) simulation_options = SimulationOptions( - save_to_disk=True, input_filename=input_filename, data_path=pathname, pml_size=10, data_cast="single" + save_to_disk=True, input_filename=input_filename, pml_size=10, data_cast="single" ) # create a concave sensor diff --git a/examples/us_bmode_linear_transducer/us_bmode_linear_transducer.py b/examples/us_bmode_linear_transducer/us_bmode_linear_transducer.py index 4cd1f8251..6e3ebab8d 100644 --- a/examples/us_bmode_linear_transducer/us_bmode_linear_transducer.py +++ b/examples/us_bmode_linear_transducer/us_bmode_linear_transducer.py @@ -25,7 +25,7 @@ # simulation settings DATA_CAST = "single" -RUN_SIMULATION = False +RUN_SIMULATION = True pml_size_points = Vector([20, 10, 10]) # [grid points] grid_size_points = Vector([256, 128, 128]) - 2 * pml_size_points # [grid points] diff --git a/examples/us_bmode_phased_array/us_bmode_phased_array.py b/examples/us_bmode_phased_array/us_bmode_phased_array.py index 823d17964..059e41c07 100644 --- a/examples/us_bmode_phased_array/us_bmode_phased_array.py +++ b/examples/us_bmode_phased_array/us_bmode_phased_array.py @@ -22,6 +22,7 @@ # simulation settings DATA_CAST = "single" RUN_SIMULATION = True +RNG_SEED = 123456 pml_size_points = Vector([15, 10, 10]) # [grid points] @@ -83,18 +84,19 @@ not_transducer = NotATransducer(transducer, kgrid, **not_transducer) +rng = np.random.default_rng(RNG_SEED) # Define a random distribution of scatterers for the medium background_map_mean = 1 background_map_std = 0.008 -background_map = background_map_mean + background_map_std * np.random.randn(kgrid.Nx, kgrid.Ny, kgrid.Nz) +background_map = background_map_mean + background_map_std * 
rng.standard_normal((kgrid.Nx, kgrid.Ny, kgrid.Nz)) sound_speed_map = c0 * background_map density_map = rho0 * background_map # Define a random distribution of scatterers for the highly scattering region -scattering_map = np.random.randn(kgrid.Nx, kgrid.Ny, kgrid.Nz) +scattering_map = rng.standard_normal((kgrid.Nx, kgrid.Ny, kgrid.Nz)) scattering_c0 = np.clip(c0 + 25 + 75 * scattering_map, 1400, 1600) scattering_rho0 = scattering_c0 / 1.5 diff --git a/kwave/kWaveSimulation_helper/save_to_disk_func.py b/kwave/kWaveSimulation_helper/save_to_disk_func.py index 178b7a714..f72fcfa75 100644 --- a/kwave/kWaveSimulation_helper/save_to_disk_func.py +++ b/kwave/kWaveSimulation_helper/save_to_disk_func.py @@ -15,7 +15,14 @@ def save_to_disk_func( - kgrid: kWaveGrid, medium: kWaveMedium, source, opt: SimulationOptions, auto_chunk: bool, values: dotdict, flags: dotdict + kgrid: kWaveGrid, + medium: kWaveMedium, + source, + opt: SimulationOptions, + auto_chunk: bool, + values: dotdict, + flags: dotdict, + input_filename: str | None = None, ): # update command line status logging.log(logging.INFO, f" precomputation completed in {scale_time(TicToc.toc())}") @@ -63,7 +70,7 @@ def save_to_disk_func( # ========================================================================= remove_z_dimension(float_variables, kgrid.dim) - save_file(opt.input_filename, integer_variables, float_variables, opt.hdf_compression_level, auto_chunk=auto_chunk) + save_file(input_filename or opt.input_filename, integer_variables, float_variables, opt.hdf_compression_level, auto_chunk=auto_chunk) # update command line status logging.log(logging.INFO, f" completed in {scale_time(TicToc.toc())}") diff --git a/kwave/kspaceFirstOrder2D.py b/kwave/kspaceFirstOrder2D.py index 3806e6cc0..de87b82a0 100644 --- a/kwave/kspaceFirstOrder2D.py +++ b/kwave/kspaceFirstOrder2D.py @@ -12,7 +12,7 @@ from kwave.kWaveSimulation import kWaveSimulation from kwave.kWaveSimulation_helper import retract_transducer_grid_size, 
save_to_disk_func from kwave.options.simulation_execution_options import SimulationExecutionOptions -from kwave.options.simulation_options import SimulationOptions +from kwave.options.simulation_options import SimulationOptions, resolve_filenames_for_run from kwave.utils.dotdictionary import dotdict from kwave.utils.interp import interpolate2d from kwave.utils.pml import get_pml @@ -300,6 +300,7 @@ def kspaceFirstOrder2D( if options.save_to_disk: # store the pml size for resizing transducer object below retract_size = [[options.pml_x_size, options.pml_y_size, options.pml_z_size]] + input_filename, output_filename = resolve_filenames_for_run(k_sim.options) # run subscript to save files to disk save_to_disk_func( @@ -348,6 +349,7 @@ def kspaceFirstOrder2D( "cuboid_corners": k_sim.cuboid_corners, } ), + input_filename=input_filename, ) # run subscript to resize the transducer object if the grid has been expanded @@ -359,5 +361,5 @@ def kspaceFirstOrder2D( executor = Executor(simulation_options=simulation_options, execution_options=execution_options) executor_options = execution_options.as_list(sensor=k_sim.sensor) - sensor_data = executor.run_simulation(k_sim.options.input_filename, k_sim.options.output_filename, options=executor_options) + sensor_data = executor.run_simulation(input_filename, output_filename, options=executor_options) return sensor_data diff --git a/kwave/kspaceFirstOrder3D.py b/kwave/kspaceFirstOrder3D.py index be6c904a7..097075343 100644 --- a/kwave/kspaceFirstOrder3D.py +++ b/kwave/kspaceFirstOrder3D.py @@ -12,7 +12,7 @@ from kwave.kWaveSimulation import kWaveSimulation from kwave.kWaveSimulation_helper import retract_transducer_grid_size, save_to_disk_func from kwave.options.simulation_execution_options import SimulationExecutionOptions -from kwave.options.simulation_options import SimulationOptions +from kwave.options.simulation_options import SimulationOptions, resolve_filenames_for_run from kwave.utils.dotdictionary import dotdict from 
kwave.utils.interp import interpolate3d from kwave.utils.pml import get_pml @@ -313,6 +313,7 @@ def kspaceFirstOrder3D( if options.save_to_disk: # store the pml size for resizing transducer object below retract_size = [[options.pml_x_size, options.pml_y_size, options.pml_z_size]] + input_filename, output_filename = resolve_filenames_for_run(k_sim.options) # run subscript to save files to disk save_to_disk_func( @@ -361,6 +362,7 @@ def kspaceFirstOrder3D( "cuboid_corners": k_sim.cuboid_corners, } ), + input_filename=input_filename, ) # run subscript to resize the transducer object if the grid has been expanded @@ -372,5 +374,5 @@ def kspaceFirstOrder3D( executor = Executor(simulation_options=simulation_options, execution_options=execution_options) executor_options = execution_options.as_list(sensor=k_sim.sensor) - sensor_data = executor.run_simulation(k_sim.options.input_filename, k_sim.options.output_filename, options=executor_options) + sensor_data = executor.run_simulation(input_filename, output_filename, options=executor_options) return sensor_data diff --git a/kwave/kspaceFirstOrderAS.py b/kwave/kspaceFirstOrderAS.py index a0c26a616..401862902 100644 --- a/kwave/kspaceFirstOrderAS.py +++ b/kwave/kspaceFirstOrderAS.py @@ -14,7 +14,7 @@ from kwave.kWaveSimulation import kWaveSimulation from kwave.kWaveSimulation_helper import retract_transducer_grid_size, save_to_disk_func from kwave.options.simulation_execution_options import SimulationExecutionOptions -from kwave.options.simulation_options import SimulationOptions, SimulationType +from kwave.options.simulation_options import SimulationOptions, SimulationType, resolve_filenames_for_run from kwave.utils.dotdictionary import dotdict from kwave.utils.interp import interpolate2d from kwave.utils.math import sinc @@ -307,6 +307,7 @@ def kspaceFirstOrderAS( if options.save_to_disk: # store the pml size for resizing transducer object below retract_size = [[options.pml_x_size, options.pml_y_size, options.pml_z_size]] + 
input_filename, output_filename = resolve_filenames_for_run(k_sim.options) # run subscript to save files to disk save_to_disk_func( @@ -355,6 +356,7 @@ def kspaceFirstOrderAS( "cuboid_corners": k_sim.cuboid_corners, } ), + input_filename=input_filename, ) # run subscript to resize the transducer object if the grid has been expanded @@ -366,5 +368,5 @@ def kspaceFirstOrderAS( executor = Executor(simulation_options=simulation_options, execution_options=execution_options) executor_options = execution_options.as_list(sensor=k_sim.sensor) - sensor_data = executor.run_simulation(k_sim.options.input_filename, k_sim.options.output_filename, options=executor_options) + sensor_data = executor.run_simulation(input_filename, output_filename, options=executor_options) return sensor_data diff --git a/kwave/options/simulation_options.py b/kwave/options/simulation_options.py index 15da21b17..772eee366 100644 --- a/kwave/options/simulation_options.py +++ b/kwave/options/simulation_options.py @@ -1,8 +1,10 @@ from __future__ import annotations import os +import sys from dataclasses import dataclass, field from enum import Enum +from pathlib import Path from tempfile import gettempdir from typing import TYPE_CHECKING, List, Optional @@ -15,6 +17,72 @@ from kwave.utils.io import get_h5_literals from kwave.utils.pml import get_optimal_pml_size +EXAMPLE_OUTPUT_ROOT = Path("/tmp/example_runs") +USE_DATED_FILENAMES_ENV = "KWAVE_USE_DATED_FILENAMES" + + +def _read_bool_env(name: str, default: bool) -> bool: + value = os.getenv(name) + if value is None: + return default + normalized = value.strip().lower() + if normalized in {"1", "true", "yes", "y", "on"}: + return True + if normalized in {"0", "false", "no", "n", "off"}: + return False + return default + + +def _detect_example_name() -> Optional[str]: + script_path = sys.argv[0] if sys.argv else "" + if not script_path: + return None + parts = Path(script_path).resolve().parts + for index, part in enumerate(parts[:-1]): + if part == 
"examples" and index + 1 < len(parts): + return parts[index + 1] + return None + + +def _default_data_path() -> str: + example_name = _detect_example_name() + if example_name: + return str(EXAMPLE_OUTPUT_ROOT / example_name) + return gettempdir() + + +def _default_io_filenames(use_dated_filenames: bool) -> tuple[str, str]: + if use_dated_filenames: + prefix = get_date_string() + return f"{prefix}_kwave_input.h5", f"{prefix}_kwave_output.h5" + return "kwave_input.h5", "kwave_output.h5" + + +def _derive_output_filename_from_input(input_filename: str | os.PathLike[str]) -> str: + input_path = Path(input_filename) + output_suffix = input_path.suffix if input_path.suffix else ".h5" + output_stem = input_path.stem + if "input" in output_stem: + output_stem = output_stem.replace("input", "output") + else: + output_stem = f"{output_stem}_output" + return str(input_path.with_name(f"{output_stem}{output_suffix}")) + + +def _resolve_to_data_path(data_path: str, filename: str | os.PathLike[str]) -> str: + filename_path = Path(filename) + if filename_path.is_absolute(): + return str(filename_path) + return str(Path(data_path) / filename_path) + + +def _with_numeric_suffix(path: Path, suffix_number: int) -> Path: + return path.with_name(f"{path.stem}_{suffix_number}{path.suffix}") + + +def _is_within_example_output_root(path: Path) -> bool: + return path.resolve().is_relative_to(EXAMPLE_OUTPUT_ROOT.resolve()) + class SimulationType(Enum): """ @@ -108,9 +176,10 @@ class SimulationOptions(object): pml_search_range: List[int] = field(default_factory=lambda: [10, 40]) radial_symmetry: str = "WSWA-FFT" multi_axial_PML_ratio: float = 0.1 - data_path: Optional[str] = field(default_factory=lambda: gettempdir()) - input_filename: Optional[str] = field(default_factory=lambda: f"{get_date_string()}_kwave_input.h5") - output_filename: Optional[str] = field(default_factory=lambda: f"{get_date_string()}_kwave_output.h5") + data_path: Optional[str] = None + input_filename: Optional[str] = None 
+ output_filename: Optional[str] = None + allow_file_overwrite: bool = False pml_x_alpha: Optional[float] = None pml_y_alpha: Optional[float] = None pml_z_alpha: Optional[float] = None @@ -157,6 +226,7 @@ def __post_init__(self): "pml_inside": self.pml_inside, "create_log": self.create_log, "scale_source_terms": self.scale_source_terms, + "allow_file_overwrite": self.allow_file_overwrite, } for key, val in boolean_inputs.items(): @@ -185,9 +255,27 @@ def __post_init__(self): self.pml_y_alpha = self.pml_alpha if self.pml_y_alpha is None else self.pml_y_alpha self.pml_z_alpha = self.pml_alpha if self.pml_z_alpha is None else self.pml_z_alpha + if self.data_path is None: + self.data_path = _default_data_path() + self.data_path = os.fspath(self.data_path) + os.makedirs(self.data_path, exist_ok=True) + + use_dated_filenames = _read_bool_env(USE_DATED_FILENAMES_ENV, default=True) + default_input_filename, default_output_filename = _default_io_filenames(use_dated_filenames) + has_explicit_input_filename = self.input_filename is not None + if self.input_filename is None: + self.input_filename = default_input_filename + if self.output_filename is None: + if has_explicit_input_filename: + self.output_filename = _derive_output_filename_from_input(self.input_filename) + else: + self.output_filename = default_output_filename + # add pathname to input and output filenames - self.input_filename = os.path.join(self.data_path, self.input_filename) - self.output_filename = os.path.join(self.data_path, self.output_filename) + self.input_filename = _resolve_to_data_path(self.data_path, os.fspath(self.input_filename)) + self.output_filename = _resolve_to_data_path(self.data_path, os.fspath(self.output_filename)) + if isinstance(self.stream_to_disk, str): + self.stream_to_disk = _resolve_to_data_path(self.data_path, self.stream_to_disk) assert self.use_fd is None or ( np.issubdtype(self.use_fd, np.number) and self.use_fd in [2, 4] @@ -346,3 +434,44 @@ def option_factory(kgrid: 
"kWaveGrid", options: SimulationOptions): # cleanup unused variables del pml_size_temp return options + + +def resolve_filenames_for_run(options: SimulationOptions) -> tuple[str, str]: + if options.data_path is None: + options.data_path = _default_data_path() + + data_path = Path(options.data_path) + data_path.mkdir(parents=True, exist_ok=True) + + use_dated_filenames = _read_bool_env(USE_DATED_FILENAMES_ENV, default=True) + default_input_filename, default_output_filename = _default_io_filenames(use_dated_filenames) + + input_template = options.input_filename or default_input_filename + if options.output_filename is not None: + output_template = options.output_filename + elif options.input_filename is not None: + output_template = _derive_output_filename_from_input(options.input_filename) + else: + output_template = default_output_filename + + input_path = Path(_resolve_to_data_path(str(data_path), input_template)) + output_path = Path(_resolve_to_data_path(str(data_path), output_template)) + input_path.parent.mkdir(parents=True, exist_ok=True) + output_path.parent.mkdir(parents=True, exist_ok=True) + + if options.allow_file_overwrite: + return str(input_path), str(output_path) + + if not (_is_within_example_output_root(input_path) and _is_within_example_output_root(output_path)): + return str(input_path), str(output_path) + + if not input_path.exists() and not output_path.exists(): + return str(input_path), str(output_path) + + suffix_number = 1 + while True: + candidate_input = _with_numeric_suffix(input_path, suffix_number) + candidate_output = _with_numeric_suffix(output_path, suffix_number) + if not candidate_input.exists() and not candidate_output.exists(): + return str(candidate_input), str(candidate_output) + suffix_number += 1 diff --git a/scripts/compare_example_outputs.py b/scripts/compare_example_outputs.py new file mode 100644 index 000000000..919cd191d --- /dev/null +++ b/scripts/compare_example_outputs.py @@ -0,0 +1,198 @@ +from __future__ import 
annotations + +import argparse +import sys +from pathlib import Path + +import h5py +import numpy as np + + +def collect_hdf_files(root: Path, suffixes: tuple[str, ...]) -> dict[str, Path]: + files: dict[str, Path] = {} + suffix_set = {suffix.lower() for suffix in suffixes} + for path in root.rglob("*"): + if path.is_file() and path.suffix.lower() in suffix_set: + files[path.relative_to(root).as_posix()] = path + return files + + +def dataset_paths(hf: h5py.File) -> list[str]: + paths: list[str] = [] + + def visit(name: str, obj) -> None: + if isinstance(obj, h5py.Dataset): + paths.append(name) + + hf.visititems(visit) + paths.sort() + return paths + + +def arrays_equal(a: np.ndarray, b: np.ndarray) -> bool: + try: + return bool(np.array_equal(a, b, equal_nan=True)) + except TypeError: + return bool(np.array_equal(a, b)) + + +def max_abs_diff(a: np.ndarray, b: np.ndarray) -> float | None: + if np.issubdtype(a.dtype, np.floating) or np.issubdtype(a.dtype, np.complexfloating): + diff = np.abs(a - b) + if diff.size == 0: + return 0.0 + return float(np.nanmax(diff)) + return None + + +def compare_hdf_pair(path_a: Path, path_b: Path, max_value_mismatches: int) -> list[str]: + issues: list[str] = [] + with h5py.File(path_a, "r") as h5_a, h5py.File(path_b, "r") as h5_b: + datasets_a = set(dataset_paths(h5_a)) + datasets_b = set(dataset_paths(h5_b)) + + only_a = sorted(datasets_a - datasets_b) + only_b = sorted(datasets_b - datasets_a) + if only_a: + examples = ", ".join(only_a[:5]) + issues.append(f"datasets missing in second file: {len(only_a)} (examples: {examples})") + if only_b: + examples = ", ".join(only_b[:5]) + issues.append(f"datasets missing in first file: {len(only_b)} (examples: {examples})") + + mismatch_count = 0 + for dataset in sorted(datasets_a & datasets_b): + dset_a = h5_a[dataset] + dset_b = h5_b[dataset] + + if dset_a.shape != dset_b.shape: + issues.append(f"shape mismatch for '{dataset}': {dset_a.shape} vs {dset_b.shape}") + continue + if 
dset_a.dtype != dset_b.dtype: + issues.append(f"dtype mismatch for '{dataset}': {dset_a.dtype} vs {dset_b.dtype}") + continue + + value_a = dset_a[()] + value_b = dset_b[()] + if arrays_equal(value_a, value_b): + continue + + mismatch_count += 1 + diff = max_abs_diff(value_a, value_b) + if diff is None: + issues.append(f"value mismatch for '{dataset}'") + else: + issues.append(f"value mismatch for '{dataset}', max_abs_diff={diff}") + + if mismatch_count >= max_value_mismatches: + issues.append(f"stopped after {max_value_mismatches} value mismatches in this file") + break + + return issues + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description=( + "Compare two example output roots recursively by relative HDF filename and dataset content. " + "Metadata and attributes are not compared." + ) + ) + parser.add_argument("first_root", type=Path, help="First output root (e.g. /tmp/example_runs)") + parser.add_argument("second_root", type=Path, help="Second output root (e.g. 
/tmp/example_runs_1)") + parser.add_argument( + "--suffixes", + nargs="+", + default=[".h5", ".hdf5", ".hdf"], + help="HDF file suffixes to include (default: .h5 .hdf5 .hdf)", + ) + parser.add_argument( + "--max-value-mismatches-per-file", + type=int, + default=5, + help="Max per-file value mismatch entries before truncation (default: 5)", + ) + parser.add_argument( + "--max-report-files", + type=int, + default=20, + help="Max missing/mismatched files to print in detail (default: 20)", + ) + return parser.parse_args() + + +def main() -> int: + args = parse_args() + first_root = args.first_root + second_root = args.second_root + + if not first_root.exists(): + print(f"error: path does not exist: {first_root}") + return 2 + if not second_root.exists(): + print(f"error: path does not exist: {second_root}") + return 2 + + files_a = collect_hdf_files(first_root, tuple(args.suffixes)) + files_b = collect_hdf_files(second_root, tuple(args.suffixes)) + + set_a = set(files_a) + set_b = set(files_b) + + missing_in_second = sorted(set_a - set_b) + missing_in_first = sorted(set_b - set_a) + common_files = sorted(set_a & set_b) + + mismatched_files: list[tuple[str, list[str]]] = [] + for relative_path in common_files: + file_a = files_a[relative_path] + file_b = files_b[relative_path] + try: + issues = compare_hdf_pair(file_a, file_b, args.max_value_mismatches_per_file) + except Exception as error: + issues = [f"read/compare error: {error}"] + + if issues: + mismatched_files.append((relative_path, issues)) + + print("Comparison summary") + print(f"- first root files: {len(set_a)}") + print(f"- second root files: {len(set_b)}") + print(f"- common files checked: {len(common_files)}") + print(f"- missing in second: {len(missing_in_second)}") + print(f"- missing in first: {len(missing_in_first)}") + print(f"- mismatched files: {len(mismatched_files)}") + + if missing_in_second: + print("\nFiles missing in second root:") + for relative_path in missing_in_second[: 
args.max_report_files]:
+            print(f" - {relative_path}")
+        if len(missing_in_second) > args.max_report_files:
+            print(f" ... and {len(missing_in_second) - args.max_report_files} more")
+
+    if missing_in_first:
+        print("\nFiles missing in first root:")
+        for relative_path in missing_in_first[: args.max_report_files]:
+            print(f" - {relative_path}")
+        if len(missing_in_first) > args.max_report_files:
+            print(f" ... and {len(missing_in_first) - args.max_report_files} more")
+
+    if mismatched_files:
+        print("\nMismatched files:")
+        for relative_path, issues in mismatched_files[: args.max_report_files]:
+            print(f" - {relative_path}")
+            for issue in issues:
+                print(f" * {issue}")
+        if len(mismatched_files) > args.max_report_files:
+            print(f" ... and {len(mismatched_files) - args.max_report_files} more")
+
+    has_failures = bool(missing_in_second or missing_in_first or mismatched_files)
+    if has_failures:
+        return 1
+
+    print("\nAll compared HDF files and dataset contents match 1:1.")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/test_examples_rng_policy.py b/tests/test_examples_rng_policy.py
new file mode 100644
index 000000000..c846ca13e
--- /dev/null
+++ b/tests/test_examples_rng_policy.py
+import ast
+from pathlib import Path
+
+
+def _has_explicit_seed(call_node: ast.Call) -> bool:
+    """Return True when the call passes a non-None seed, positionally or via ``seed=``.
+
+    A literal ``None`` (positional or keyword) counts as *not* seeded; any other
+    expression is treated as an explicit seed.
+    """
+    if call_node.args:
+        first_arg = call_node.args[0]
+        if isinstance(first_arg, ast.Constant) and first_arg.value is None:
+            return False
+        return True
+    for keyword in call_node.keywords:
+        if keyword.arg == "seed":
+            if isinstance(keyword.value, ast.Constant) and keyword.value.value is None:
+                return False
+            return True
+    return False
+
+
+def _is_numpy_random_call(call_node: ast.Call, numpy_aliases: set[str]) -> bool:
+    """Return True for calls shaped like ``<numpy-alias>.random.<fn>(...)``."""
+    func = call_node.func
+    return (
+        isinstance(func, ast.Attribute)
+        and isinstance(func.value, ast.Attribute)
+        and isinstance(func.value.value, ast.Name)
+        and func.value.value.id in numpy_aliases
+        and func.value.attr == "random"
+    )
+
+
+def test_examples_do_not_use_unseeded_randomness():
+    """Statically scan every example script and flag non-deterministic RNG usage.
+
+    Allowed: ``np.random.seed(...)`` and ``default_rng``/``np.random.default_rng``
+    with an explicit non-None seed. Everything else from ``numpy.random`` or the
+    stdlib ``random`` module is reported, so example outputs stay reproducible.
+    """
+    repo_root = Path(__file__).resolve().parents[1]
+    examples_dir = repo_root / "examples"
+    disallowed_calls: list[str] = []
+
+    for example_file in sorted(examples_dir.rglob("*.py")):
+        source = example_file.read_text(encoding="utf-8")
+        tree = ast.parse(source, filename=str(example_file))
+
+        # Track how numpy / random are brought into scope in this example.
+        numpy_aliases: set[str] = set()
+        numpy_random_imports: dict[str, str] = {}
+        random_module_aliases: set[str] = set()
+        random_imported_names: set[str] = set()
+
+        for node in ast.walk(tree):
+            if isinstance(node, ast.Import):
+                for alias in node.names:
+                    if alias.name == "numpy":
+                        numpy_aliases.add(alias.asname or alias.name)
+                    if alias.name == "random":
+                        random_module_aliases.add(alias.asname or alias.name)
+            elif isinstance(node, ast.ImportFrom):
+                if node.module == "numpy.random":
+                    for alias in node.names:
+                        imported_as = alias.asname or alias.name
+                        numpy_random_imports[imported_as] = alias.name
+                if node.module == "random":
+                    for alias in node.names:
+                        random_imported_names.add(alias.asname or alias.name)
+
+        for node in ast.walk(tree):
+            if not isinstance(node, ast.Call):
+                continue
+
+            # np.random.<fn>(...) style calls.
+            if _is_numpy_random_call(node, numpy_aliases):
+                random_fn_name = node.func.attr
+                if random_fn_name == "default_rng":
+                    if not _has_explicit_seed(node):
+                        disallowed_calls.append(f"{example_file}:{node.lineno} np.random.default_rng called without seed")
+                elif random_fn_name != "seed":
+                    disallowed_calls.append(f"{example_file}:{node.lineno} np.random.{random_fn_name}")
+                continue
+
+            # from numpy.random import <fn> style calls.
+            if isinstance(node.func, ast.Name) and node.func.id in numpy_random_imports:
+                imported_name = numpy_random_imports[node.func.id]
+                if imported_name == "default_rng":
+                    if not _has_explicit_seed(node):
+                        disallowed_calls.append(f"{example_file}:{node.lineno} default_rng called without seed")
+                elif imported_name != "seed":
+                    disallowed_calls.append(f"{example_file}:{node.lineno} numpy.random.{imported_name}")
+                continue
+
+            # random.<fn>(...) via the stdlib random module (any alias).
+            if isinstance(node.func, ast.Attribute) and isinstance(node.func.value, ast.Name):
+                if node.func.value.id in random_module_aliases:
+                    disallowed_calls.append(f"{example_file}:{node.lineno} random.{node.func.attr}")
+                continue
+
+            # from random import <fn> style calls.
+            if isinstance(node.func, ast.Name) and node.func.id in random_imported_names:
+                disallowed_calls.append(f"{example_file}:{node.lineno} random.{node.func.id}")
+
+    assert not disallowed_calls, "Unseeded or disallowed randomness in examples:\n" + "\n".join(disallowed_calls)
diff --git a/tests/test_simulation_options_output_paths.py b/tests/test_simulation_options_output_paths.py
new file mode 100644
index 000000000..08939dc18
--- /dev/null
+++ b/tests/test_simulation_options_output_paths.py
+import sys
+from pathlib import Path
+
+import kwave.options.simulation_options as simulation_options_module
+from kwave.options.simulation_options import SimulationOptions, resolve_filenames_for_run
+
+
+def test_dated_filenames_are_enabled_by_default(tmp_path, monkeypatch):
+    """Without KWAVE_USE_DATED_FILENAMES set, filenames carry a date prefix."""
+    monkeypatch.delenv("KWAVE_USE_DATED_FILENAMES", raising=False)
+    options = SimulationOptions(save_to_disk=True, data_path=tmp_path)
+
+    assert Path(options.input_filename).name.endswith("_kwave_input.h5")
+    assert Path(options.output_filename).name.endswith("_kwave_output.h5")
+    assert Path(options.input_filename).name != "kwave_input.h5"
+    assert Path(options.output_filename).name != "kwave_output.h5"
+
+
+def test_dated_filenames_can_be_disabled(tmp_path, monkeypatch):
+    """KWAVE_USE_DATED_FILENAMES=0 yields stable, non-dated filenames."""
+    monkeypatch.setenv("KWAVE_USE_DATED_FILENAMES", "0")
+    options = SimulationOptions(save_to_disk=True, data_path=tmp_path)
+
+    assert Path(options.input_filename).name == "kwave_input.h5"
+    assert Path(options.output_filename).name == "kwave_output.h5"
+
+
+def test_output_filename_is_derived_from_explicit_input_name(tmp_path):
+    """An explicit *_input_* filename maps to the matching *_output_* name."""
+    options = SimulationOptions(save_to_disk=True, data_path=tmp_path, input_filename="example_input_7.h5")
+
+    assert Path(options.input_filename).name == "example_input_7.h5"
+    assert Path(options.output_filename).name == "example_output_7.h5"
+
+
+def test_output_filename_derivation_appends_output_when_input_token_missing(tmp_path):
+    """Without an 'input' token, '_output' is appended to derive the output name."""
+    options = SimulationOptions(save_to_disk=True, data_path=tmp_path, input_filename="sample_case.h5")
+
+    assert Path(options.input_filename).name == "sample_case.h5"
+    assert Path(options.output_filename).name == "sample_case_output.h5"
+
+
+def test_default_data_path_for_examples(monkeypatch, tmp_path):
+    """Scripts under examples/ default their data_path to EXAMPLE_OUTPUT_ROOT/<name>."""
+    example_root = tmp_path / "example_runs"
+    monkeypatch.setattr(simulation_options_module, "EXAMPLE_OUTPUT_ROOT", example_root)
+    monkeypatch.setattr(sys, "argv", ["/repo/examples/us_bmode_phased_array/us_bmode_phased_array.py"])
+
+    options = SimulationOptions(save_to_disk=True)
+
+    assert options.data_path == str(example_root / "us_bmode_phased_array")
+    assert Path(options.data_path).exists()
+    assert Path(options.input_filename).parent == Path(options.data_path)
+    assert Path(options.output_filename).parent == Path(options.data_path)
+
+
+def test_resolve_filenames_for_run_avoids_overwrites_under_example_root(tmp_path, monkeypatch):
+    """Repeated runs under the example root get _1, _2, ... suffixed filenames."""
+    example_root = tmp_path / "example_runs"
+    monkeypatch.setattr(simulation_options_module, "EXAMPLE_OUTPUT_ROOT", example_root)
+
+    output_dir = example_root / "sd_focussed_detector_2D"
+    options = SimulationOptions(
+        save_to_disk=True,
+        data_path=output_dir,
+        input_filename="input.h5",
+        output_filename="output.h5",
+    )
+
+    first_input, first_output = resolve_filenames_for_run(options)
+    Path(first_input).touch()
+    Path(first_output).touch()
+
+    second_input, second_output = resolve_filenames_for_run(options)
+
+    assert first_input != second_input
+    assert first_output != second_output
+    assert second_input.endswith("input_1.h5")
+    assert second_output.endswith("output_1.h5")
+
+
+def test_allow_file_overwrite_disables_suffixing(tmp_path, monkeypatch):
+    """allow_file_overwrite=True reuses the same filenames on a second run."""
+    example_root = tmp_path / "example_runs"
+    monkeypatch.setattr(simulation_options_module, "EXAMPLE_OUTPUT_ROOT", example_root)
+
+    output_dir = example_root / "checkpointing"
+    options = SimulationOptions(
+        save_to_disk=True,
+        data_path=output_dir,
+        input_filename="kwave_input.h5",
+        output_filename="kwave_output.h5",
+        allow_file_overwrite=True,
+    )
+
+    first_input, first_output = resolve_filenames_for_run(options)
+    Path(first_input).touch()
+    Path(first_output).touch()
+
+    second_input, second_output = resolve_filenames_for_run(options)
+
+    assert second_input == first_input
+    assert second_output == first_output
+
+
+def test_stream_to_disk_relative_path_uses_data_path(tmp_path):
+    """A relative stream_to_disk filename is resolved against data_path."""
+    options = SimulationOptions(save_to_disk=True, data_path=tmp_path, stream_to_disk="harmonic_data.h5")
+
+    assert options.stream_to_disk == str(tmp_path / "harmonic_data.h5")