From 9cec91fd108b9b6a881fb1a37ba4577a9ede7f3f Mon Sep 17 00:00:00 2001 From: Joel Adams Date: Thu, 11 Sep 2025 17:22:36 +0100 Subject: [PATCH 1/4] Detect existing campaign and increment counter accordingly --- src/epyscan/__init__.py | 66 ++++++++++++++++++++++++++++++++++++----- tests/test_epyscan.py | 47 +++++++++++++++++++++++++++-- 2 files changed, 102 insertions(+), 11 deletions(-) diff --git a/src/epyscan/__init__.py b/src/epyscan/__init__.py index 2c03625..05c3022 100644 --- a/src/epyscan/__init__.py +++ b/src/epyscan/__init__.py @@ -1,3 +1,4 @@ +import re from copy import deepcopy from pathlib import Path from typing import Union @@ -7,14 +8,65 @@ from scipy.stats import qmc +def allocate_next_run_number(root: Path): + """ + Return the next available run number in the given root directory. + + This function searches all existing run directories and returns the + highest run number plus 1, or 0 if no runs exist. Only the lowest-level + run directories are considered (e.g., 'run_0', 'run_50'). + + Parameters + ---------- + root : Path + The root directory to search for existing run directories. + + Returns + ------- + int + The next available run number, or 0 if none exist. + """ + run_nums = [ + int(path.name[4:]) for path in root.rglob("*") if re.fullmatch(r"run_\d+", path.name) + ] + + return max(run_nums, default=-1) + 1 + + def rundir_hierarchy(root: Path, run_num: int) -> Path: - """Create nested directory structure for a run""" + """ + Create the nested directory structure for a given run number. + + Campaign runs are organised into a hierarchy to avoid putting too many + files in a single directory. 
The structure follows `rundir_hierarchy`: + + ./run_0_1000000/run_0_10000/run_0_100/run_[0-99] + + As the run numbers increase, higher-level directories are split further: + + ./run_0_1000000/run_0_10000/run_100_200/run_[100-199] + ./run_0_1000000/run_10000_20000/run_10000_10100/run_[10000-10099] + + The hierarchy ensures that only the lowest-level directories (e.g., 'run_0', + 'run_50') contain individual simulation runs, while higher-level directories + manage grouping of runs in batches. + + Parameters + ---------- + root : Path + The root directory where the run hierarchy should be created. + run_num : int + The run number for which the directories will be created. + + Returns + ------- + Path + The full path to the newly created run directory. + """ def level_dir(exponent: int) -> Path: level = 100**exponent - return Path( - f"run_{int(run_num / level) * level}_{int(run_num / level + 1) * level}" - ) + return Path(f"run_{int(run_num / level) * level}_{int(run_num / level + 1) * level}") level_1_dir = level_dir(3) level_2_dir = level_dir(2) @@ -64,7 +116,7 @@ class Campaign: """ def __init__(self, template: dict, root: Union[str, Path]): - self._counter = 0 + self._counter = allocate_next_run_number(root) self.template = template self.root = Path(root) @@ -139,9 +191,7 @@ def __init__(self, parameters: dict, n_samples: int = 10): def _gridspace(start, stop, num: int, log: bool = False, endpoint: bool = True): """Generalisation over logspace/linspace""" if log: - return np.logspace( - np.log10(start), np.log10(stop), num=num, endpoint=endpoint - ) + return np.logspace(np.log10(start), np.log10(stop), num=num, endpoint=endpoint) return np.linspace(start, stop, num=num, endpoint=endpoint) diff --git a/tests/test_epyscan.py b/tests/test_epyscan.py index b226b07..553a73e 100644 --- a/tests/test_epyscan.py +++ b/tests/test_epyscan.py @@ -1,3 +1,5 @@ +from pathlib import Path + import epydeck import numpy as np @@ -81,9 +83,7 @@ def test_latin_hypercube(): for k, v in 
parameters.items(): intervals = np.linspace(v["min"], v["max"], n_samples + 1) if v.get("log", False): - intervals = np.logspace( - np.log10(v["min"]), np.log10(v["max"]), n_samples + 1 - ) + intervals = np.logspace(np.log10(v["min"]), np.log10(v["max"]), n_samples + 1) samples_for_k = np.array([sample[k] for sample in samples]) interval_counts = np.array( [ @@ -143,3 +143,44 @@ def test_campaign(tmp_path): actual_case_deck = epydeck.load(f) assert actual_case_deck == expected_case_deck + + +def test_load_existing_campaign(tmp_path): + # Simulate a pre-existing campaign by making fake paths + base_path = tmp_path / "run_0_1000000/run_0_10000/run_0_100" + + existing_campaign_paths = [ + base_path / "run_0", + base_path / "run_1", + base_path / "run_2", + base_path / "run_3", + base_path / "run_4", + ] + + for path in existing_campaign_paths: + Path(path).mkdir(parents=True, exist_ok=True) + + template = {"block": {"var4": 1.23}, "other_block": {"var5": True}} + campaign = epyscan.Campaign(template, tmp_path) + + new_path = campaign.setup_case({"block": {"var4": 1.24}}) + assert new_path == (base_path / "run_5") + assert new_path.exists() + + +def test_retrieve_existing_run_count(tmp_path): + base_path = tmp_path / "run_0_1000000/run_0_10000/run_0_100" + (base_path / "run_0").mkdir(parents=True) + (base_path / "run_1").mkdir() + (base_path / "run_50").mkdir() + (base_path / "run_100").mkdir() + + assert epyscan.allocate_next_run_number(tmp_path) == 101 + + +def test_retrieve_existing_run_count_edge_cases(tmp_path): + (tmp_path / "run_10").mkdir() + (tmp_path / "run_20_30").mkdir() + (tmp_path / "not_a_run").mkdir() + + assert epyscan.allocate_next_run_number(tmp_path) == 11 From 1a5af52518959fb1a7b8f71ce83d1418589aee04 Mon Sep 17 00:00:00 2001 From: JoelLucaAdams Date: Thu, 11 Sep 2025 16:22:50 +0000 Subject: [PATCH 2/4] [skip ci] Apply black changes --- src/epyscan/__init__.py | 12 +++++++++--- tests/test_epyscan.py | 4 +++- 2 files changed, 12 insertions(+), 4 
deletions(-) diff --git a/src/epyscan/__init__.py b/src/epyscan/__init__.py index 05c3022..eabd5ff 100644 --- a/src/epyscan/__init__.py +++ b/src/epyscan/__init__.py @@ -27,7 +27,9 @@ def allocate_next_run_number(root: Path): The next available run number, or 0 if none exist. """ run_nums = [ - int(path.name[4:]) for path in root.rglob("*") if re.fullmatch(r"run_\d+", path.name) + int(path.name[4:]) + for path in root.rglob("*") + if re.fullmatch(r"run_\d+", path.name) ] return max(run_nums, default=-1) + 1 @@ -66,7 +68,9 @@ def rundir_hierarchy(root: Path, run_num: int) -> Path: def level_dir(exponent: int) -> Path: level = 100**exponent - return Path(f"run_{int(run_num / level) * level}_{int(run_num / level + 1) * level}") + return Path( + f"run_{int(run_num / level) * level}_{int(run_num / level + 1) * level}" + ) level_1_dir = level_dir(3) level_2_dir = level_dir(2) @@ -191,7 +195,9 @@ def __init__(self, parameters: dict, n_samples: int = 10): def _gridspace(start, stop, num: int, log: bool = False, endpoint: bool = True): """Generalisation over logspace/linspace""" if log: - return np.logspace(np.log10(start), np.log10(stop), num=num, endpoint=endpoint) + return np.logspace( + np.log10(start), np.log10(stop), num=num, endpoint=endpoint + ) return np.linspace(start, stop, num=num, endpoint=endpoint) diff --git a/tests/test_epyscan.py b/tests/test_epyscan.py index 553a73e..50ad7c8 100644 --- a/tests/test_epyscan.py +++ b/tests/test_epyscan.py @@ -83,7 +83,9 @@ def test_latin_hypercube(): for k, v in parameters.items(): intervals = np.linspace(v["min"], v["max"], n_samples + 1) if v.get("log", False): - intervals = np.logspace(np.log10(v["min"]), np.log10(v["max"]), n_samples + 1) + intervals = np.logspace( + np.log10(v["min"]), np.log10(v["max"]), n_samples + 1 + ) samples_for_k = np.array([sample[k] for sample in samples]) interval_counts = np.array( [ From 1eede3d24e455a84d652988b2c2f736aa4429e45 Mon Sep 17 00:00:00 2001 From: Joel Adams Date: Fri, 12 Sep 2025 
12:55:05 +0100 Subject: [PATCH 3/4] Add append flag --- src/epyscan/__init__.py | 20 ++++++++------------ tests/test_epyscan.py | 28 ++++++++++++++++++++++++---- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/src/epyscan/__init__.py b/src/epyscan/__init__.py index eabd5ff..b968c16 100644 --- a/src/epyscan/__init__.py +++ b/src/epyscan/__init__.py @@ -27,9 +27,7 @@ def allocate_next_run_number(root: Path): The next available run number, or 0 if none exist. """ run_nums = [ - int(path.name[4:]) - for path in root.rglob("*") - if re.fullmatch(r"run_\d+", path.name) + int(path.name[4:]) for path in root.rglob("*") if re.fullmatch(r"run_\d+", path.name) ] return max(run_nums, default=-1) + 1 @@ -68,9 +66,7 @@ def rundir_hierarchy(root: Path, run_num: int) -> Path: def level_dir(exponent: int) -> Path: level = 100**exponent - return Path( - f"run_{int(run_num / level) * level}_{int(run_num / level + 1) * level}" - ) + return Path(f"run_{int(run_num / level) * level}_{int(run_num / level + 1) * level}") level_1_dir = level_dir(3) level_2_dir = level_dir(2) @@ -108,10 +104,12 @@ class Campaign: Base template deck as a Python dict (for example, as created by `epydeck`) root: Path to root run directory + append: + Flag to decide whether to append new runs to the end of an existing + campaign (if True) or overwrite it (if False) Examples -------- - >>> with open("template.deck") as f: template = epydeck.load(f) >>> campaign = Campaign(template, "grid_root") @@ -119,8 +117,8 @@ class Campaign: """ - def __init__(self, template: dict, root: Union[str, Path]): - self._counter = allocate_next_run_number(root) + def __init__(self, template: dict, root: Union[str, Path], append: bool = False): + self._counter = allocate_next_run_number(root) if append else 0 self.template = template self.root = Path(root) @@ -195,9 +193,7 @@ def __init__(self, parameters: dict, n_samples: int = 10): def _gridspace(start, stop, num: int, log: bool = False, endpoint: bool = True): 
"""Generalisation over logspace/linspace""" if log: - return np.logspace( - np.log10(start), np.log10(stop), num=num, endpoint=endpoint - ) + return np.logspace(np.log10(start), np.log10(stop), num=num, endpoint=endpoint) return np.linspace(start, stop, num=num, endpoint=endpoint) diff --git a/tests/test_epyscan.py b/tests/test_epyscan.py index 50ad7c8..02a4288 100644 --- a/tests/test_epyscan.py +++ b/tests/test_epyscan.py @@ -83,9 +83,7 @@ def test_latin_hypercube(): for k, v in parameters.items(): intervals = np.linspace(v["min"], v["max"], n_samples + 1) if v.get("log", False): - intervals = np.logspace( - np.log10(v["min"]), np.log10(v["max"]), n_samples + 1 - ) + intervals = np.logspace(np.log10(v["min"]), np.log10(v["max"]), n_samples + 1) samples_for_k = np.array([sample[k] for sample in samples]) interval_counts = np.array( [ @@ -163,13 +161,35 @@ def test_load_existing_campaign(tmp_path): Path(path).mkdir(parents=True, exist_ok=True) template = {"block": {"var4": 1.23}, "other_block": {"var5": True}} - campaign = epyscan.Campaign(template, tmp_path) + campaign = epyscan.Campaign(template, tmp_path, append=True) new_path = campaign.setup_case({"block": {"var4": 1.24}}) assert new_path == (base_path / "run_5") assert new_path.exists() +def test_override_existing_campaign(tmp_path): + base_path = tmp_path / "run_0_1000000/run_0_10000/run_0_100" + + existing_campaign_paths = [ + base_path / "run_0", + base_path / "run_1", + base_path / "run_2", + base_path / "run_3", + base_path / "run_4", + ] + + for path in existing_campaign_paths: + Path(path).mkdir(parents=True, exist_ok=True) + + template = {"block": {"var4": 1.23}, "other_block": {"var5": True}} + campaign = epyscan.Campaign(template, tmp_path) + + new_path = campaign.setup_case({"block": {"var4": 1.24}}) + assert new_path == (base_path / "run_0") + assert new_path.exists() + + def test_retrieve_existing_run_count(tmp_path): base_path = tmp_path / "run_0_1000000/run_0_10000/run_0_100" (base_path / 
"run_0").mkdir(parents=True) From caa9ab21a5d946ac5838c41d752401f811b6e4fb Mon Sep 17 00:00:00 2001 From: JoelLucaAdams Date: Fri, 12 Sep 2025 11:55:27 +0000 Subject: [PATCH 4/4] [skip ci] Apply black changes --- src/epyscan/__init__.py | 12 +++++++++--- tests/test_epyscan.py | 4 +++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/epyscan/__init__.py b/src/epyscan/__init__.py index b968c16..7284c8d 100644 --- a/src/epyscan/__init__.py +++ b/src/epyscan/__init__.py @@ -27,7 +27,9 @@ def allocate_next_run_number(root: Path): The next available run number, or 0 if none exist. """ run_nums = [ - int(path.name[4:]) for path in root.rglob("*") if re.fullmatch(r"run_\d+", path.name) + int(path.name[4:]) + for path in root.rglob("*") + if re.fullmatch(r"run_\d+", path.name) ] return max(run_nums, default=-1) + 1 @@ -66,7 +68,9 @@ def rundir_hierarchy(root: Path, run_num: int) -> Path: def level_dir(exponent: int) -> Path: level = 100**exponent - return Path(f"run_{int(run_num / level) * level}_{int(run_num / level + 1) * level}") + return Path( + f"run_{int(run_num / level) * level}_{int(run_num / level + 1) * level}" + ) level_1_dir = level_dir(3) level_2_dir = level_dir(2) @@ -193,7 +197,9 @@ def __init__(self, parameters: dict, n_samples: int = 10): def _gridspace(start, stop, num: int, log: bool = False, endpoint: bool = True): """Generalisation over logspace/linspace""" if log: - return np.logspace(np.log10(start), np.log10(stop), num=num, endpoint=endpoint) + return np.logspace( + np.log10(start), np.log10(stop), num=num, endpoint=endpoint + ) return np.linspace(start, stop, num=num, endpoint=endpoint) diff --git a/tests/test_epyscan.py b/tests/test_epyscan.py index 02a4288..bc0295d 100644 --- a/tests/test_epyscan.py +++ b/tests/test_epyscan.py @@ -83,7 +83,9 @@ def test_latin_hypercube(): for k, v in parameters.items(): intervals = np.linspace(v["min"], v["max"], n_samples + 1) if v.get("log", False): - intervals = np.logspace(np.log10(v["min"]), 
np.log10(v["max"]), n_samples + 1) + intervals = np.logspace( + np.log10(v["min"]), np.log10(v["max"]), n_samples + 1 + ) samples_for_k = np.array([sample[k] for sample in samples]) interval_counts = np.array( [