1 change: 1 addition & 0 deletions pyproject.toml
@@ -53,6 +53,7 @@ dev = [
"pytest>=9.0.1",
"ruff>=0.14.2",
"ty>=0.0.1a26",
"zstandard>=0.25.0",
]
docs = ["sphinx>=8.2.3", "sphinx-autoapi>=3.6.1", "sphinx-rtd-theme>=3.0.2"]
notebooks = ["ipykernel>=7.1.0"]
17 changes: 9 additions & 8 deletions scripts/generate_experiment_results.py
@@ -1,9 +1,10 @@
"""Create test cases for all compatible problem-solver pairs."""

import pickle
import sys
from pathlib import Path

import yaml
import zstandard as zstd

# Append the parent directory (simopt package) to the system path
sys.path.append(str(Path(__file__).resolve().parent.parent))
@@ -57,12 +58,12 @@ def create_test(problem_name: str, solver_name: str) -> None:
     # Loop through each curve object and convert it into a tuple
     # This is done to avoid packing custom classes into the YAML file
     for i in range(len(myexperiment.objective_curves)):
-        myexperiment.objective_curves[i] = (  # type: ignore
+        myexperiment.objective_curves[i] = (
             myexperiment.objective_curves[i].x_vals,
             myexperiment.objective_curves[i].y_vals,
         )
     for i in range(len(myexperiment.progress_curves)):
-        myexperiment.progress_curves[i] = (  # type: ignore
+        myexperiment.progress_curves[i] = (
             myexperiment.progress_curves[i].x_vals,
             myexperiment.progress_curves[i].y_vals,
         )
@@ -83,11 +84,11 @@ def create_test(problem_name: str, solver_name: str) -> None:
     # Define the directory and output file
     file_problem_name = "".join(e for e in problem_name if e.isalnum())
     file_solver_name = "".join(e for e in solver_name if e.isalnum())
-    results_filename = f"{file_problem_name}_{file_solver_name}.yaml"
+    results_filename = f"{file_problem_name}_{file_solver_name}.pickle.zst"
     results_filepath = EXPECTED_RESULTS_DIR / results_filename
     # Write the results to the file
-    with Path.open(results_filepath, "w") as f:
-        yaml.dump(results_dict, f)
+    with zstd.open(results_filepath, "wb") as f:
+        pickle.dump(results_dict, f)


 def main() -> None:
@@ -103,7 +104,7 @@ def main() -> None:
     # Create the test directory if it doesn't exist
     # Create the expected directory if it doesn't exist
     Path.mkdir(EXPECTED_RESULTS_DIR, parents=True, exist_ok=True)
-    existing_results = [path.name for path in EXPECTED_RESULTS_DIR.glob("*.yaml")]
+    existing_results = [path.name for path in EXPECTED_RESULTS_DIR.glob("*.pickle.zst")]

     # Don't generate any tests for pairs that already have tests generated
     for pair in compatible_pairs:
@@ -112,7 +113,7 @@ def main() -> None:
         # Generate the expected filenames
         file_problem_name = "".join(e for e in problem_name if e.isalnum())
         file_solver_name = "".join(e for e in solver_name if e.isalnum())
-        results_filename = f"{file_problem_name}_{file_solver_name}.yaml"
+        results_filename = f"{file_problem_name}_{file_solver_name}.pickle.zst"
         # If file exists, skip it
         if results_filename in existing_results:
             print(f"Test for {pair} already exists")
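Not part of the diff: a minimal read-side sketch for the new `.pickle.zst` files, using the same `zstandard` file API the script now uses for writing. The `results_filepath` variable is a stand-in for one of the generated paths:

import pickle

import zstandard as zstd

# Decompress and unpickle one expected-results file.
with zstd.open(results_filepath, "rb") as f:
    results_dict = pickle.load(f)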
49 changes: 0 additions & 49 deletions simopt/GUI.py

This file was deleted.

12 changes: 0 additions & 12 deletions simopt/__main__.py

This file was deleted.

144 changes: 144 additions & 0 deletions simopt/experiment/run_solver.py
@@ -0,0 +1,144 @@
"""Functions for running solvers and collecting their outputs."""

import logging
import time

import pandas as pd
from joblib import Parallel, delayed

from mrg32k3a.mrg32k3a import MRG32k3a
from simopt.problem import Problem
from simopt.solver import Solver


def _trim(df: pd.DataFrame, budget: int) -> pd.DataFrame:
    """Trim solution history beyond the problem's budget."""
    df = df.loc[df["budget"] <= budget].copy()

    # Add the latest solution as the final row
    if df["budget"].iloc[-1] < budget:
        row = pd.DataFrame.from_records(
            [{"step": len(df), "solution": df["solution"].iloc[-1], "budget": budget}]
        )
        df = pd.concat([df, row], ignore_index=True)

    return df


def _set_up_rngs(solver: Solver, problem: Problem, mrep: int) -> None:
    # Stream 0: reserved for taking post-replications
    # Stream 1: reserved for bootstrapping
    # Stream 2: reserved for overhead ...
    # Substream 0: rng for random problem instance
    # Substream 1: rng for random initial solution x0 and restart solutions
    # Substream 2: rng for selecting random feasible solutions
    # Substream 3: rng for solver's internal randomness
    # Streams 3, 4, ..., n_macroreps + 2: reserved for macroreplications
    # FIXME: the following rngs seem to be overridden by the solver rngs below
    rng_list = [MRG32k3a(s_ss_sss_index=[2, i + 1, 0]) for i in range(3)]
    solver.attach_rngs(rng_list)

    # Create RNGs for simulation
    simulation_rngs = [
        MRG32k3a(s_ss_sss_index=[mrep + 3, i, 0]) for i in range(problem.model.n_rngs)
    ]

    # Create RNGs for the solver
    solver_rngs = [
        MRG32k3a(
            s_ss_sss_index=[
                mrep + 3,
                problem.model.n_rngs + i,
                0,
            ]
        )
        for i in range(len(solver.rng_list))
    ]

    solver.solution_progenitor_rngs = simulation_rngs
    solver.rng_list = solver_rngs


def _run_mrep(
    solver: Solver, problem: Problem, mrep: int
) -> tuple[pd.DataFrame, float]:
    """Run one macroreplication of the solver on the problem."""
    logging.debug(
        f"Macroreplication {mrep}: "
        f"starting solver {solver.name} on problem {problem.name}."
    )

    # Set up RNGs
    _set_up_rngs(solver, problem, mrep)

    # Run solver
    start = time.perf_counter()
    df = solver.run(problem)
    elapsed = time.perf_counter() - start
    logging.debug(
        f"Macroreplication {mrep}: "
        f"finished solver {solver.name} on problem {problem.name} "
        f"in {elapsed:0.4f} seconds."
    )

    # Trim results to the problem budget and add macroreplication index
    df = _trim(df, problem.factors["budget"])
    df["mrep"] = mrep

    return df, elapsed


def run_solver(
    solver: Solver, problem: Problem, n_macroreps: int, n_jobs: int = -1
) -> tuple[pd.DataFrame, list[float]]:
    """Run the solver on the problem for a given number of macroreplications.

    Args:
        solver (Solver): The solver to run.
        problem (Problem): The problem to solve.
        n_macroreps (int): Number of macroreplications to run.
        n_jobs (int, optional): Number of jobs to run in parallel. Defaults to -1.
            -1: use all available cores
            1: run sequentially

    Returns:
        tuple[pd.DataFrame, list[float]]: The combined solution history across
            all macroreplications and the wall-clock time (in seconds) of each
            macroreplication.

    Raises:
        ValueError: If `n_macroreps` is not positive.
    """
    if n_macroreps <= 0:
        raise ValueError("number of macroreplications must be positive.")

    logging.info(f"Running solver {solver.name} on problem {problem.name}.")
    logging.debug("Starting macroreplications")

    if n_jobs == 1:
        results: list[tuple] = [
            _run_mrep(solver, problem, i) for i in range(n_macroreps)
        ]
    else:
        results = Parallel(n_jobs=n_jobs)(
            delayed(_run_mrep)(solver, problem, i) for i in range(n_macroreps)
        )  # type: ignore

    dfs = []
    elapsed_times = []
    for df, elapsed in results:
        dfs.append(df)
        elapsed_times.append(elapsed)
    df = pd.concat(dfs, ignore_index=True)

    return df, elapsed_times


def _to_list(df: pd.DataFrame, column: str) -> list[list]:
    """Convert one column into a list of per-macroreplication value lists."""
    df = df.sort_values(["mrep", "step"])
    return [group[column].tolist() for _, group in df.groupby("mrep")]


def _from_list(data: list[list], column: str) -> pd.DataFrame:
    """Rebuild the long-format frame from per-macroreplication value lists."""
    records = [
        {"mrep": mrep, "step": step, column: value}
        for mrep, steps in enumerate(data)
        for step, value in enumerate(steps)
    ]
    return pd.DataFrame.from_records(records, columns=["mrep", "step", column])
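
Not part of the diff: a short sketch of the stream/substream indices `_set_up_rngs` assigns per macroreplication. The sizes `n_model_rngs = 2` and `n_solver_rngs = 2` are made-up examples:

from mrg32k3a.mrg32k3a import MRG32k3a

mrep, n_model_rngs, n_solver_rngs = 0, 2, 2  # hypothetical sizes
# Macroreplication 0 draws only from stream 3 (mrep + 3); mrep 1 would use stream 4.
simulation_rngs = [
    MRG32k3a(s_ss_sss_index=[mrep + 3, i, 0])  # substreams 0, 1
    for i in range(n_model_rngs)
]
solver_rngs = [
    MRG32k3a(s_ss_sss_index=[mrep + 3, n_model_rngs + i, 0])  # substreams 2, 3
    for i in range(n_solver_rngs)
]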
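And a usage sketch for the new module as a whole; `my_solver` and `my_problem` stand in for already-constructed `Solver` and `Problem` instances, and the private helpers are imported directly for illustration:

from simopt.experiment.run_solver import _from_list, _to_list, run_solver

# Ten macroreplications, run sequentially; df holds one row per recorded
# solution, with "step", "solution", "budget", and "mrep" columns.
df, elapsed_times = run_solver(my_solver, my_problem, n_macroreps=10, n_jobs=1)
print(f"total solver time: {sum(elapsed_times):0.2f}s")

# Round-trip one column through the list-of-lists representation.
budgets = _to_list(df, "budget")  # one list per macroreplication
df_budgets = _from_list(budgets, "budget")  # back to long format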