From fb0291077a11f686cc284f9ee390bbddbcf98252 Mon Sep 17 00:00:00 2001
From: Antonio Paolillo <apaolill@gmail.com>
Date: Thu, 29 Jan 2026 12:00:57 +0100
Subject: [PATCH] benchmark: Fix pretty vars & taskset edge cases

- Normalize sequence-valued variables before pretty-printing
  to avoid mismatches in pretty variable mappings
- Apply pretty variable expansion after post-run hooks, once
  experiment result lines are fully materialized
- Make taskset wrapper robust to empty CPU selections and
  allow cpu_list to implicitly define cpu_order
- Extend cartesian_product to support dict-valued variables
  by iterating over (key, value) pairs

This fixes several subtle edge cases in result formatting and
CPU pinning while making variable expansion more flexible.

Signed-off-by: Antonio Paolillo <apaolill@gmail.com>
---
 benchkit/benchmark.py               | 10 +++++++---
 benchkit/commandwrappers/taskset.py | 13 ++++++++++---
 benchkit/utils/variables.py         | 15 ++++++++++++---
 3 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/benchkit/benchmark.py b/benchkit/benchmark.py
index f4496eea..5fa6ba81 100644
--- a/benchkit/benchmark.py
+++ b/benchkit/benchmark.py
@@ -11,7 +11,7 @@
 import pathlib
 from multiprocessing import Barrier
 from subprocess import CalledProcessError
-from typing import IO, Any, Dict, Iterable, List, Optional, Protocol, Tuple, Type
+from typing import IO, Any, Dict, Iterable, List, Optional, Protocol, Sequence, Tuple, Type
 
 from benchkit.commandwrappers import CommandWrapper
 from benchkit.dependencies import check_dependencies
@@ -929,6 +929,9 @@ def _update_pretty_variables(self, experiment_results: Dict[str, Any]):
                     experiment_results[ugly2pretty] = ugly_var_value
                     continue
 
+                if isinstance(ugly_var_value, Sequence):
+                    ugly_var_value = ugly_var_value[0]
+
                 pretty_var_value = ugly2pretty.get(ugly_var_value, ugly_var_value)
                 experiment_results[f"{var_name}_pretty"] = f'"{pretty_var_value}"'
                 # If __category__ is defined, also create a column with that name
@@ -978,8 +981,6 @@ def _run_single_run(
             experiment_results.update(run_variables)
             experiment_results.update(other_variables)
 
-            self._update_pretty_variables(experiment_results=experiment_results)
-
             experiment_results.update({"rep": run_id})
 
             if not self.valid_experiment_parameters(**experiment_results):
@@ -1097,6 +1098,9 @@ def wrdr(file_content: str, filename: PathType) -> None:
                     for xrline in experiment_results_lines:
                         xrline.update(hook_dict)
 
+            for xrline in experiment_results_lines:
+                self._update_pretty_variables(experiment_results=xrline)
+
             wrdr(
                 file_content=json.dumps(
                     experiment_results_lines,
diff --git a/benchkit/commandwrappers/taskset.py b/benchkit/commandwrappers/taskset.py
index 52094c7a..9fac0602 100644
--- a/benchkit/commandwrappers/taskset.py
+++ b/benchkit/commandwrappers/taskset.py
@@ -5,7 +5,7 @@
 threads of the wrapped command are scheduled.
 """
 
-from typing import List, Optional
+from typing import List, Optional, Tuple
 
 from benchkit.platforms import Platform, get_current_platform
 from benchkit.utils.types import CpuOrder
@@ -33,6 +33,7 @@ def dependencies(self) -> List[PackageDependency]:
     def command_prefix(
         self,
         cpu_order: CpuOrder = None,
+        cpu_list: Optional[Tuple[str, List[int]]] = None,
         master_thread_core: Optional[int] = None,
         nb_threads: Optional[int] = None,
         **kwargs,
@@ -43,6 +44,9 @@ def command_prefix(
             **kwargs,
         )
 
+        if cpu_order is None and cpu_list is not None:
+            cpu_order = cpu_list[1]
+
         mtc = master_thread_core
 
         if self.set_all_cpus:
@@ -51,13 +55,16 @@ def command_prefix(
 
             cpu_order_list = self.platform.cpu_order(provided_order=cpu_order)
 
-            cpu_order_list = [str(x) for x in cpu_order_list[0:nb_threads]]
+            cpu_order_list = [str(x) for x in cpu_order_list]
             cpu_order_str = ",".join(cpu_order_list)
 
+            if not cpu_order_list:
+                return []
+
             cmd_prefix = ["taskset", "--cpu-list", cpu_order_str] + cmd_prefix
         else:
             if mtc is None:
-                if cpu_order is None:
+                if not cpu_order:
                     return []
 
                 cpu_order_list = self.platform.cpu_order(provided_order=cpu_order)
diff --git a/benchkit/utils/variables.py b/benchkit/utils/variables.py
index 38f3bcaa..fd20791c 100644
--- a/benchkit/utils/variables.py
+++ b/benchkit/utils/variables.py
@@ -20,12 +20,21 @@ def cartesian_product(variables: Dict[str, Iterable[Any]]) -> Iterator[Dict[str,
         variables (Dict[str, Iterable[Any]]): set of variables to multiply to each other.
 
     Returns:
-        Iterator[Dict[str, Any]]: cartesian product if the given variables.
+        Iterator[Dict[str, Any]]: cartesian product of the given variables.
 
     Yields:
-        Iterator[Dict[str, Any]]: cartesian product if the given variables.
+        Dict[str, Any]: one assignment from the cartesian product.
     """
-    non_empty_variables = {k: v for k, v in variables.items() if v}
+
+    def _normalize_values(v: Any) -> Iterable[Any]:
+        # If the variable is a dict, iterate over (key, value) pairs
+        if isinstance(v, dict):
+            return [(k, vv) for k, vv in v.items()]
+        return v
+
+    normalized = {k: _normalize_values(v) for k, v in variables.items()}
+    non_empty_variables = {k: v for k, v in normalized.items() if v}
+
     product_gen = (
         dict(zip(non_empty_variables.keys(), record))
         for record in itertools.product(*non_empty_variables.values())