From 00efc729ad47304053fc824087b9e448d75c656b Mon Sep 17 00:00:00 2001
From: Gil Sharon <as81@cdc.gov>
Date: Tue, 10 Feb 2026 15:23:24 -0500
Subject: [PATCH 1/7] chore: add benchmark history

---
 .github/workflows/bench-compare.yaml | 100 +++-
 scripts/bench_results.js             | 217 +++++++
 scripts/index.html                   | 864 +++++++++++++++++++++++++++
 scripts/test_bench_results.sh        |  72 +++
 4 files changed, 1243 insertions(+), 10 deletions(-)
 create mode 100644 scripts/bench_results.js
 create mode 100644 scripts/index.html
 create mode 100755 scripts/test_bench_results.sh

diff --git a/.github/workflows/bench-compare.yaml b/.github/workflows/bench-compare.yaml
index a7141e07..2d5e0fde 100644
--- a/.github/workflows/bench-compare.yaml
+++ b/.github/workflows/bench-compare.yaml
@@ -7,7 +7,12 @@ jobs:
   compare-branches:
     runs-on: ubuntu-latest
     permissions:
+      actions: write
+      contents: write
       pull-requests: write
+    env:
+      BENCH_HISTORY_BRANCH: benchmarks-history
+      BENCH_HISTORY_DIR: tmp-bench-history
     steps:
       - uses: actions/checkout@v6
       - name: Setup environment
@@ -28,10 +33,23 @@ jobs:
         run: git checkout ${{ github.event.pull_request.head.ref }}
 
       - name: Run hyperfine benchmarks
-        run: mise run bench:hyperfine -- -- --export-markdown hyperfine.md
+        run:
+          mise run bench:hyperfine -- -- --export-markdown hyperfine.md
+          --export-json hyperfine.json
 
-      - name: Run benchmarks on head branch and format output
+      - name: Run criterion benchmarks (baseline compare)
         run: |
+          set -euo pipefail
+          if cargo bench -p ixa-bench -- --baseline base 2>&1 | tee criterion-compare.txt; then
+            cargo run -q -p ixa-bench --bin check_criterion_regressions | tee criterion-regressions.txt
+          else
+            echo "Note: A comparison could not be generated. Maybe you added new benchmarks?" | tee criterion-regressions.txt
+            mise run bench:criterion 2>&1 | tee -a criterion-compare.txt
+          fi
+
+      - name: Format PR comment
+        run: |
+          set -euo pipefail
           echo '### Benchmark Results' > results.md
           echo '' >> results.md
           echo '#### Hyperfine' >> results.md
@@ -42,19 +60,81 @@ jobs:
           echo '' >> results.md
           echo '#### Criterion' >> results.md
           echo '' >> results.md
-          if ! cargo bench -p ixa-bench -- --baseline base 2>&1; then
-            echo "Note: A comparison could not be generated. Maybe you added new benchmarks?" >> results.md
-            mise run bench:criterion 2>&1
-          else
-            echo '```' >> results.md
-            cargo run -q -p ixa-bench --bin check_criterion_regressions >> results.md
-            echo '```' >> results.md
-          fi
+          echo '```' >> results.md
+          cat criterion-regressions.txt >> results.md
+          echo '```' >> results.md
 
       - name: Add comment to PR
+        if: ${{ github.event_name == 'pull_request' }}
         run: |
           gh pr comment ${{ github.event.pull_request.number }} \
             --repo ${{ github.repository }} \
             --body-file results.md
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name:
+          Download previous bench history from benchmarks-history branch (best
+          effort)
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          # Request the raw file body: the JSON `content` field is empty for files over 1 MB.
+          # If the file or branch doesn't exist yet, we continue with an empty history.
+          if gh api \
+            -H "Accept: application/vnd.github.raw+json" \
+            "/repos/${GITHUB_REPOSITORY}/contents/bench-history.json?ref=${BENCH_HISTORY_BRANCH}" \
+            > bench-history.download 2>/dev/null; then
+            # Only a successful download replaces bench-history.json.
+            mv bench-history.download bench-history.json
+            echo "Downloaded bench-history.json from ${BENCH_HISTORY_BRANCH}"
+          else
+            echo "No bench-history.json found on ${BENCH_HISTORY_BRANCH} (first run?)."
+          fi
+
+      - name: Create JSON results
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number || '' }}
+          BASE_REF: ${{ github.event.pull_request.base.ref || '' }}
+          BASE_SHA: ${{ github.event.pull_request.base.sha || '' }}
+          HEAD_REF: ${{ github.event.pull_request.head.ref || github.ref_name }}
+          HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
+          RUN_BRANCH:
+            ${{ github.event.pull_request.head.ref || github.ref_name }}
+        run: |
+          set -euo pipefail
+          node scripts/bench_results.js \
+            --repo "${GITHUB_REPOSITORY}" \
+            --branch "${RUN_BRANCH}" \
+            --pr-number "${PR_NUMBER}" \
+            --base-ref "${BASE_REF}" --base-sha "${BASE_SHA}" \
+            --head-ref "${HEAD_REF}" --head-sha "${HEAD_SHA}" \
+            --hyperfine-json hyperfine.json \
+            --criterion-log criterion-compare.txt \
+            --history-in bench-history.json \
+            --out-current bench-current.json \
+            --history-out bench-history.json
+
+      - name: Publish bench-history.json to benchmarks-history branch
+        if:
+          ${{ github.event_name != 'pull_request' ||
+          github.event.pull_request.head.repo.full_name == github.repository }}
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          rm -rf "${BENCH_HISTORY_DIR}"
+          remote="https://x-access-token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git"
+          if ! git clone --depth 1 --branch "${BENCH_HISTORY_BRANCH}" "${remote}" "${BENCH_HISTORY_DIR}"; then
+            # First run: the history branch does not exist yet, so start it from an empty repo.
+            git init -q -b "${BENCH_HISTORY_BRANCH}" "${BENCH_HISTORY_DIR}"
+            git -C "${BENCH_HISTORY_DIR}" remote add origin "${remote}"
+          fi
+          cp bench-history.json "${BENCH_HISTORY_DIR}/bench-history.json"
+          cd "${BENCH_HISTORY_DIR}"
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git add bench-history.json
+          if git diff --cached --quiet; then echo "No changes to bench-history.json"; exit 0; fi
+          git commit -m "Update bench-history.json"
+          git push origin "HEAD:refs/heads/${BENCH_HISTORY_BRANCH}"
diff --git a/scripts/bench_results.js b/scripts/bench_results.js
new file mode 100644
index 00000000..11ed31a7
--- /dev/null
+++ b/scripts/bench_results.js
@@ -0,0 +1,217 @@
+#!/usr/bin/env node
+/*
+Generates benchmark JSON artifacts from Hyperfine + Criterion outputs.
+
+Usage (example):
+  node scripts/bench_results.js \
+    --repo owner/repo \
+    --branch my-branch \
+    --pr-number 123 \
+    --base-ref main --base-sha abc \
+    --head-ref feature --head-sha def \
+    --hyperfine-json hyperfine.json \
+    --criterion-log criterion-compare.txt \
+    --history-in bench-history.json \
+    --out-current bench-current.json \
+    --history-out bench-history.json
+
+All args are optional; input files that do not exist produce empty result arrays.
+*/
+
+'use strict';
+
+const fs = require('fs');
+const path = require('path');
+
+function die(message) {
+  process.stderr.write(`${message}\n`);
+  process.exit(2);
+}
+
+// Returns the value after flag `name`; undefined if the flag is absent or its value is "" (CI passes "" for unset fields).
+function getArgValue(argv, name) {
+  const idx = argv.indexOf(name);
+  const value = idx === -1 ? '' : argv[idx + 1];
+  if (value === undefined || value.startsWith('--')) die(`Missing value for ${name}`);
+  return value === '' ? undefined : value;
+}
+
+function hasArg(argv, name) {
+  return argv.includes(name);
+}
+
+function parseMaybeNumber(text) {
+  if (text == null || text === '') return undefined;
+  const n = Number(text);
+  return Number.isFinite(n) ? n : undefined;
+}
+
+function parseDurationToSeconds(text) {
+  // Examples: "12.3 ms ± 0.2 ms", "1.234 s", "456 µs"
+  const m = String(text).trim().match(/([0-9]*\.?[0-9]+)\s*([a-zA-Zµμ]+)/);
+  if (!m) return null;
+  const value = Number(m[1]);
+  const unitRaw = m[2];
+  const unit = unitRaw.replace('μ', 'µ');
+  const factor = {
+    s: 1,
+    sec: 1,
+    ms: 1e-3,
+    us: 1e-6,
+    'µs': 1e-6,
+    ns: 1e-9,
+  }[unit] ?? null;
+  if (!Number.isFinite(value) || factor == null) return null;
+  return value * factor;
+}
+
+function readTextIfExists(filePath) {
+  if (!filePath) return '';
+  if (!fs.existsSync(filePath)) return '';
+  return fs.readFileSync(filePath, 'utf8');
+}
+
+function readJsonIfExists(filePath) {
+  if (!filePath) return null;
+  if (!fs.existsSync(filePath)) return null;
+  return JSON.parse(fs.readFileSync(filePath, 'utf8'));
+}
+
+function parseCriterionCompareLog(text) {
+  // Extracts each benchmark's name and its reported time confidence-interval triple.
+  // Criterion also logs progress lines ("Benchmarking <name>: Warming up ...",
+  // ": Collecting ...", ": Analyzing"); that suffix is stripped so names stay clean.
+  //   Benchmarking sample_people/sampling_multiple_l_reservoir: Analyzing
+  //   time:   [10.771 ms 10.811 ms 10.866 ms]
+  const lines = String(text || '').split(/\r?\n/);
+  const resultsByName = new Map();
+  let currentName = null;
+
+  for (const line of lines) {
+    const benchMatch = line.match(/^Benchmarking\s+(.+?)(?::\s+(?:Warming up|Collecting|Analyzing|Complete|Profiling)\b.*)?\s*$/);
+    if (benchMatch) {
+      currentName = benchMatch[1].trim();
+      continue;
+    }
+
+    const timeMatch = line.match(
+      /\btime:\s*\[\s*([0-9]*\.?[0-9]+\s*[a-zA-Zµμ]+)\s+([0-9]*\.?[0-9]+\s*[a-zA-Zµμ]+)\s+([0-9]*\.?[0-9]+\s*[a-zA-Zµμ]+)\s*\]/,
+    );
+    if (!timeMatch || !currentName) continue;
+
+    const t1 = timeMatch[1];
+    const t2 = timeMatch[2];
+    const t3 = timeMatch[3];
+    const s1 = parseDurationToSeconds(t1);
+    const s2 = parseDurationToSeconds(t2);
+    const s3 = parseDurationToSeconds(t3);
+    if (s1 == null || s2 == null || s3 == null) continue;
+
+    resultsByName.set(currentName, {
+      name: currentName,
+      time_text: [t1, t2, t3],
+      time_sec: [s1, s2, s3],
+    });
+  }
+
+  return Array.from(resultsByName.values());
+}
+
+function parseHyperfineJson(hyperfineJson) {
+  if (!hyperfineJson || !Array.isArray(hyperfineJson.results)) return [];
+
+  return hyperfineJson.results.map((r) => {
+    const times = Array.isArray(r.times) ? r.times.filter((n) => Number.isFinite(n)) : [];
+    return {
+      name: r.command ?? r.parameter ?? 'unknown',
+      times_sec: times,
+      mean_sec: Number.isFinite(r.mean) ? r.mean : undefined,
+      min_sec: Number.isFinite(r.min) ? r.min : undefined,
+      max_sec: Number.isFinite(r.max) ? r.max : undefined,
+      stddev_sec: Number.isFinite(r.stddev) ? r.stddev : undefined,
+    };
+  });
+}
+
+function main() {
+  const argv = process.argv.slice(2);
+
+  if (hasArg(argv, '--help') || hasArg(argv, '-h')) {
+    process.stdout.write(fs.readFileSync(__filename, 'utf8').split('\n').slice(0, 40).join('\n') + '\n');
+    return;
+  }
+
+  const repo = getArgValue(argv, '--repo') ?? process.env.GITHUB_REPOSITORY;
+  const branch = getArgValue(argv, '--branch') ?? process.env.RUN_BRANCH ?? process.env.GITHUB_REF_NAME;
+  const prNumber = parseMaybeNumber(getArgValue(argv, '--pr-number') ?? process.env.PR_NUMBER);
+
+  const baseRef = getArgValue(argv, '--base-ref') ?? process.env.BASE_REF;
+  const baseSha = getArgValue(argv, '--base-sha') ?? process.env.BASE_SHA;
+
+  const headRef = getArgValue(argv, '--head-ref') ?? process.env.HEAD_REF;
+  const headSha = getArgValue(argv, '--head-sha') ?? process.env.HEAD_SHA ?? process.env.GITHUB_SHA;
+
+  const runAt = getArgValue(argv, '--run-at') ?? new Date().toISOString();
+
+  const hyperfineJsonPath = getArgValue(argv, '--hyperfine-json') ?? 'hyperfine.json';
+  const criterionLogPath = getArgValue(argv, '--criterion-log') ?? 'criterion-compare.txt';
+
+  const outCurrent = getArgValue(argv, '--out-current') ?? 'bench-current.json';
+  const historyIn = getArgValue(argv, '--history-in');
+  const historyOut = getArgValue(argv, '--history-out') ?? 'bench-history.json';
+
+  const hyperfineJson = readJsonIfExists(hyperfineJsonPath);
+  const hyperfineTimings = parseHyperfineJson(hyperfineJson);
+
+  const criterionCompareLog = readTextIfExists(criterionLogPath);
+  const criterionTimings = parseCriterionCompareLog(criterionCompareLog);
+
+  const payload = {
+    schema: 1,
+    generated_at: runAt,
+    repository: repo,
+    pr_number: prNumber,
+    branch,
+    base: {
+      ref: baseRef,
+      sha: baseSha,
+    },
+    head: {
+      ref: headRef,
+      sha: headSha,
+      url: repo && headSha ? `https://github.com/${repo}/commit/${headSha}` : undefined,
+    },
+    hyperfine: {
+      results: hyperfineTimings,
+    },
+    criterion: {
+      results: criterionTimings,
+    },
+  };
+
+  fs.writeFileSync(outCurrent, JSON.stringify(payload, null, 2));
+
+  // History handling.
+  const historyPath = path.resolve(historyOut);
+  const history = historyIn
+    ? (readJsonIfExists(historyIn) ?? { schema: 1, runs: [] })
+    : (readJsonIfExists(historyPath) ?? { schema: 1, runs: [] });
+
+  if (!Array.isArray(history.runs)) history.runs = [];
+
+  history.schema = 1;
+  history.updated_at = runAt;
+  history.runs.push({
+    run_at: runAt,
+    branch,
+    pr_number: prNumber,
+    base: payload.base,
+    head: payload.head,
+    hyperfine: payload.hyperfine.results,
+    criterion: payload.criterion.results,
+  });
+
+  fs.writeFileSync(historyPath, JSON.stringify(history, null, 2));
+}
+
+main();
diff --git a/scripts/index.html b/scripts/index.html
new file mode 100644
index 00000000..20fd94ca
--- /dev/null
+++ b/scripts/index.html
@@ -0,0 +1,864 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <title>IXA Bench History</title>
+    <style>
+      :root {
+        color-scheme: light dark;
+      }
+
+      body {
+        margin: 0;
+        font: 14px/1.45 system-ui, -apple-system, Segoe UI, Roboto, Ubuntu, Cantarell,
+          Noto Sans, Helvetica, Arial, "Apple Color Emoji", "Segoe UI Emoji";
+      }
+
+      header {
+        padding: 16px 20px;
+        border-bottom: 1px solid rgba(127, 127, 127, 0.25);
+      }
+
+      header h1 {
+        margin: 0;
+        font-size: 16px;
+        font-weight: 650;
+      }
+
+      header .meta {
+        margin-top: 6px;
+        opacity: 0.85;
+        font-size: 12px;
+      }
+
+      main {
+        padding: 16px 20px 24px;
+        max-width: 1200px;
+      }
+
+      .row {
+        display: flex;
+        gap: 12px;
+        flex-wrap: wrap;
+        align-items: end;
+        margin-bottom: 16px;
+      }
+
+      label {
+        display: grid;
+        gap: 6px;
+        font-size: 12px;
+        opacity: 0.9;
+      }
+
+      select,
+      button {
+        font: inherit;
+        padding: 8px 10px;
+        border-radius: 8px;
+        border: 1px solid rgba(127, 127, 127, 0.35);
+        background: transparent;
+        min-width: 260px;
+      }
+
+      button {
+        min-width: auto;
+        cursor: pointer;
+      }
+
+      .checkbox {
+        display: inline-flex;
+        align-items: center;
+        gap: 8px;
+        padding: 8px 10px;
+        border: 1px solid rgba(127, 127, 127, 0.35);
+        border-radius: 8px;
+        user-select: none;
+      }
+
+      .checkbox input {
+        transform: translateY(1px);
+      }
+
+      .grid {
+        display: grid;
+        grid-template-columns: 1fr;
+        gap: 18px;
+      }
+
+      .card {
+        border: 1px solid rgba(127, 127, 127, 0.25);
+        border-radius: 12px;
+        padding: 12px;
+      }
+
+      .card h2 {
+        margin: 0 0 10px;
+        font-size: 13px;
+        opacity: 0.9;
+        font-weight: 650;
+      }
+
+      .canvasWrap {
+        position: relative;
+        height: 420px;
+      }
+
+      .error {
+        color: #b00020;
+        white-space: pre-wrap;
+        margin-top: 12px;
+      }
+
+      .hint {
+        margin-top: 10px;
+        opacity: 0.8;
+        font-size: 12px;
+      }
+
+      @media (min-width: 1000px) {
+        .grid {
+          grid-template-columns: 1.5fr 1fr;
+        }
+      }
+    </style>
+  </head>
+
+  <body>
+    <header>
+      <h1>IXA benchmark history</h1>
+      <div class="meta" id="meta">Loading…</div>
+    </header>
+
+    <main>
+      <div class="row">
+        <label>
+          Suite
+          <select id="suite">
+            <option value="criterion">criterion</option>
+            <option value="hyperfine">hyperfine</option>
+          </select>
+        </label>
+
+        <label class="checkbox" title="Render a separate time-series chart for every benchmark in the selected suite">
+          <input id="showAll" type="checkbox" />
+          Show all benchmarks
+        </label>
+
+        <label>
+          Benchmark
+          <select id="benchmark"></select>
+        </label>
+
+        <label>
+          Metric
+          <select id="metric"></select>
+        </label>
+
+        <label class="checkbox" title="Show an uncertainty band when available">
+          <input id="showBand" type="checkbox" checked />
+          Show band
+        </label>
+
+        <button id="reload" type="button">Reload</button>
+      </div>
+
+      <div class="grid">
+        <section class="card">
+          <h2>Time series</h2>
+          <div class="canvasWrap" id="tsSingleWrap"><canvas id="ts"></canvas></div>
+          <div id="tsAllWrap" hidden></div>
+          <div class="hint" id="tsHint"></div>
+        </section>
+
+        <section class="card">
+          <h2>Latest vs previous (percent change)</h2>
+          <div class="row" style="margin-top: 6px; margin-bottom: 10px">
+            <label>
+              Baseline run
+              <div style="display: flex; gap: 8px; align-items: center; flex-wrap: wrap">
+                <button id="basePrev" type="button" title="Older">←</button>
+                <select id="baseRun" style="min-width: 260px; max-width: 520px"></select>
+                <button id="baseNext" type="button" title="Newer">→</button>
+              </div>
+            </label>
+
+            <label>
+              Compare run
+              <div style="display: flex; gap: 8px; align-items: center; flex-wrap: wrap">
+                <button id="cmpPrev" type="button" title="Older">←</button>
+                <select id="cmpRun" style="min-width: 260px; max-width: 520px"></select>
+                <button id="cmpNext" type="button" title="Newer">→</button>
+              </div>
+            </label>
+          </div>
+          <div class="canvasWrap" id="deltaWrap"><canvas id="delta"></canvas></div>
+          <div class="hint" id="deltaHint"></div>
+        </section>
+      </div>
+
+      <div class="error" id="error" hidden></div>
+    </main>
+
+    <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.1/dist/chart.umd.min.js"></script>
+    <script>
+      // Must match BENCH_HISTORY_BRANCH in .github/workflows/bench-compare.yaml, which publishes this file.
+      const HISTORY_URL = "https://raw.githubusercontent.com/cdc-as81/ixa/refs/heads/benchmarks-history/bench-history.json";
+
+      /**
+       * Expected JSON shape (schema v1):
+       * { schema: 1, runs: [ { run_at, branch, base, head, criterion: [], hyperfine: [] } ] }
+       */
+
+      /** @type {Chart | null} */
+      let tsChart = null;
+      /** @type {Chart[]} */
+      let tsCharts = [];
+      /** @type {Chart | null} */
+      let deltaChart = null;
+
+      const $ = (id) => document.getElementById(id);
+
+      function setError(message) {
+        const el = $("error");
+        if (!message) {
+          el.hidden = true;
+          el.textContent = "";
+          return;
+        }
+        el.hidden = false;
+        el.textContent = message;
+      }
+
+      function formatDate(iso) {
+        const d = new Date(iso);
+        if (Number.isNaN(d.getTime())) return String(iso);
+        return d.toISOString().slice(0, 19).replace("T", " ") + "Z";
+      }
+
+      function bestUnit(seconds) {
+        const s = Math.abs(seconds);
+        if (s === 0) return { scale: 1, unit: "s" };
+        if (s >= 1) return { scale: 1, unit: "s" };
+        if (s >= 1e-3) return { scale: 1e3, unit: "ms" };
+        if (s >= 1e-6) return { scale: 1e6, unit: "µs" };
+        return { scale: 1e9, unit: "ns" };
+      }
+
+      function fmtSeconds(seconds, forcedUnit) {
+        const { scale, unit } = forcedUnit ?? bestUnit(seconds);
+        const v = seconds * scale;
+        const abs = Math.abs(v);
+        const digits = abs >= 100 ? 0 : abs >= 10 ? 1 : 2;
+        return `${v.toFixed(digits)} ${unit}`;
+      }
+
+      function safeSha(sha) {
+        if (!sha) return "";
+        return String(sha).slice(0, 7);
+      }
+
+      function destroyCharts() {
+        if (tsChart) {
+          tsChart.destroy();
+          tsChart = null;
+        }
+        for (const c of tsCharts) c.destroy();
+        tsCharts = [];
+        if (deltaChart) {
+          deltaChart.destroy();
+          deltaChart = null;
+        }
+      }
+
+      function getSuiteRuns(history, suite) {
+        const runs = Array.isArray(history?.runs) ? history.runs.slice() : [];
+        runs.sort((a, b) => new Date(a.run_at) - new Date(b.run_at));
+        return runs.map((r) => ({
+          run_at: r.run_at,
+          branch: r.branch,
+          pr_number: r.pr_number,
+          base: r.base,
+          head: r.head,
+          entries: Array.isArray(r?.[suite]) ? r[suite] : [],
+        }));
+      }
+
+      function indexByName(entries) {
+        /** @type {Map<string, any>} */
+        const map = new Map();
+        for (const e of entries) {
+          if (e && typeof e.name === "string") map.set(e.name, e);
+        }
+        return map;
+      }
+
+      function listBenchmarks(suiteRuns) {
+        const set = new Set();
+        for (const r of suiteRuns) {
+          for (const e of r.entries) {
+            if (e && typeof e.name === "string") set.add(e.name);
+          }
+        }
+        return [...set].sort((a, b) => a.localeCompare(b));
+      }
+
+      function listMetricsForSuite(suite) {
+        if (suite === "criterion") {
+          return [
+            { value: "estimate", label: "estimate" },
+            { value: "lower", label: "lower" },
+            { value: "upper", label: "upper" },
+          ];
+        }
+        return [
+          { value: "mean_sec", label: "mean" },
+          { value: "min_sec", label: "min" },
+          { value: "max_sec", label: "max" },
+          { value: "stddev_sec", label: "stddev" },
+        ];
+      }
+
+      function readValueSeconds(suite, entry, metric) {
+        if (!entry) return null;
+
+        if (suite === "criterion") {
+          const arr = entry.time_sec;
+          if (!Array.isArray(arr) || arr.length < 3) return null;
+          const lower = Number(arr[0]);
+          const est = Number(arr[1]);
+          const upper = Number(arr[2]);
+          if ([lower, est, upper].some((x) => !Number.isFinite(x))) return null;
+
+          if (metric === "lower") return lower;
+          if (metric === "upper") return upper;
+          return est;
+        }
+
+        const v = Number(entry[metric]);
+        if (!Number.isFinite(v)) return null;
+        return v;
+      }
+
+      function readBandSeconds(suite, entry) {
+        if (!entry) return null;
+        if (suite === "criterion") {
+          const arr = entry.time_sec;
+          if (!Array.isArray(arr) || arr.length < 3) return null;
+          const lower = Number(arr[0]);
+          const upper = Number(arr[2]);
+          if (!Number.isFinite(lower) || !Number.isFinite(upper)) return null;
+          return { lower, upper };
+        }
+
+        const mean = Number(entry.mean_sec);
+        const sd = Number(entry.stddev_sec);
+        if (!Number.isFinite(mean) || !Number.isFinite(sd)) return null;
+        const lower = Math.max(0, mean - sd);
+        const upper = mean + sd;
+        return { lower, upper };
+      }
+
+      function makeLabels(suiteRuns) {
+        return suiteRuns.map((r) => {
+          const sha = safeSha(r?.head?.sha);
+          const pr = r.pr_number ? `PR #${r.pr_number}` : r.branch ? r.branch : "";
+          const left = formatDate(r.run_at);
+          const right = [sha, pr].filter(Boolean).join(" · ");
+          return right ? `${left} — ${right}` : left;
+        });
+      }
+
+      function pickDefaultBenchmark(benchmarks) {
+        if (!benchmarks.length) return "";
+        // Prefer a common-looking benchmark when available
+        const preferredPrefix = ["large_sir::ixa", "large_sir::baseline", "counts/"];
+        for (const p of preferredPrefix) {
+          const found = benchmarks.find((b) => b.startsWith(p));
+          if (found) return found;
+        }
+        return benchmarks[0];
+      }
+
+      function renderSingleTimeSeriesChart({
+        canvas,
+        labels,
+        suiteRuns,
+        suite,
+        benchmarkName,
+        metric,
+        showBand,
+      }) {
+        const allPoints = [];
+        const lowerBand = [];
+        const upperBand = [];
+
+        for (const r of suiteRuns) {
+          const byName = indexByName(r.entries);
+          const entry = byName.get(benchmarkName);
+          allPoints.push(readValueSeconds(suite, entry, metric));
+          const band = showBand ? readBandSeconds(suite, entry) : null;
+          lowerBand.push(band ? band.lower : null);
+          upperBand.push(band ? band.upper : null);
+        }
+
+        const vals = allPoints.filter((x) => Number.isFinite(x));
+        const unit = vals.length
+          ? bestUnit(vals.slice().sort((a, b) => a - b)[Math.floor(vals.length / 2)])
+          : { scale: 1, unit: "s" };
+
+        const datasets = [];
+        if (showBand) {
+          datasets.push({
+            label: "lower",
+            data: lowerBand.map((s) => (s == null ? null : s * unit.scale)),
+            borderColor: "rgba(0,0,0,0)",
+            pointRadius: 0,
+            borderWidth: 0,
+            tension: 0.15,
+            spanGaps: true,
+          });
+          datasets.push({
+            label: "upper",
+            data: upperBand.map((s) => (s == null ? null : s * unit.scale)),
+            borderColor: "rgba(0,0,0,0)",
+            pointRadius: 0,
+            borderWidth: 0,
+            backgroundColor: "rgba(80, 120, 255, 0.15)",
+            fill: "-1",
+            tension: 0.15,
+            spanGaps: true,
+          });
+        }
+
+        datasets.push({
+          label: `${benchmarkName} (${metric})`,
+          data: allPoints.map((s) => (s == null ? null : s * unit.scale)),
+          borderColor: "rgba(80, 120, 255, 0.95)",
+          backgroundColor: "rgba(80, 120, 255, 0.25)",
+          pointRadius: 2,
+          borderWidth: 2,
+          tension: 0.15,
+          spanGaps: true,
+        });
+
+        return new Chart(canvas, {
+          type: "line",
+          data: { labels, datasets },
+          options: {
+            responsive: true,
+            maintainAspectRatio: false,
+            interaction: { mode: "index", intersect: false },
+            plugins: {
+              legend: { display: false },
+              tooltip: {
+                callbacks: {
+                  label: (item) => {
+                    const scaled = item.parsed.y;
+                    if (scaled == null) return "(missing)";
+                    return `${item.dataset.label}: ${fmtSeconds(
+                      scaled / unit.scale,
+                      unit
+                    )}`;
+                  },
+                },
+              },
+            },
+            scales: {
+              y: {
+                title: { display: true, text: `Time (${unit.unit})` },
+                ticks: { callback: (v) => `${v} ${unit.unit}` },
+              },
+              x: {
+                ticks: {
+                  maxRotation: 0,
+                  autoSkip: true,
+                  maxTicksLimit: 6,
+                },
+              },
+            },
+          },
+        });
+      }
+
+      function renderTimeSeries({ suiteRuns, suite, benchmarkName, metric, showBand }) {
+        if (typeof Chart === "undefined") {
+          throw new Error(
+            "Chart.js failed to load (Chart is undefined). Check network access or CDN availability."
+          );
+        }
+
+        // Reset previous time-series chart(s)
+        if (tsChart) {
+          tsChart.destroy();
+          tsChart = null;
+        }
+        for (const c of tsCharts) c.destroy();
+        tsCharts = [];
+
+        const showAll = $("showAll").checked;
+        const labels = makeLabels(suiteRuns);
+
+        $("tsSingleWrap").hidden = showAll;
+        $("tsAllWrap").hidden = !showAll;
+
+        if (!showAll) {
+          tsChart = renderSingleTimeSeriesChart({
+            canvas: $("ts"),
+            labels,
+            suiteRuns,
+            suite,
+            benchmarkName,
+            metric,
+            showBand,
+          });
+        } else {
+          const allWrap = $("tsAllWrap");
+          allWrap.innerHTML = "";
+
+          const benchmarks = listBenchmarks(suiteRuns);
+          const grid = document.createElement("div");
+          grid.style.display = "grid";
+          grid.style.gridTemplateColumns = "1fr";
+          grid.style.gap = "12px";
+          allWrap.appendChild(grid);
+
+          for (const name of benchmarks) {
+            const block = document.createElement("div");
+            block.style.border = "1px solid rgba(127, 127, 127, 0.25)";
+            block.style.borderRadius = "12px";
+            block.style.padding = "10px";
+
+            const title = document.createElement("div");
+            title.textContent = name;
+            title.style.fontSize = "12px";
+            title.style.opacity = "0.9";
+            title.style.fontWeight = "650";
+            title.style.marginBottom = "8px";
+            block.appendChild(title);
+
+            const wrap = document.createElement("div");
+            wrap.style.position = "relative";
+            wrap.style.height = "220px";
+            block.appendChild(wrap);
+
+            const canvas = document.createElement("canvas");
+            wrap.appendChild(canvas);
+            grid.appendChild(block);
+
+            tsCharts.push(
+              renderSingleTimeSeriesChart({
+                canvas,
+                labels,
+                suiteRuns,
+                suite,
+                benchmarkName: name,
+                metric,
+                showBand,
+              })
+            );
+          }
+        }
+
+        const lastIdx = suiteRuns.length - 1;
+        const last = lastIdx >= 0 ? suiteRuns[lastIdx] : null;
+        const hintParts = [];
+        if (suiteRuns.length) hintParts.push(`${suiteRuns.length} run(s)`);
+        if (last?.head?.sha) hintParts.push(`latest head: ${safeSha(last.head.sha)}`);
+        if (showAll) hintParts.push("showing all benchmarks");
+        $("tsHint").textContent = hintParts.join(" · ");
+      }
+
+      // Draws the horizontal bar chart of per-benchmark percent change between two runs of one suite
+      // (suiteRuns[baseIdx] → suiteRuns[cmpIdx]) and refreshes the "deltaHint" text; the chart is torn down when no comparison is possible.
+      function renderLatestDelta({ suiteRuns, suite, metric, baseIdx, cmpIdx }) {
+        if (typeof Chart === "undefined") {
+          throw new Error("Chart.js failed to load (Chart is undefined). Check network access or CDN availability.");
+        }
+
+        if (suiteRuns.length < 2) {
+          if (deltaChart) {
+            deltaChart.destroy();
+            deltaChart = null;
+          }
+          $("deltaHint").textContent = "Need at least 2 runs.";
+          return;
+        }
+
+        const hi = suiteRuns.length - 1;
+        const bi = clampInt(baseIdx, 0, hi); // indices arrive as <select> value strings; clampInt coerces them
+        const ci = clampInt(cmpIdx, 0, hi);
+
+        if (bi === ci) {
+          if (deltaChart) {
+            deltaChart.destroy();
+            deltaChart = null;
+          }
+          $("deltaHint").textContent = "Pick two different runs to compare.";
+          return;
+        }
+
+        const prev = suiteRuns[bi];
+        const next = suiteRuns[ci];
+        const prevMap = indexByName(prev.entries);
+        const nextMap = indexByName(next.entries);
+
+        /** @type {{name: string, pct: number}[]} */
+        const changes = [];
+        for (const [name, nextEntry] of nextMap.entries()) {
+          const prevEntry = prevMap.get(name);
+          if (!prevEntry) continue; // benchmark only exists in the compared run: nothing to diff against
+          const a = readValueSeconds(suite, prevEntry, metric);
+          const b = readValueSeconds(suite, nextEntry, metric);
+          if (a == null || b == null || a === 0) continue; // a === 0 would divide by zero below
+          const pct = ((b - a) / a) * 100;
+          if (!Number.isFinite(pct)) continue;
+          changes.push({ name, pct });
+        }
+
+        const MAX = 25;
+        // Keep the MAX largest changes by magnitude (so big improvements are not cut off), then list regressions first.
+        const trimmed = changes.slice().sort((x, y) => Math.abs(y.pct) - Math.abs(x.pct)).slice(0, MAX).sort((x, y) => y.pct - x.pct);
+
+        // Make the chart height scale with bar count (helps readability)
+        const deltaWrap = $("deltaWrap");
+        const desired = 90 + trimmed.length * 18;
+        deltaWrap.style.height = `${Math.max(320, Math.min(900, desired))}px`;
+
+        const labels = trimmed.map((c) => c.name);
+        const data = trimmed.map((c) => c.pct);
+
+        const ctx = $("delta");
+        if (deltaChart) deltaChart.destroy();
+        deltaChart = new Chart(ctx, {
+          type: "bar",
+          data: {
+            labels,
+            datasets: [
+              {
+                label: "% change (y vs x)",
+                data,
+                backgroundColor: data.map((v) =>
+                  v >= 0 ? "rgba(220, 70, 70, 0.65)" : "rgba(70, 180, 120, 0.65)"
+                ),
+                borderColor: data.map((v) =>
+                  v >= 0 ? "rgba(220, 70, 70, 0.95)" : "rgba(70, 180, 120, 0.95)"
+                ),
+                borderWidth: 1,
+              },
+            ],
+          },
+          options: {
+            responsive: true,
+            maintainAspectRatio: false,
+            indexAxis: "y",
+            plugins: {
+              legend: { display: false },
+              tooltip: {
+                callbacks: {
+                  title: (items) => {
+                    const i = items?.[0];
+                    return i ? String(i.label) : "";
+                  },
+                  label: (item) => {
+                    const v = item.parsed.x;
+                    return `${Number(v).toFixed(2)}%`;
+                  },
+                },
+              },
+            },
+            scales: {
+              x: {
+                title: { display: true, text: "% change (time; lower is better)" },
+                ticks: { callback: (v) => `${v}%` },
+              },
+              y: {
+                ticks: {
+                  autoSkip: false,
+                  callback: (_v, idx) => {
+                    const s = labels[idx] ?? "";
+                    const max = 42;
+                    return s.length > max ? s.slice(0, max - 1) + "…" : s;
+                  },
+                },
+              },
+            },
+          },
+        });
+
+        const prevSha = safeSha(prev?.head?.sha);
+        const nextSha = safeSha(next?.head?.sha);
+        $("deltaHint").textContent =
+          `Comparing ${formatDate(prev.run_at)} (${prevSha}) → ${formatDate(next.run_at)} (${nextSha}). ` +
+          `Showing the ${trimmed.length} largest of ${changes.length} changes (full names in tooltip).`;
+      }
+
+      function populateSelect(selectEl, options, value) {
+        selectEl.innerHTML = ""; // discard whatever options the <select> held before
+        for (const entry of options) {
+          const optionEl = document.createElement("option");
+          optionEl.textContent = entry.label ?? entry; // entries are plain values or { value, label } objects
+          optionEl.value = entry.value ?? entry;
+          selectEl.appendChild(optionEl);
+        }
+        if (value) selectEl.value = value; // only preselect when a truthy value was passed
+      }
+
+      function clampInt(n, lo, hi) { // coerce n to a number, truncate it to an integer, clamp it into [lo, hi]
+        const x = Number(n); // callers pass <select> value strings as well as numbers
+        if (!Number.isFinite(x)) return lo; // NaN or ±Infinity falls back to the lower bound
+        return Math.min(hi, Math.max(lo, Math.trunc(x)));
+      }
+
+      async function fetchHistory() {
+        const res = await fetch(HISTORY_URL, {
+          cache: "no-store",
+          headers: { Accept: "application/json" },
+        });
+        if (!res.ok) {
+          throw new Error(`Fetch failed: ${res.status} ${res.statusText}`);
+        }
+        return await res.json();
+      }
+
+      function updateMeta(history) {
+        const runs = Array.isArray(history?.runs) ? history.runs : [];
+        const last = runs.length ? runs.slice().sort((a, b) => new Date(a.run_at) - new Date(b.run_at))[runs.length - 1] : null;
+        const lastTxt = last?.run_at ? formatDate(last.run_at) : "(unknown)";
+        $("meta").innerHTML =
+          `Source: <a href="${HISTORY_URL}">${HISTORY_URL}</a> · Runs: ${runs.length} · Latest: ${lastTxt}`;
+      }
+
+      async function main() {
+        setError("");
+        destroyCharts();
+        $("meta").textContent = "Loading…";
+
+        const history = await fetchHistory();
+        updateMeta(history);
+
+        const suiteEl = $("suite");
+        const showAllEl = $("showAll");
+        const benchEl = $("benchmark");
+        const metricEl = $("metric");
+        const showBandEl = $("showBand");
+
+        const baseRunEl = $("baseRun");
+        const cmpRunEl = $("cmpRun");
+        const basePrevEl = $("basePrev");
+        const baseNextEl = $("baseNext");
+        const cmpPrevEl = $("cmpPrev");
+        const cmpNextEl = $("cmpNext");
+
+        function rerender() {
+          const suite = suiteEl.value;
+          const suiteRuns = getSuiteRuns(history, suite);
+          const bench = benchEl.value;
+          const metric = metricEl.value;
+          const showBand = showBandEl.checked;
+
+          benchEl.disabled = showAllEl.checked;
+
+          renderTimeSeries({ suiteRuns, suite, benchmarkName: bench, metric, showBand });
+          renderLatestDelta({
+            suiteRuns,
+            suite,
+            metric,
+            baseIdx: baseRunEl.value,
+            cmpIdx: cmpRunEl.value,
+          });
+        }
+
+        function onSuiteChange() {
+          const suite = suiteEl.value;
+          const suiteRuns = getSuiteRuns(history, suite);
+          const benchmarks = listBenchmarks(suiteRuns);
+          const defaultBench = pickDefaultBenchmark(benchmarks);
+
+          populateSelect(
+            benchEl,
+            benchmarks.map((b) => ({ value: b, label: b })),
+            defaultBench
+          );
+
+          const metrics = listMetricsForSuite(suite);
+          const defaultMetric = suite === "criterion" ? "estimate" : "mean_sec";
+          populateSelect(metricEl, metrics, defaultMetric);
+
+          // Delta chart run pickers
+          const runLabels = makeLabels(suiteRuns);
+          const runOptions = runLabels.map((label, idx) => ({
+            value: String(idx),
+            label,
+          }));
+
+          const n = suiteRuns.length;
+          const defaultBase = String(Math.max(0, n - 2));
+          const defaultCmp = String(Math.max(0, n - 1));
+
+          const prevBase = baseRunEl.value;
+          const prevCmp = cmpRunEl.value;
+          const baseVal = prevBase !== "" && Number(prevBase) < n ? prevBase : defaultBase;
+          const cmpVal = prevCmp !== "" && Number(prevCmp) < n ? prevCmp : defaultCmp;
+
+          populateSelect(baseRunEl, runOptions, baseVal);
+          populateSelect(cmpRunEl, runOptions, cmpVal);
+
+          rerender();
+        }
+
+        function stepRun(selectEl, delta) {
+          const suite = suiteEl.value;
+          const suiteRuns = getSuiteRuns(history, suite);
+          const hi = suiteRuns.length - 1;
+          if (hi < 0) return;
+
+          const otherEl = selectEl === baseRunEl ? cmpRunEl : baseRunEl;
+          let idx = clampInt(selectEl.value, 0, hi) + delta;
+          idx = clampInt(idx, 0, hi);
+
+          const other = clampInt(otherEl.value, 0, hi);
+          if (idx === other) {
+            idx = clampInt(idx + (delta >= 0 ? 1 : -1), 0, hi);
+            if (idx === other) return;
+          }
+
+          selectEl.value = String(idx);
+          rerender();
+        }
+
+        suiteEl.addEventListener("change", onSuiteChange);
+        showAllEl.addEventListener("change", rerender);
+        benchEl.addEventListener("change", rerender);
+        metricEl.addEventListener("change", rerender);
+        showBandEl.addEventListener("change", rerender);
+
+        baseRunEl.addEventListener("change", rerender);
+        cmpRunEl.addEventListener("change", rerender);
+        basePrevEl.addEventListener("click", () => stepRun(baseRunEl, -1));
+        baseNextEl.addEventListener("click", () => stepRun(baseRunEl, +1));
+        cmpPrevEl.addEventListener("click", () => stepRun(cmpRunEl, -1));
+        cmpNextEl.addEventListener("click", () => stepRun(cmpRunEl, +1));
+
+        $("reload").addEventListener("click", async () => {
+          try {
+            await main();
+          } catch (e) {
+            setError(String(e?.stack ?? e));
+          }
+        });
+
+        onSuiteChange();
+      }
+
+      main().catch((e) => {
+        setError(
+          `Failed to load or render bench history.\n\n${String(e?.stack ?? e)}\n\n` +
+            `If this is opened from a file:// URL, your browser may block some requests depending on settings. ` +
+            `Try a different browser, or run a local server like: python3 -m http.server -d scripts 8000`
+        );
+      });
+    </script>
+  </body>
+</html>
diff --git a/scripts/test_bench_results.sh b/scripts/test_bench_results.sh
new file mode 100755
index 00000000..186d3005
--- /dev/null
+++ b/scripts/test_bench_results.sh
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Local test harness for scripts/bench_results.js (invokes node and jq).
+# Usage (from the repository root; the script path is resolved via $(pwd)):
+#   bash scripts/test_bench_results.sh
+
+repo="acme/example" # placeholder slug handed to bench_results.js via --repo
+
+workdir="$(mktemp -d)"
+trap 'rm -rf "$workdir"' EXIT # remove the scratch directory on exit
+
+cat >"$workdir/hyperfine.json" <<'JSON'
+{
+  "results": [
+    {
+      "command": "echo hello",
+      "mean": 0.012,
+      "min": 0.011,
+      "max": 0.013,
+      "stddev": 0.001,
+      "times": [0.011, 0.012, 0.013]
+    }
+  ]
+}
+JSON
+
+cat >"$workdir/criterion-compare.txt" <<'TXT'
+Benchmarking sample_people/sampling_multiple_l_reservoir
+Benchmarking sample_people/another_bench
+  time:   [10.771 ms 10.811 ms 10.866 ms]
+TXT
+
+cat >"$workdir/bench-history.json" <<'JSON'
+{
+  "schema": 1,
+  "updated_at": "2020-01-01T00:00:00.000Z",
+  "runs": [
+    {
+      "run_at": "2020-01-01T00:00:00.000Z",
+      "branch": "main",
+      "hyperfine": [],
+      "criterion": []
+    }
+  ]
+}
+JSON
+
+node "$(pwd)/scripts/bench_results.js" \
+  --repo "$repo" \
+  --branch "feature/test" \
+  --pr-number 42 \
+  --base-ref main --base-sha 1111111 \
+  --head-ref feature/test --head-sha 2222222 \
+  --run-at "2026-02-09T00:00:00.000Z" \
+  --hyperfine-json "$workdir/hyperfine.json" \
+  --criterion-log "$workdir/criterion-compare.txt" \
+  --history-in "$workdir/bench-history.json" \
+  --out-current "$workdir/bench-current.json" \
+  --history-out "$workdir/bench-history.out.json"
+
+jq -e '.branch=="feature/test" and (.hyperfine.results|length)==1 and (.criterion.results|length)>=1' "$workdir/bench-current.json" >/dev/null
+jq -e '(.runs|length)==2 and (.runs[-1].branch=="feature/test")' "$workdir/bench-history.out.json" >/dev/null
+
+# Validate criterion time triple parsed.
+low_ms=$(jq -r '.criterion.results[] | select(.name=="sample_people/another_bench") | .time_text[0]' "$workdir/bench-current.json")
+if [[ "$low_ms" != "10.771 ms" ]]; then # first element of the "time: [...]" triple in the fixture log above
+  echo "Expected criterion low time 10.771 ms, got: $low_ms" >&2
+  exit 1
+fi
+
+echo "OK: bench_results.js test passed"

From 61cd894d6b9ccab8784519bf67ad3774e045e7d8 Mon Sep 17 00:00:00 2001
From: Gil Sharon <as81@cdc.gov>
Date: Wed, 11 Feb 2026 11:16:59 -0500
Subject: [PATCH 2/7] update the index.html file

---
 scripts/index.html | 187 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 158 insertions(+), 29 deletions(-)

diff --git a/scripts/index.html b/scripts/index.html
index 20fd94ca..e22da55b 100644
--- a/scripts/index.html
+++ b/scripts/index.html
@@ -9,10 +9,18 @@
         color-scheme: light dark;
       }
 
+      *,
+      *::before,
+      *::after {
+        box-sizing: border-box;
+      }
+
       body {
         margin: 0;
         font: 14px/1.45 system-ui, -apple-system, Segoe UI, Roboto, Ubuntu, Cantarell,
           Noto Sans, Helvetica, Arial, "Apple Color Emoji", "Segoe UI Emoji";
+        background: Canvas;
+        color: CanvasText;
       }
 
       header {
@@ -35,6 +43,7 @@
       main {
         padding: 16px 20px 24px;
         max-width: 1200px;
+        margin: 0 auto;
       }
 
       .row {
@@ -53,18 +62,48 @@
       }
 
       select,
+      input[type="url"],
+      input[type="text"],
+      input[type="file"],
       button {
         font: inherit;
         padding: 8px 10px;
         border-radius: 8px;
         border: 1px solid rgba(127, 127, 127, 0.35);
-        background: transparent;
+        background: Field;
+        color: FieldText;
         min-width: 260px;
       }
 
+      input[type="file"] {
+        padding: 6px 10px;
+      }
+
+      input[type="checkbox"] {
+        width: 16px;
+        height: 16px;
+        margin: 0;
+        accent-color: Highlight;
+      }
+
       button {
         min-width: auto;
         cursor: pointer;
+        background: ButtonFace;
+        color: ButtonText;
+      }
+
+      select:focus-visible,
+      input[type="url"]:focus-visible,
+      input[type="text"]:focus-visible,
+      input[type="file"]:focus-visible,
+      button:focus-visible {
+        outline: 2px solid Highlight;
+        outline-offset: 2px;
+      }
+
+      button:hover {
+        border-color: rgba(127, 127, 127, 0.55);
       }
 
       .checkbox {
@@ -75,6 +114,9 @@
         border: 1px solid rgba(127, 127, 127, 0.35);
         border-radius: 8px;
         user-select: none;
+        opacity: 0.9;
+        background: Field;
+        color: FieldText;
       }
 
       .checkbox input {
@@ -133,6 +175,18 @@ <h1>IXA benchmark history</h1>
 
     <main>
       <div class="row">
+        <label style="flex: 1 1 520px; min-width: 320px">
+          History URL
+          <input id="historyUrl" type="url" spellcheck="false" />
+        </label>
+
+        <button id="loadUrl" type="button">Load URL</button>
+
+        <label style="flex: 1 1 420px; min-width: 320px">
+          Load from disk
+          <input id="historyFile" type="file" accept="application/json,.json" />
+        </label>
+
         <label>
           Suite
           <select id="suite">
@@ -203,8 +257,14 @@ <h2>Latest vs previous (percent change)</h2>
 
     <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.1/dist/chart.umd.min.js"></script>
     <script>
-      const HISTORY_URL =
-        "https://raw.githubusercontent.com/cdc-as81/ixa/refs/heads/gh-pages/bench-history.json";
+      const DEFAULT_HISTORY_URL =
+        "https://raw.githubusercontent.com/cdcgov/ixa/refs/heads/bench-history/bench-history.json";
+
+      /** @type {{ kind: 'url', url: string } | { kind: 'file', file: File }} */
+      let currentSource = { kind: "url", url: DEFAULT_HISTORY_URL };
+
+      /** @type {any} */
+      let currentHistory = null;
 
       /**
        * Expected JSON shape (schema v1):
@@ -711,8 +771,8 @@ <h2>Latest vs previous (percent change)</h2>
         return Math.min(hi, Math.max(lo, Math.trunc(x)));
       }
 
-      async function fetchHistory() {
-        const res = await fetch(HISTORY_URL, {
+      async function fetchHistoryFromUrl(url) {
+        const res = await fetch(url, {
           cache: "no-store",
           headers: { Accept: "application/json" },
         });
@@ -722,21 +782,40 @@ <h2>Latest vs previous (percent change)</h2>
         return await res.json();
       }
 
-      function updateMeta(history) {
+      async function fetchHistoryFromFile(file) {
+        const text = await file.text();
+        try {
+          return JSON.parse(text);
+        } catch (e) {
+          throw new Error(
+            `Failed to parse JSON from file: ${file?.name ?? "(unknown)"}\n\n${String(e?.message ?? e)}`
+          );
+        }
+      }
+
+      async function fetchHistory(source) {
+        if (source?.kind === "file") return await fetchHistoryFromFile(source.file);
+        const url = source?.url ?? DEFAULT_HISTORY_URL;
+        return await fetchHistoryFromUrl(url);
+      }
+
+      function updateMeta(history, source) {
         const runs = Array.isArray(history?.runs) ? history.runs : [];
         const last = runs.length ? runs.slice().sort((a, b) => new Date(a.run_at) - new Date(b.run_at))[runs.length - 1] : null;
         const lastTxt = last?.run_at ? formatDate(last.run_at) : "(unknown)";
-        $("meta").innerHTML =
-          `Source: <a href="${HISTORY_URL}">${HISTORY_URL}</a> · Runs: ${runs.length} · Latest: ${lastTxt}`;
-      }
+        if (source?.kind === "file") {
+          $("meta").textContent =
+            `Source: ${source.file?.name ?? "(local file)"} · Runs: ${runs.length} · Latest: ${lastTxt}`;
+          return;
+        }
 
-      async function main() {
-        setError("");
-        destroyCharts();
-        $("meta").textContent = "Loading…";
+        const url = source?.url ?? DEFAULT_HISTORY_URL; // may be user-typed, so it is never interpolated into markup
+        $("meta").replaceChildren("Source: ", Object.assign(document.createElement("a"), { href: url, textContent: url }), ` · Runs: ${runs.length} · Latest: ${lastTxt}`);
+      }
 
-        const history = await fetchHistory();
-        updateMeta(history);
+      function initUiOnce() {
+        if (initUiOnce._done) return;
+        initUiOnce._done = true;
 
         const suiteEl = $("suite");
         const showAllEl = $("showAll");
@@ -752,8 +831,9 @@ <h2>Latest vs previous (percent change)</h2>
         const cmpNextEl = $("cmpNext");
 
         function rerender() {
+          if (!currentHistory) return;
           const suite = suiteEl.value;
-          const suiteRuns = getSuiteRuns(history, suite);
+          const suiteRuns = getSuiteRuns(currentHistory, suite);
           const bench = benchEl.value;
           const metric = metricEl.value;
           const showBand = showBandEl.checked;
@@ -771,20 +851,25 @@ <h2>Latest vs previous (percent change)</h2>
         }
 
         function onSuiteChange() {
+          if (!currentHistory) return;
           const suite = suiteEl.value;
-          const suiteRuns = getSuiteRuns(history, suite);
+          const suiteRuns = getSuiteRuns(currentHistory, suite);
           const benchmarks = listBenchmarks(suiteRuns);
           const defaultBench = pickDefaultBenchmark(benchmarks);
 
+          const prevBench = benchEl.value;
+          const nextBench = benchmarks.includes(prevBench) ? prevBench : defaultBench;
           populateSelect(
             benchEl,
             benchmarks.map((b) => ({ value: b, label: b })),
-            defaultBench
+            nextBench
           );
 
           const metrics = listMetricsForSuite(suite);
           const defaultMetric = suite === "criterion" ? "estimate" : "mean_sec";
-          populateSelect(metricEl, metrics, defaultMetric);
+          const prevMetric = metricEl.value;
+          const nextMetric = metrics.includes(prevMetric) ? prevMetric : defaultMetric;
+          populateSelect(metricEl, metrics, nextMetric);
 
           // Delta chart run pickers
           const runLabels = makeLabels(suiteRuns);
@@ -809,8 +894,9 @@ <h2>Latest vs previous (percent change)</h2>
         }
 
         function stepRun(selectEl, delta) {
+          if (!currentHistory) return;
           const suite = suiteEl.value;
-          const suiteRuns = getSuiteRuns(history, suite);
+          const suiteRuns = getSuiteRuns(currentHistory, suite);
           const hi = suiteRuns.length - 1;
           if (hi < 0) return;
 
@@ -841,18 +927,61 @@ <h2>Latest vs previous (percent change)</h2>
         cmpPrevEl.addEventListener("click", () => stepRun(cmpRunEl, -1));
         cmpNextEl.addEventListener("click", () => stepRun(cmpRunEl, +1));
 
-        $("reload").addEventListener("click", async () => {
-          try {
-            await main();
-          } catch (e) {
-            setError(String(e?.stack ?? e));
-          }
-        });
+        // Expose for loaders
+        initUiOnce._onSuiteChange = onSuiteChange;
+      }
+
+      async function loadAndRender(source) {
+        setError("");
+        destroyCharts();
+        $("meta").textContent = "Loading…";
 
-        onSuiteChange();
+        const history = await fetchHistory(source);
+        currentHistory = history;
+        currentSource = source?.kind ? source : { kind: "url", url: DEFAULT_HISTORY_URL };
+        updateMeta(history, currentSource);
+
+        initUiOnce();
+        initUiOnce._onSuiteChange?.();
       }
 
-      main().catch((e) => {
+      // UI wiring for sources
+      $("historyUrl").value = DEFAULT_HISTORY_URL;
+
+      $("loadUrl").addEventListener("click", async () => {
+        try {
+          const url = String($("historyUrl").value ?? "").trim() || DEFAULT_HISTORY_URL;
+          await loadAndRender({ kind: "url", url });
+        } catch (e) {
+          setError(String(e?.stack ?? e));
+        }
+      });
+
+      $("historyUrl").addEventListener("keydown", async (ev) => {
+        if (ev.key !== "Enter") return;
+        ev.preventDefault();
+        $("loadUrl").click();
+      });
+
+      $("historyFile").addEventListener("change", async (ev) => {
+        try {
+          const file = ev?.target?.files?.[0];
+          if (!file) return;
+          await loadAndRender({ kind: "file", file });
+        } catch (e) {
+          setError(String(e?.stack ?? e));
+        }
+      });
+
+      $("reload").addEventListener("click", async () => {
+        try {
+          await loadAndRender(currentSource);
+        } catch (e) {
+          setError(String(e?.stack ?? e));
+        }
+      });
+
+      loadAndRender(currentSource).catch((e) => {
         setError(
           `Failed to load or render bench history.\n\n${String(e?.stack ?? e)}\n\n` +
             `If this is opened from a file:// URL, your browser may block some requests depending on settings. ` +

From 199bfea756914555379a3f5cae17d076ad1c3fa9 Mon Sep 17 00:00:00 2001
From: Gil Sharon <as81@cdc.gov>
Date: Wed, 11 Feb 2026 12:17:13 -0500
Subject: [PATCH 3/7] fix index.html

---
 scripts/index.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/index.html b/scripts/index.html
index e22da55b..693ec906 100644
--- a/scripts/index.html
+++ b/scripts/index.html
@@ -258,7 +258,7 @@ <h2>Latest vs previous (percent change)</h2>
     <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.1/dist/chart.umd.min.js"></script>
     <script>
       const DEFAULT_HISTORY_URL =
-        "https://raw.githubusercontent.com/cdcgov/ixa/refs/heads/bench-history/bench-history.json";
+        "https://raw.githubusercontent.com/cdcgov/ixa/refs/heads/benchmarks-history/bench-history.json";
 
       /** @type {{ kind: 'url', url: string } | { kind: 'file', file: File }} */
       let currentSource = { kind: "url", url: DEFAULT_HISTORY_URL };

From cdd21b82f3ed54000f3e198a9789cde35c41fffa Mon Sep 17 00:00:00 2001
From: Gil Sharon <as81@cdc.gov>
Date: Wed, 11 Feb 2026 12:21:06 -0500
Subject: [PATCH 4/7] fix Hyperfine pr comment

---
 .github/workflows/bench-compare.yaml | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/bench-compare.yaml b/.github/workflows/bench-compare.yaml
index 2d5e0fde..4a8da6f0 100644
--- a/.github/workflows/bench-compare.yaml
+++ b/.github/workflows/bench-compare.yaml
@@ -33,9 +33,9 @@ jobs:
         run: git checkout ${{ github.event.pull_request.head.ref }}
 
       - name: Run hyperfine benchmarks
-        run:
-          mise run bench:hyperfine -- -- --export-markdown hyperfine.md
-          --export-json hyperfine.json
+        run: |
+          set -euo pipefail
+          mise run bench:hyperfine -- -- --export-markdown hyperfine.md --export-json hyperfine.json
 
       - name: Run criterion benchmarks (baseline compare)
         run: |
@@ -54,9 +54,7 @@ jobs:
           echo '' >> results.md
           echo '#### Hyperfine' >> results.md
           echo '' >> results.md
-          echo '```' >> results.md
           cat hyperfine.md >> results.md
-          echo '```' >> results.md
           echo '' >> results.md
           echo '#### Criterion' >> results.md
           echo '' >> results.md

From 90d74c50a324c20ca5342a9658bc64e7a745ffed Mon Sep 17 00:00:00 2001
From: Gil Sharon <as81@cdc.gov>
Date: Wed, 11 Feb 2026 12:32:52 -0500
Subject: [PATCH 5/7] updated bench history scripts

---
 scripts/bench_results.js      | 19 +++++++++++--
 scripts/test_bench_results.sh | 52 ++++++++++++++++++++++++++++-------
 2 files changed, 59 insertions(+), 12 deletions(-)

diff --git a/scripts/bench_results.js b/scripts/bench_results.js
index 11ed31a7..035e3d46 100644
--- a/scripts/bench_results.js
+++ b/scripts/bench_results.js
@@ -133,6 +133,14 @@ function parseHyperfineJson(hyperfineJson) {
   });
 }
 
+function normalizeRunPrNumber(run) {
+  // Yields run.pr_number as a finite number; undefined when the run or the field is missing or not numeric.
+  if (run === null || typeof run !== 'object') return undefined;
+  const { pr_number: rawPr } = run;
+  const parsed = rawPr === null || rawPr === undefined ? NaN : Number(rawPr);
+  return Number.isFinite(parsed) ? parsed : undefined;
+}
+
 function main() {
   const argv = process.argv.slice(2);
 
@@ -201,7 +209,7 @@ function main() {
 
   history.schema = 1;
   history.updated_at = runAt;
-  history.runs.push({
+  const newRun = {
     run_at: runAt,
     branch,
     pr_number: prNumber,
@@ -209,7 +217,14 @@ function main() {
     head: payload.head,
     hyperfine: payload.hyperfine.results,
     criterion: payload.criterion.results,
-  });
+  };
+
+  // For PRs, keep a single entry per PR number (reruns update in-place rather than append).
+  if (Number.isFinite(prNumber)) {
+    history.runs = history.runs.filter((r) => normalizeRunPrNumber(r) !== prNumber);
+  }
+
+  history.runs.push(newRun);
 
   fs.writeFileSync(historyPath, JSON.stringify(history, null, 2));
 }
diff --git a/scripts/test_bench_results.sh b/scripts/test_bench_results.sh
index 186d3005..50bd8aa2 100755
--- a/scripts/test_bench_results.sh
+++ b/scripts/test_bench_results.sh
@@ -35,17 +35,14 @@ cat >"$workdir/bench-history.json" <<'JSON'
 {
   "schema": 1,
   "updated_at": "2020-01-01T00:00:00.000Z",
-  "runs": [
-    {
-      "run_at": "2020-01-01T00:00:00.000Z",
-      "branch": "main",
-      "hyperfine": [],
-      "criterion": []
-    }
-  ]
+  "runs": []
 }
 JSON
 
+history1="$workdir/bench-history.1.json"
+history2="$workdir/bench-history.2.json"
+history3="$workdir/bench-history.3.json"
+
 node "$(pwd)/scripts/bench_results.js" \
   --repo "$repo" \
   --branch "feature/test" \
@@ -57,10 +54,45 @@ node "$(pwd)/scripts/bench_results.js" \
   --criterion-log "$workdir/criterion-compare.txt" \
   --history-in "$workdir/bench-history.json" \
   --out-current "$workdir/bench-current.json" \
-  --history-out "$workdir/bench-history.out.json"
+  --history-out "$history1"
 
 jq -e '.branch=="feature/test" and (.hyperfine.results|length)==1 and (.criterion.results|length)>=1' "$workdir/bench-current.json" >/dev/null
-jq -e '(.runs|length)==2 and (.runs[-1].branch=="feature/test")' "$workdir/bench-history.out.json" >/dev/null
+jq -e '(.runs|length)==1 and (.runs[0].pr_number==42) and (.runs[0].head.sha=="2222222")' "$history1" >/dev/null
+
+# Add another PR; should append (history grows).
+node "$(pwd)/scripts/bench_results.js" \
+  --repo "$repo" \
+  --branch "feature/other" \
+  --pr-number 43 \
+  --base-ref main --base-sha 1111111 \
+  --head-ref feature/other --head-sha 3333333 \
+  --run-at "2026-02-09T01:00:00.000Z" \
+  --hyperfine-json "$workdir/hyperfine.json" \
+  --criterion-log "$workdir/criterion-compare.txt" \
+  --history-in "$history1" \
+  --out-current "$workdir/bench-current.json" \
+  --history-out "$history2"
+
+jq -e '(.runs|length)==2 and ([.runs[].pr_number]|sort)==[42,43]' "$history2" >/dev/null
+
+# Re-run PR 42; should update existing entry (history length unchanged).
+node "$(pwd)/scripts/bench_results.js" \
+  --repo "$repo" \
+  --branch "feature/test-rerun" \
+  --pr-number 42 \
+  --base-ref main --base-sha 1111111 \
+  --head-ref feature/test-rerun --head-sha 4444444 \
+  --run-at "2026-02-09T02:00:00.000Z" \
+  --hyperfine-json "$workdir/hyperfine.json" \
+  --criterion-log "$workdir/criterion-compare.txt" \
+  --history-in "$history2" \
+  --out-current "$workdir/bench-current.json" \
+  --history-out "$history3"
+
+jq -e '(.runs|length)==2 and ([.runs[].pr_number]|sort)==[42,43]' "$history3" >/dev/null
+jq -e '([.runs[] | select(.pr_number==42)] | length)==1' "$history3" >/dev/null
+jq -e '(.runs[] | select(.pr_number==42) | .head.sha)=="4444444" and (.runs[] | select(.pr_number==42) | .run_at)=="2026-02-09T02:00:00.000Z" and (.runs[] | select(.pr_number==42) | .branch)=="feature/test-rerun"' "$history3" >/dev/null
+jq -e '(.runs[] | select(.pr_number==43) | .head.sha)=="3333333"' "$history3" >/dev/null
 
 # Validate criterion time triple parsed.
 low_ms=$(jq -r '.criterion.results[] | select(.name=="sample_people/another_bench") | .time_text[0]' "$workdir/bench-current.json")

From 8a45ca152a43303b9a56cea4828d47ea2e25a8a9 Mon Sep 17 00:00:00 2001
From: Gil Sharon <as81@cdc.gov>
Date: Thu, 12 Feb 2026 09:56:55 -0500
Subject: [PATCH 6/7] update bench-compare

---
 .github/workflows/bench-compare.yaml | 79 ++++++++++++++++++++--------
 1 file changed, 58 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/bench-compare.yaml b/.github/workflows/bench-compare.yaml
index 4a8da6f0..5c497a2b 100644
--- a/.github/workflows/bench-compare.yaml
+++ b/.github/workflows/bench-compare.yaml
@@ -3,16 +3,15 @@ name: Benchmark Pull Requests
 on:
   pull_request:
 
+env:
+  BENCH_HISTORY_BRANCH: benchmarks-history
+  BENCH_HISTORY_DIR: tmp-bench-history
+
 jobs:
   compare-branches:
     runs-on: ubuntu-latest
     permissions:
-      actions: write
-      contents: write
-      pull-requests: write
-    env:
-      BENCH_HISTORY_BRANCH: benchmarks-history
-      BENCH_HISTORY_DIR: tmp-bench-history
+      contents: read
     steps:
       - uses: actions/checkout@v6
       - name: Setup environment
@@ -62,14 +61,11 @@ jobs:
           cat criterion-regressions.txt >> results.md
           echo '```' >> results.md
 
-      - name: Add comment to PR
-        if: ${{ github.event_name == 'pull_request' }}
-        run: |
-          gh pr comment ${{ github.event.pull_request.number }} \
-            --repo ${{ github.repository }} \
-            --body-file results.md
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Upload PR comment artifact
+        uses: actions/upload-artifact@v6
+        with:
+          name: pr-comment
+          path: results.md
 
       - name:
           Download previous bench history from benchmarks-history branch (best
@@ -113,18 +109,59 @@ jobs:
             --out-current bench-current.json \
             --history-out bench-history.json
 
-      - name: Publish bench-history.json to benchmarks-history branch
-        if:
-          ${{ github.event_name != 'pull_request' ||
-          github.event.pull_request.head.repo.full_name == github.repository }}
+      - name: Upload bench history artifact
+        uses: actions/upload-artifact@v6
+        with:
+          name: bench-history
+          path: bench-history.json
+
+  comment-on-pr:  # posts results.md (built by compare-branches) as a PR comment
+    runs-on: ubuntu-latest
+    needs: compare-branches
+    permissions:
+      pull-requests: write  # the only permission this job requests
+    steps:
+      - name: Download PR comment artifact
+        uses: actions/download-artifact@v6
+        with:
+          name: pr-comment  # artifact holding results.md, uploaded by compare-branches
+
+      - name: Add comment to PR
+        run: |
+          gh pr comment ${{ github.event.pull_request.number }} \
+            --repo ${{ github.repository }} \
+            --body-file results.md
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+  publish-bench-history:
+    runs-on: ubuntu-latest
+    needs: compare-branches
+    permissions:
+      contents: write
+    steps:
+      - name: Download bench history artifact
+        uses: actions/download-artifact@v6
+        with:
+          name: bench-history
+
+      - name: Checkout benchmarks-history branch
+        uses: actions/checkout@v6
+        with:
+          ref: ${{ env.BENCH_HISTORY_BRANCH }}  # workflow-level env: benchmarks-history
+          path: ${{ env.BENCH_HISTORY_DIR }}  # workflow-level env: tmp-bench-history
+          fetch-depth: 1  # same depth as the `git clone --depth 1` this step replaces
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Publish bench-history.json to benchmarks-history branch
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
         run: |
           set -euo pipefail
-          rm -rf "${BENCH_HISTORY_DIR}"
-          git clone --depth 1 --branch "${BENCH_HISTORY_BRANCH}" "https://x-access-token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" "${BENCH_HISTORY_DIR}"
           cp bench-history.json "${BENCH_HISTORY_DIR}/bench-history.json"
           pushd "${BENCH_HISTORY_DIR}" >/dev/null
+          git checkout "${BENCH_HISTORY_BRANCH}"
           git config user.name "github-actions[bot]"
           git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
           # git diff doesn't consider untracked files; use status to detect any change.
@@ -133,6 +170,6 @@ jobs:
             exit 0
           fi
           git add bench-history.json
-          git commit -m "Update bench-history.json"
+          git commit -m "Update bench-history.json (PR #${PR_NUMBER} @ ${HEAD_SHA:0:7})"
           git push
           popd >/dev/null

From a0dcc7f6e83b09221d0008a899637f3f1028dc81 Mon Sep 17 00:00:00 2001
From: Gil Sharon <as81@cdc.gov>
Date: Thu, 12 Feb 2026 11:00:33 -0500
Subject: [PATCH 7/7] chore: gate bench history on main PRs; strip criterion status suffix

---
 .github/workflows/bench-compare.yaml | 14 ++++++++++++++
 scripts/bench_results.js             |  7 ++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/bench-compare.yaml b/.github/workflows/bench-compare.yaml
index 5c497a2b..16086532 100644
--- a/.github/workflows/bench-compare.yaml
+++ b/.github/workflows/bench-compare.yaml
@@ -67,9 +67,20 @@ jobs:
           name: pr-comment
           path: results.md
 
+      - name: Skip bench history JSON (PR not targeting main)
+        if: ${{ github.event.pull_request.base.ref != 'main' }}
+        run: |
+          set -euo pipefail  # base ref comes from the runner's GITHUB_BASE_REF env var, not template-expanded into this script
+          echo "PR targets '${GITHUB_BASE_REF}', not 'main'; skipping bench history JSON steps."
+          {
+            echo "### Bench history"
+            echo "Skipping bench history JSON generation/publish because this PR targets '${GITHUB_BASE_REF}' (not 'main')."
+          } >> "$GITHUB_STEP_SUMMARY"
+
       - name:
           Download previous bench history from benchmarks-history branch (best
           effort)
+        if: ${{ github.event.pull_request.base.ref == 'main' }}
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
@@ -87,6 +98,7 @@ jobs:
           fi
 
       - name: Create JSON results
+        if: ${{ github.event.pull_request.base.ref == 'main' }}
         env:
           PR_NUMBER: ${{ github.event.pull_request.number || '' }}
           BASE_REF: ${{ github.event.pull_request.base.ref || '' }}
@@ -110,6 +122,7 @@ jobs:
             --history-out bench-history.json
 
       - name: Upload bench history artifact
+        if: ${{ github.event.pull_request.base.ref == 'main' }}
         uses: actions/upload-artifact@v6
         with:
           name: bench-history
@@ -137,6 +150,7 @@ jobs:
   publish-bench-history:
     runs-on: ubuntu-latest
     needs: compare-branches
+    if: ${{ github.event.pull_request.base.ref == 'main' }}
     permissions:
       contents: write
     steps:
diff --git a/scripts/bench_results.js b/scripts/bench_results.js
index 035e3d46..5c15f161 100644
--- a/scripts/bench_results.js
+++ b/scripts/bench_results.js
@@ -90,7 +90,12 @@ function parseCriterionCompareLog(text) {
   for (const line of lines) {
     const benchMatch = line.match(/^Benchmarking\s+(.+?)\s*$/);
     if (benchMatch) {
-      currentName = benchMatch[1].trim();
+      const raw = benchMatch[1].trim();
+      // Criterion emits progress/status suffixes like:
+      //   "Benchmarking foo: Warming up ...", "Benchmarking foo: Collecting ...", "Benchmarking foo: Analyzing"
+      // We want the stable benchmark identifier ("foo").
+      const statusStripped = raw.replace(/\s*:\s*(Warming up|Collecting|Analyzing)\b.*$/u, '').trim();
+      currentName = statusStripped || raw;
       continue;
     }