From 00efc729ad47304053fc824087b9e448d75c656b Mon Sep 17 00:00:00 2001 From: Gil Sharon Date: Tue, 10 Feb 2026 15:23:24 -0500 Subject: [PATCH 1/7] chore: add benchmark history --- .github/workflows/bench-compare.yaml | 100 +++- scripts/bench_results.js | 217 +++++++ scripts/index.html | 864 +++++++++++++++++++++++++++ scripts/test_bench_results.sh | 72 +++ 4 files changed, 1243 insertions(+), 10 deletions(-) create mode 100644 scripts/bench_results.js create mode 100644 scripts/index.html create mode 100755 scripts/test_bench_results.sh diff --git a/.github/workflows/bench-compare.yaml b/.github/workflows/bench-compare.yaml index a7141e07..2d5e0fde 100644 --- a/.github/workflows/bench-compare.yaml +++ b/.github/workflows/bench-compare.yaml @@ -7,7 +7,12 @@ jobs: compare-branches: runs-on: ubuntu-latest permissions: + actions: write + contents: write pull-requests: write + env: + BENCH_HISTORY_BRANCH: benchmarks-history + BENCH_HISTORY_DIR: tmp-bench-history steps: - uses: actions/checkout@v6 - name: Setup environment @@ -28,10 +33,23 @@ jobs: run: git checkout ${{ github.event.pull_request.head.ref }} - name: Run hyperfine benchmarks - run: mise run bench:hyperfine -- -- --export-markdown hyperfine.md + run: + mise run bench:hyperfine -- -- --export-markdown hyperfine.md + --export-json hyperfine.json - - name: Run benchmarks on head branch and format output + - name: Run criterion benchmarks (baseline compare) run: | + set -euo pipefail + if cargo bench -p ixa-bench -- --baseline base 2>&1 | tee criterion-compare.txt; then + cargo run -q -p ixa-bench --bin check_criterion_regressions | tee criterion-regressions.txt + else + echo "Note: A comparison could not be generated. Maybe you added new benchmarks?" | tee criterion-regressions.txt + mise run bench:criterion 2>&1 | tee -a criterion-compare.txt + fi + + - name: Format PR comment + run: | + set -euo pipefail echo '### Benchmark Results' > results.md echo '' >> results.md echo '#### Hyperfine' >> results.md @@ -42,19 +60,81 @@ jobs: echo '' >> results.md echo '#### Criterion' >> results.md echo '' >> results.md - if ! cargo bench -p ixa-bench -- --baseline base 2>&1; then - echo "Note: A comparison could not be generated. Maybe you added new benchmarks?" >> results.md - mise run bench:criterion 2>&1 - else - echo '```' >> results.md - cargo run -q -p ixa-bench --bin check_criterion_regressions >> results.md - echo '```' >> results.md - fi + echo '```' >> results.md + cat criterion-regressions.txt >> results.md + echo '```' >> results.md - name: Add comment to PR + if: ${{ github.event_name == 'pull_request' }} run: | gh pr comment ${{ github.event.pull_request.number }} \ --repo ${{ github.repository }} \ --body-file results.md env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: + Download previous bench history from benchmarks-history branch (best + effort) + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + # Fetches raw JSON directly from the benchmarks-history branch. + # If it doesn't exist yet, we continue with an empty history. + if gh api \ + -H "Accept: application/vnd.github+json" \ + "/repos/${GITHUB_REPOSITORY}/contents/bench-history.json?ref=${BENCH_HISTORY_BRANCH}" \ + --jq '.content' > bench-history.b64 2>/dev/null; then + base64 -d bench-history.b64 > bench-history.json + echo "Downloaded bench-history.json from ${BENCH_HISTORY_BRANCH}" + else + echo "No bench-history.json found on ${BENCH_HISTORY_BRANCH} (first run?)." + fi + + - name: Create JSON results + env: + PR_NUMBER: ${{ github.event.pull_request.number || '' }} + BASE_REF: ${{ github.event.pull_request.base.ref || '' }} + BASE_SHA: ${{ github.event.pull_request.base.sha || '' }} + HEAD_REF: ${{ github.event.pull_request.head.ref || github.ref_name }} + HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }} + RUN_BRANCH: + ${{ github.event.pull_request.head.ref || github.ref_name }} + run: | + set -euo pipefail + node scripts/bench_results.js \ + --repo "${GITHUB_REPOSITORY}" \ + --branch "${RUN_BRANCH}" \ + --pr-number "${PR_NUMBER}" \ + --base-ref "${BASE_REF}" --base-sha "${BASE_SHA}" \ + --head-ref "${HEAD_REF}" --head-sha "${HEAD_SHA}" \ + --hyperfine-json hyperfine.json \ + --criterion-log criterion-compare.txt \ + --history-in bench-history.json \ + --out-current bench-current.json \ + --history-out bench-history.json + + - name: Publish bench-history.json to benchmarks-history branch + if: + ${{ github.event_name != 'pull_request' || + github.event.pull_request.head.repo.full_name == github.repository }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + rm -rf "${BENCH_HISTORY_DIR}" + git clone --depth 1 --branch "${BENCH_HISTORY_BRANCH}" "https://x-access-token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" "${BENCH_HISTORY_DIR}" + cp bench-history.json "${BENCH_HISTORY_DIR}/bench-history.json" + pushd "${BENCH_HISTORY_DIR}" >/dev/null + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + # git diff doesn't consider untracked files; use status to detect any change. + if [ -z "$(git status --porcelain)" ]; then + echo "No changes to bench-history.json" + exit 0 + fi + git add bench-history.json + git commit -m "Update bench-history.json" + git push + popd >/dev/null diff --git a/scripts/bench_results.js b/scripts/bench_results.js new file mode 100644 index 00000000..11ed31a7 --- /dev/null +++ b/scripts/bench_results.js @@ -0,0 +1,217 @@ +#!/usr/bin/env node +/* +Generates benchmark JSON artifacts from Hyperfine + Criterion outputs. + +Usage (example): + node scripts/bench_results.js \ + --repo owner/repo \ + --branch my-branch \ + --pr-number 123 \ + --base-ref main --base-sha abc \ + --head-ref feature --head-sha def \ + --hyperfine-json hyperfine.json \ + --criterion-log criterion-compare.txt \ + --history-in bench-history.json \ + --out-current bench-current.json \ + --history-out bench-history.json + +All args are optional except input files; missing inputs produce empty result arrays. +*/ + +'use strict'; + +const fs = require('fs'); +const path = require('path'); + +function die(message) { + process.stderr.write(`${message}\n`); + process.exit(2); +} + +function getArgValue(argv, name) { + const idx = argv.indexOf(name); + if (idx === -1) return undefined; + const value = argv[idx + 1]; + if (!value || value.startsWith('--')) die(`Missing value for ${name}`); + return value; +} + +function hasArg(argv, name) { + return argv.includes(name); +} + +function parseMaybeNumber(text) { + if (text == null || text === '') return undefined; + const n = Number(text); + return Number.isFinite(n) ? n : undefined; +} + +function parseDurationToSeconds(text) { + // Examples: "12.3 ms ± 0.2 ms", "1.234 s", "456 µs" + const m = String(text).trim().match(/([0-9]*\.?[0-9]+)\s*([a-zA-Zµμ]+)/); + if (!m) return null; + const value = Number(m[1]); + const unitRaw = m[2]; + const unit = unitRaw.replace('μ', 'µ'); + const factor = { + s: 1, + sec: 1, + ms: 1e-3, + us: 1e-6, + 'µs': 1e-6, + ns: 1e-9, + }[unit] ?? null; + if (!Number.isFinite(value) || factor == null) return null; + return value * factor; +} + +function readTextIfExists(filePath) { + if (!filePath) return ''; + if (!fs.existsSync(filePath)) return ''; + return fs.readFileSync(filePath, 'utf8'); +} + +function readJsonIfExists(filePath) { + if (!filePath) return null; + if (!fs.existsSync(filePath)) return null; + return JSON.parse(fs.readFileSync(filePath, 'utf8')); +} + +function parseCriterionCompareLog(text) { + // Extracts benchmark name and the reported time confidence interval triple. + // Example: + // Benchmarking sample_people/sampling_multiple_l_reservoir + // ... + // time: [10.771 ms 10.811 ms 10.866 ms] + const lines = String(text || '').split(/\r?\n/); + const resultsByName = new Map(); + let currentName = null; + + for (const line of lines) { + const benchMatch = line.match(/^Benchmarking\s+(.+?)\s*$/); + if (benchMatch) { + currentName = benchMatch[1].trim(); + continue; + } + + const timeMatch = line.match( + /\btime:\s*\[\s*([0-9]*\.?[0-9]+\s*[a-zA-Zµμ]+)\s+([0-9]*\.?[0-9]+\s*[a-zA-Zµμ]+)\s+([0-9]*\.?[0-9]+\s*[a-zA-Zµμ]+)\s*\]/, + ); + if (!timeMatch || !currentName) continue; + + const t1 = timeMatch[1]; + const t2 = timeMatch[2]; + const t3 = timeMatch[3]; + const s1 = parseDurationToSeconds(t1); + const s2 = parseDurationToSeconds(t2); + const s3 = parseDurationToSeconds(t3); + if (s1 == null || s2 == null || s3 == null) continue; + + resultsByName.set(currentName, { + name: currentName, + time_text: [t1, t2, t3], + time_sec: [s1, s2, s3], + }); + } + + return Array.from(resultsByName.values()); +} + +function parseHyperfineJson(hyperfineJson) { + if (!hyperfineJson || !Array.isArray(hyperfineJson.results)) return []; + + return hyperfineJson.results.map((r) => { + const times = Array.isArray(r.times) ? r.times.filter((n) => Number.isFinite(n)) : []; + return { + name: r.command ?? r.parameter ?? 'unknown', + times_sec: times, + mean_sec: Number.isFinite(r.mean) ? r.mean : undefined, + min_sec: Number.isFinite(r.min) ? r.min : undefined, + max_sec: Number.isFinite(r.max) ? r.max : undefined, + stddev_sec: Number.isFinite(r.stddev) ? r.stddev : undefined, + }; + }); +} + +function main() { + const argv = process.argv.slice(2); + + if (hasArg(argv, '--help') || hasArg(argv, '-h')) { + process.stdout.write(fs.readFileSync(__filename, 'utf8').split('\n').slice(0, 40).join('\n') + '\n'); + return; + } + + const repo = getArgValue(argv, '--repo') ?? process.env.GITHUB_REPOSITORY; + const branch = getArgValue(argv, '--branch') ?? process.env.RUN_BRANCH ?? process.env.GITHUB_REF_NAME; + const prNumber = parseMaybeNumber(getArgValue(argv, '--pr-number') ?? process.env.PR_NUMBER); + + const baseRef = getArgValue(argv, '--base-ref') ?? process.env.BASE_REF; + const baseSha = getArgValue(argv, '--base-sha') ?? process.env.BASE_SHA; + + const headRef = getArgValue(argv, '--head-ref') ?? process.env.HEAD_REF; + const headSha = getArgValue(argv, '--head-sha') ?? process.env.HEAD_SHA ?? process.env.GITHUB_SHA; + + const runAt = getArgValue(argv, '--run-at') ?? new Date().toISOString(); + + const hyperfineJsonPath = getArgValue(argv, '--hyperfine-json') ?? 'hyperfine.json'; + const criterionLogPath = getArgValue(argv, '--criterion-log') ?? 'criterion-compare.txt'; + + const outCurrent = getArgValue(argv, '--out-current') ?? 'bench-current.json'; + const historyIn = getArgValue(argv, '--history-in'); + const historyOut = getArgValue(argv, '--history-out') ?? 'bench-history.json'; + + const hyperfineJson = readJsonIfExists(hyperfineJsonPath); + const hyperfineTimings = parseHyperfineJson(hyperfineJson); + + const criterionCompareLog = readTextIfExists(criterionLogPath); + const criterionTimings = parseCriterionCompareLog(criterionCompareLog); + + const payload = { + schema: 1, + generated_at: runAt, + repository: repo, + pr_number: prNumber, + branch, + base: { + ref: baseRef, + sha: baseSha, + }, + head: { + ref: headRef, + sha: headSha, + url: repo && headSha ? `https://github.com/${repo}/commit/${headSha}` : undefined, + }, + hyperfine: { + results: hyperfineTimings, + }, + criterion: { + results: criterionTimings, + }, + }; + + fs.writeFileSync(outCurrent, JSON.stringify(payload, null, 2)); + + // History handling. + const historyPath = path.resolve(historyOut); + const history = historyIn + ? (readJsonIfExists(historyIn) ?? { schema: 1, runs: [] }) + : (readJsonIfExists(historyPath) ?? { schema: 1, runs: [] }); + + if (!Array.isArray(history.runs)) history.runs = []; + + history.schema = 1; + history.updated_at = runAt; + history.runs.push({ + run_at: runAt, + branch, + pr_number: prNumber, + base: payload.base, + head: payload.head, + hyperfine: payload.hyperfine.results, + criterion: payload.criterion.results, + }); + + fs.writeFileSync(historyPath, JSON.stringify(history, null, 2)); +} + +main(); diff --git a/scripts/index.html b/scripts/index.html new file mode 100644 index 00000000..20fd94ca --- /dev/null +++ b/scripts/index.html @@ -0,0 +1,864 @@ + + + + + + IXA Bench History + + + + +
+

IXA benchmark history

+
Loading…
+
+ +
+
+ + + + + + + + + + + +
+ +
+
+

Time series

+
+ +
+
+ +
+

Latest vs previous (percent change)

+
+ + + +
+
+
+
+
+ + +
+ + + + + diff --git a/scripts/test_bench_results.sh b/scripts/test_bench_results.sh new file mode 100755 index 00000000..186d3005 --- /dev/null +++ b/scripts/test_bench_results.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Local test harness for scripts/bench_results.js +# Usage: +# bash scripts/test_bench_results.sh + +repo="acme/example" + +workdir="$(mktemp -d)" +trap 'rm -rf "$workdir"' EXIT + +cat >"$workdir/hyperfine.json" <<'JSON' +{ + "results": [ + { + "command": "echo hello", + "mean": 0.012, + "min": 0.011, + "max": 0.013, + "stddev": 0.001, + "times": [0.011, 0.012, 0.013] + } + ] +} +JSON + +cat >"$workdir/criterion-compare.txt" <<'TXT' +Benchmarking sample_people/sampling_multiple_l_reservoir +Benchmarking sample_people/another_bench + time: [10.771 ms 10.811 ms 10.866 ms] +TXT + +cat >"$workdir/bench-history.json" <<'JSON' +{ + "schema": 1, + "updated_at": "2020-01-01T00:00:00.000Z", + "runs": [ + { + "run_at": "2020-01-01T00:00:00.000Z", + "branch": "main", + "hyperfine": [], + "criterion": [] + } + ] +} +JSON + +node "$(pwd)/scripts/bench_results.js" \ + --repo "$repo" \ + --branch "feature/test" \ + --pr-number 42 \ + --base-ref main --base-sha 1111111 \ + --head-ref feature/test --head-sha 2222222 \ + --run-at "2026-02-09T00:00:00.000Z" \ + --hyperfine-json "$workdir/hyperfine.json" \ + --criterion-log "$workdir/criterion-compare.txt" \ + --history-in "$workdir/bench-history.json" \ + --out-current "$workdir/bench-current.json" \ + --history-out "$workdir/bench-history.out.json" + +jq -e '.branch=="feature/test" and (.hyperfine.results|length)==1 and (.criterion.results|length)>=1' "$workdir/bench-current.json" >/dev/null +jq -e '(.runs|length)==2 and (.runs[-1].branch=="feature/test")' "$workdir/bench-history.out.json" >/dev/null + +# Validate criterion time triple parsed. +low_ms=$(jq -r '.criterion.results[] | select(.name=="sample_people/another_bench") | .time_text[0]' "$workdir/bench-current.json") +if [[ "$low_ms" != "10.771 ms" ]]; then + echo "Expected criterion low time 10.771 ms, got: $low_ms" >&2 + exit 1 +fi + +echo "OK: bench_results.js test passed" From 61cd894d6b9ccab8784519bf67ad3774e045e7d8 Mon Sep 17 00:00:00 2001 From: Gil Sharon Date: Wed, 11 Feb 2026 11:16:59 -0500 Subject: [PATCH 2/7] update the index.html file --- scripts/index.html | 187 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 158 insertions(+), 29 deletions(-) diff --git a/scripts/index.html b/scripts/index.html index 20fd94ca..e22da55b 100644 --- a/scripts/index.html +++ b/scripts/index.html @@ -9,10 +9,18 @@ color-scheme: light dark; } + *, + *::before, + *::after { + box-sizing: border-box; + } + body { margin: 0; font: 14px/1.45 system-ui, -apple-system, Segoe UI, Roboto, Ubuntu, Cantarell, Noto Sans, Helvetica, Arial, "Apple Color Emoji", "Segoe UI Emoji"; + background: Canvas; + color: CanvasText; } header { @@ -35,6 +43,7 @@ main { padding: 16px 20px 24px; max-width: 1200px; + margin: 0 auto; } .row { @@ -53,18 +62,48 @@ } select, + input[type="url"], + input[type="text"], + input[type="file"], button { font: inherit; padding: 8px 10px; border-radius: 8px; border: 1px solid rgba(127, 127, 127, 0.35); - background: transparent; + background: Field; + color: FieldText; min-width: 260px; } + input[type="file"] { + padding: 6px 10px; + } + + input[type="checkbox"] { + width: 16px; + height: 16px; + margin: 0; + accent-color: Highlight; + } + button { min-width: auto; cursor: pointer; + background: ButtonFace; + color: ButtonText; + } + + select:focus-visible, + input[type="url"]:focus-visible, + input[type="text"]:focus-visible, + input[type="file"]:focus-visible, + button:focus-visible { + outline: 2px solid Highlight; + outline-offset: 2px; + } + + button:hover { + border-color: rgba(127, 127, 127, 0.55); } .checkbox { @@ -75,6 +114,9 @@ border: 1px solid rgba(127, 127, 127, 0.35); border-radius: 8px; user-select: none; + opacity: 0.9; + background: Field; + color: FieldText; } .checkbox input { @@ -133,6 +175,18 @@

IXA benchmark history

+ + + + + +