diff --git a/.github/workflows/bench-compare.yaml b/.github/workflows/bench-compare.yaml index a7141e07..16086532 100644 --- a/.github/workflows/bench-compare.yaml +++ b/.github/workflows/bench-compare.yaml @@ -3,11 +3,15 @@ name: Benchmark Pull Requests on: pull_request: +env: + BENCH_HISTORY_BRANCH: benchmarks-history + BENCH_HISTORY_DIR: tmp-bench-history + jobs: compare-branches: runs-on: ubuntu-latest permissions: - pull-requests: write + contents: read steps: - uses: actions/checkout@v6 - name: Setup environment @@ -28,29 +32,113 @@ jobs: run: git checkout ${{ github.event.pull_request.head.ref }} - name: Run hyperfine benchmarks - run: mise run bench:hyperfine -- -- --export-markdown hyperfine.md + run: | + set -euo pipefail + mise run bench:hyperfine -- -- --export-markdown hyperfine.md --export-json hyperfine.json - - name: Run benchmarks on head branch and format output + - name: Run criterion benchmarks (baseline compare) run: | + set -euo pipefail + if cargo bench -p ixa-bench -- --baseline base 2>&1 | tee criterion-compare.txt; then + cargo run -q -p ixa-bench --bin check_criterion_regressions | tee criterion-regressions.txt + else + echo "Note: A comparison could not be generated. Maybe you added new benchmarks?" | tee criterion-regressions.txt + mise run bench:criterion 2>&1 | tee -a criterion-compare.txt + fi + + - name: Format PR comment + run: | + set -euo pipefail echo '### Benchmark Results' > results.md echo '' >> results.md echo '#### Hyperfine' >> results.md echo '' >> results.md - echo '```' >> results.md cat hyperfine.md >> results.md - echo '```' >> results.md echo '' >> results.md echo '#### Criterion' >> results.md echo '' >> results.md - if ! cargo bench -p ixa-bench -- --baseline base 2>&1; then - echo "Note: A comparison could not be generated. Maybe you added new benchmarks?" 
>> results.md - mise run bench:criterion 2>&1 + echo '```' >> results.md + cat criterion-regressions.txt >> results.md + echo '```' >> results.md + + - name: Upload PR comment artifact + uses: actions/upload-artifact@v6 + with: + name: pr-comment + path: results.md + + - name: Skip bench history JSON (PR not targeting main) + if: ${{ github.event.pull_request.base.ref != 'main' }} + run: | + set -euo pipefail + echo "PR targets '${{ github.event.pull_request.base.ref }}', not 'main'; skipping bench history JSON steps." + { + echo "### Bench history" + echo "Skipping bench history JSON generation/publish because this PR targets '${{ github.event.pull_request.base.ref }}' (not 'main')." + } >> "$GITHUB_STEP_SUMMARY" + + - name: + Download previous bench history from benchmarks-history branch (best + effort) + if: ${{ github.event.pull_request.base.ref == 'main' }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + # Fetches raw JSON directly from the benchmarks-history branch. + # If it doesn't exist yet, we continue with an empty history. + if gh api \ + -H "Accept: application/vnd.github+json" \ + "/repos/${GITHUB_REPOSITORY}/contents/bench-history.json?ref=${BENCH_HISTORY_BRANCH}" \ + --jq '.content' > bench-history.b64 2>/dev/null; then + base64 -d bench-history.b64 > bench-history.json + echo "Downloaded bench-history.json from ${BENCH_HISTORY_BRANCH}" else - echo '```' >> results.md - cargo run -q -p ixa-bench --bin check_criterion_regressions >> results.md - echo '```' >> results.md + echo "No bench-history.json found on ${BENCH_HISTORY_BRANCH} (first run?)." 
fi + - name: Create JSON results + if: ${{ github.event.pull_request.base.ref == 'main' }} + env: + PR_NUMBER: ${{ github.event.pull_request.number || '' }} + BASE_REF: ${{ github.event.pull_request.base.ref || '' }} + BASE_SHA: ${{ github.event.pull_request.base.sha || '' }} + HEAD_REF: ${{ github.event.pull_request.head.ref || github.ref_name }} + HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }} + RUN_BRANCH: + ${{ github.event.pull_request.head.ref || github.ref_name }} + run: | + set -euo pipefail + node scripts/bench_results.js \ + --repo "${GITHUB_REPOSITORY}" \ + --branch "${RUN_BRANCH}" \ + --pr-number "${PR_NUMBER}" \ + --base-ref "${BASE_REF}" --base-sha "${BASE_SHA}" \ + --head-ref "${HEAD_REF}" --head-sha "${HEAD_SHA}" \ + --hyperfine-json hyperfine.json \ + --criterion-log criterion-compare.txt \ + --history-in bench-history.json \ + --out-current bench-current.json \ + --history-out bench-history.json + + - name: Upload bench history artifact + if: ${{ github.event.pull_request.base.ref == 'main' }} + uses: actions/upload-artifact@v6 + with: + name: bench-history + path: bench-history.json + + comment-on-pr: + runs-on: ubuntu-latest + needs: compare-branches + permissions: + pull-requests: write + steps: + - name: Download PR comment artifact + uses: actions/download-artifact@v6 + with: + name: pr-comment + - name: Add comment to PR run: | gh pr comment ${{ github.event.pull_request.number }} \ @@ -58,3 +146,44 @@ jobs: --body-file results.md env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + publish-bench-history: + runs-on: ubuntu-latest + needs: compare-branches + if: ${{ github.event.pull_request.base.ref == 'main' }} + permissions: + contents: write + steps: + - name: Download bench history artifact + uses: actions/download-artifact@v6 + with: + name: bench-history + + - name: Checkout benchmarks-history branch + uses: actions/checkout@v6 + with: + ref: ${{ env.BENCH_HISTORY_BRANCH }} + path: ${{ env.BENCH_HISTORY_DIR }} + 
fetch-depth: 1 + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Publish bench-history.json to benchmarks-history branch + env: + PR_NUMBER: ${{ github.event.pull_request.number }} + HEAD_SHA: ${{ github.event.pull_request.head.sha }} + run: | + set -euo pipefail + cp bench-history.json "${BENCH_HISTORY_DIR}/bench-history.json" + pushd "${BENCH_HISTORY_DIR}" >/dev/null + git checkout "${BENCH_HISTORY_BRANCH}" + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + # git diff doesn't consider untracked files; use status to detect any change. + if [ -z "$(git status --porcelain)" ]; then + echo "No changes to bench-history.json" + exit 0 + fi + git add bench-history.json + git commit -m "Update bench-history.json (PR #${PR_NUMBER} @ ${HEAD_SHA:0:7})" + git push + popd >/dev/null diff --git a/scripts/bench_results.js b/scripts/bench_results.js new file mode 100644 index 00000000..5c15f161 --- /dev/null +++ b/scripts/bench_results.js @@ -0,0 +1,237 @@ +#!/usr/bin/env node +/* +Generates benchmark JSON artifacts from Hyperfine + Criterion outputs. + +Usage (example): + node scripts/bench_results.js \ + --repo owner/repo \ + --branch my-branch \ + --pr-number 123 \ + --base-ref main --base-sha abc \ + --head-ref feature --head-sha def \ + --hyperfine-json hyperfine.json \ + --criterion-log criterion-compare.txt \ + --history-in bench-history.json \ + --out-current bench-current.json \ + --history-out bench-history.json + +All args are optional except input files; missing inputs produce empty result arrays. 
+*/ + +'use strict'; + +const fs = require('fs'); +const path = require('path'); + +function die(message) { + process.stderr.write(`${message}\n`); + process.exit(2); +} + +function getArgValue(argv, name) { + const idx = argv.indexOf(name); + if (idx === -1) return undefined; + const value = argv[idx + 1]; + if (!value || value.startsWith('--')) die(`Missing value for ${name}`); + return value; +} + +function hasArg(argv, name) { + return argv.includes(name); +} + +function parseMaybeNumber(text) { + if (text == null || text === '') return undefined; + const n = Number(text); + return Number.isFinite(n) ? n : undefined; +} + +function parseDurationToSeconds(text) { + // Examples: "12.3 ms ± 0.2 ms", "1.234 s", "456 µs" + const m = String(text).trim().match(/([0-9]*\.?[0-9]+)\s*([a-zA-Zµμ]+)/); + if (!m) return null; + const value = Number(m[1]); + const unitRaw = m[2]; + const unit = unitRaw.replace('μ', 'µ'); + const factor = { + s: 1, + sec: 1, + ms: 1e-3, + us: 1e-6, + 'µs': 1e-6, + ns: 1e-9, + }[unit] ?? null; + if (!Number.isFinite(value) || factor == null) return null; + return value * factor; +} + +function readTextIfExists(filePath) { + if (!filePath) return ''; + if (!fs.existsSync(filePath)) return ''; + return fs.readFileSync(filePath, 'utf8'); +} + +function readJsonIfExists(filePath) { + if (!filePath) return null; + if (!fs.existsSync(filePath)) return null; + return JSON.parse(fs.readFileSync(filePath, 'utf8')); +} + +function parseCriterionCompareLog(text) { + // Extracts benchmark name and the reported time confidence interval triple. + // Example: + // Benchmarking sample_people/sampling_multiple_l_reservoir + // ... 
+ // time: [10.771 ms 10.811 ms 10.866 ms] + const lines = String(text || '').split(/\r?\n/); + const resultsByName = new Map(); + let currentName = null; + + for (const line of lines) { + const benchMatch = line.match(/^Benchmarking\s+(.+?)\s*$/); + if (benchMatch) { + const raw = benchMatch[1].trim(); + // Criterion emits progress/status suffixes like: + // "Benchmarking foo: Warming up ...", "Benchmarking foo: Collecting ...", "Benchmarking foo: Analyzing" + // We want the stable benchmark identifier ("foo"). + const statusStripped = raw.replace(/\s*:\s*(Warming up|Collecting|Analyzing)\b.*$/u, '').trim(); + currentName = statusStripped || raw; + continue; + } + + const timeMatch = line.match( + /\btime:\s*\[\s*([0-9]*\.?[0-9]+\s*[a-zA-Zµμ]+)\s+([0-9]*\.?[0-9]+\s*[a-zA-Zµμ]+)\s+([0-9]*\.?[0-9]+\s*[a-zA-Zµμ]+)\s*\]/, + ); + if (!timeMatch || !currentName) continue; + + const t1 = timeMatch[1]; + const t2 = timeMatch[2]; + const t3 = timeMatch[3]; + const s1 = parseDurationToSeconds(t1); + const s2 = parseDurationToSeconds(t2); + const s3 = parseDurationToSeconds(t3); + if (s1 == null || s2 == null || s3 == null) continue; + + resultsByName.set(currentName, { + name: currentName, + time_text: [t1, t2, t3], + time_sec: [s1, s2, s3], + }); + } + + return Array.from(resultsByName.values()); +} + +function parseHyperfineJson(hyperfineJson) { + if (!hyperfineJson || !Array.isArray(hyperfineJson.results)) return []; + + return hyperfineJson.results.map((r) => { + const times = Array.isArray(r.times) ? r.times.filter((n) => Number.isFinite(n)) : []; + return { + name: r.command ?? r.parameter ?? 'unknown', + times_sec: times, + mean_sec: Number.isFinite(r.mean) ? r.mean : undefined, + min_sec: Number.isFinite(r.min) ? r.min : undefined, + max_sec: Number.isFinite(r.max) ? r.max : undefined, + stddev_sec: Number.isFinite(r.stddev) ? 
r.stddev : undefined, + }; + }); +} + +function normalizeRunPrNumber(run) { + if (!run || typeof run !== 'object') return undefined; + const raw = run.pr_number; + if (raw == null) return undefined; + const n = Number(raw); + return Number.isFinite(n) ? n : undefined; +} + +function main() { + const argv = process.argv.slice(2); + + if (hasArg(argv, '--help') || hasArg(argv, '-h')) { + process.stdout.write(fs.readFileSync(__filename, 'utf8').split('\n').slice(0, 40).join('\n') + '\n'); + return; + } + + const repo = getArgValue(argv, '--repo') ?? process.env.GITHUB_REPOSITORY; + const branch = getArgValue(argv, '--branch') ?? process.env.RUN_BRANCH ?? process.env.GITHUB_REF_NAME; + const prNumber = parseMaybeNumber(getArgValue(argv, '--pr-number') ?? process.env.PR_NUMBER); + + const baseRef = getArgValue(argv, '--base-ref') ?? process.env.BASE_REF; + const baseSha = getArgValue(argv, '--base-sha') ?? process.env.BASE_SHA; + + const headRef = getArgValue(argv, '--head-ref') ?? process.env.HEAD_REF; + const headSha = getArgValue(argv, '--head-sha') ?? process.env.HEAD_SHA ?? process.env.GITHUB_SHA; + + const runAt = getArgValue(argv, '--run-at') ?? new Date().toISOString(); + + const hyperfineJsonPath = getArgValue(argv, '--hyperfine-json') ?? 'hyperfine.json'; + const criterionLogPath = getArgValue(argv, '--criterion-log') ?? 'criterion-compare.txt'; + + const outCurrent = getArgValue(argv, '--out-current') ?? 'bench-current.json'; + const historyIn = getArgValue(argv, '--history-in'); + const historyOut = getArgValue(argv, '--history-out') ?? 
'bench-history.json'; + + const hyperfineJson = readJsonIfExists(hyperfineJsonPath); + const hyperfineTimings = parseHyperfineJson(hyperfineJson); + + const criterionCompareLog = readTextIfExists(criterionLogPath); + const criterionTimings = parseCriterionCompareLog(criterionCompareLog); + + const payload = { + schema: 1, + generated_at: runAt, + repository: repo, + pr_number: prNumber, + branch, + base: { + ref: baseRef, + sha: baseSha, + }, + head: { + ref: headRef, + sha: headSha, + url: repo && headSha ? `https://github.com/${repo}/commit/${headSha}` : undefined, + }, + hyperfine: { + results: hyperfineTimings, + }, + criterion: { + results: criterionTimings, + }, + }; + + fs.writeFileSync(outCurrent, JSON.stringify(payload, null, 2)); + + // History handling. + const historyPath = path.resolve(historyOut); + const history = historyIn + ? (readJsonIfExists(historyIn) ?? { schema: 1, runs: [] }) + : (readJsonIfExists(historyPath) ?? { schema: 1, runs: [] }); + + if (!Array.isArray(history.runs)) history.runs = []; + + history.schema = 1; + history.updated_at = runAt; + const newRun = { + run_at: runAt, + branch, + pr_number: prNumber, + base: payload.base, + head: payload.head, + hyperfine: payload.hyperfine.results, + criterion: payload.criterion.results, + }; + + // For PRs, keep a single entry per PR number (reruns update in-place rather than append). + if (Number.isFinite(prNumber)) { + history.runs = history.runs.filter((r) => normalizeRunPrNumber(r) !== prNumber); + } + + history.runs.push(newRun); + + fs.writeFileSync(historyPath, JSON.stringify(history, null, 2)); +} + +main(); diff --git a/scripts/index.html b/scripts/index.html new file mode 100644 index 00000000..693ec906 --- /dev/null +++ b/scripts/index.html @@ -0,0 +1,993 @@ + + + + + + IXA Bench History + + + + +
+

IXA benchmark history

+
Loading…
+
+ +
+
+ + + + + + + + + + + + + + + + + +
+ +
+
+

Time series

+
+ +
+
+ +
+

Latest vs previous (percent change)

+
+ + + +
+
+
+
+
+ + +
#!/usr/bin/env bash
set -euo pipefail

# Local test harness for scripts/bench_results.js
#
# Usage (works from any directory):
#   bash scripts/test_bench_results.sh
#
# Requires `node` and `jq` on PATH. Feeds fixed Hyperfine/Criterion fixtures
# through bench_results.js three times and checks that the history file grows
# for a new PR and is updated in place when an existing PR is re-run.

# Locate the script under test relative to this file rather than the caller's
# working directory.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
bench_results="$script_dir/bench_results.js"

for tool in node jq; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "Missing required tool: $tool" >&2
    exit 1
  fi
done

repo="acme/example"

workdir="$(mktemp -d)"
trap 'rm -rf "$workdir"' EXIT

# assert_jq DESCRIPTION FILTER FILE
# Fails the test run with a readable message when FILTER is not truthy for FILE.
assert_jq() {
  local description="$1" filter="$2" file="$3"
  if ! jq -e "$filter" "$file" >/dev/null; then
    echo "FAILED: $description" >&2
    echo "  filter: $filter" >&2
    echo "  file:   $file" >&2
    exit 1
  fi
}

# run_bench_results BRANCH PR_NUMBER HEAD_SHA RUN_AT HISTORY_IN HISTORY_OUT
# Invokes bench_results.js against the shared fixtures; BRANCH is used for
# both --branch and --head-ref.
run_bench_results() {
  local branch="$1" pr_number="$2" head_sha="$3" run_at="$4"
  local history_in="$5" history_out="$6"
  node "$bench_results" \
    --repo "$repo" \
    --branch "$branch" \
    --pr-number "$pr_number" \
    --base-ref main --base-sha 1111111 \
    --head-ref "$branch" --head-sha "$head_sha" \
    --run-at "$run_at" \
    --hyperfine-json "$workdir/hyperfine.json" \
    --criterion-log "$workdir/criterion-compare.txt" \
    --history-in "$history_in" \
    --out-current "$workdir/bench-current.json" \
    --history-out "$history_out"
}

cat >"$workdir/hyperfine.json" <<'JSON'
{
  "results": [
    {
      "command": "echo hello",
      "mean": 0.012,
      "min": 0.011,
      "max": 0.013,
      "stddev": 0.001,
      "times": [0.011, 0.012, 0.013]
    }
  ]
}
JSON

# The first benchmark has no "time:" line on purpose; only the second one
# should produce a result.
cat >"$workdir/criterion-compare.txt" <<'TXT'
Benchmarking sample_people/sampling_multiple_l_reservoir
Benchmarking sample_people/another_bench
                        time:   [10.771 ms 10.811 ms 10.866 ms]
TXT

cat >"$workdir/bench-history.json" <<'JSON'
{
  "schema": 1,
  "updated_at": "2020-01-01T00:00:00.000Z",
  "runs": []
}
JSON

history1="$workdir/bench-history.1.json"
history2="$workdir/bench-history.2.json"
history3="$workdir/bench-history.3.json"

# First run for PR 42 against an empty history.
run_bench_results "feature/test" 42 2222222 "2026-02-09T00:00:00.000Z" \
  "$workdir/bench-history.json" "$history1"

assert_jq "current run records branch, one hyperfine result and at least one criterion result" \
  '.branch=="feature/test" and (.hyperfine.results|length)==1 and (.criterion.results|length)>=1' \
  "$workdir/bench-current.json"
assert_jq "history holds exactly the PR 42 run" \
  '(.runs|length)==1 and (.runs[0].pr_number==42) and (.runs[0].head.sha=="2222222")' \
  "$history1"

# Validate criterion time triple parsed.
low_ms=$(jq -r '.criterion.results[] | select(.name=="sample_people/another_bench") | .time_text[0]' "$workdir/bench-current.json")
if [[ "$low_ms" != "10.771 ms" ]]; then
  echo "Expected criterion low time 10.771 ms, got: $low_ms" >&2
  exit 1
fi

# Add another PR; should append (history grows).
run_bench_results "feature/other" 43 3333333 "2026-02-09T01:00:00.000Z" \
  "$history1" "$history2"

assert_jq "second PR is appended" \
  '(.runs|length)==2 and ([.runs[].pr_number]|sort)==[42,43]' \
  "$history2"

# Re-run PR 42; should update existing entry (history length unchanged).
run_bench_results "feature/test-rerun" 42 4444444 "2026-02-09T02:00:00.000Z" \
  "$history2" "$history3"

assert_jq "re-run keeps two PRs in history" \
  '(.runs|length)==2 and ([.runs[].pr_number]|sort)==[42,43]' \
  "$history3"
assert_jq "PR 42 appears exactly once" \
  '([.runs[] | select(.pr_number==42)] | length)==1' \
  "$history3"
assert_jq "PR 42 entry carries the re-run head SHA" \
  '(.runs[] | select(.pr_number==42) | .head.sha)=="4444444"' \
  "$history3"
assert_jq "PR 42 entry carries the re-run timestamp" \
  '(.runs[] | select(.pr_number==42) | .run_at)=="2026-02-09T02:00:00.000Z"' \
  "$history3"
assert_jq "PR 42 entry carries the re-run branch" \
  '(.runs[] | select(.pr_number==42) | .branch)=="feature/test-rerun"' \
  "$history3"
assert_jq "PR 43 entry is untouched" \
  '(.runs[] | select(.pr_number==43) | .head.sha)=="3333333"' \
  "$history3"

echo "OK: bench_results.js test passed"