diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..8ac6b8c --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" diff --git a/.github/workflows/dependency-update.yml b/.github/workflows/dependency-update.yml new file mode 100644 index 0000000..d8aaa4a --- /dev/null +++ b/.github/workflows/dependency-update.yml @@ -0,0 +1,188 @@ +name: Dependency Update Check + +on: + schedule: + - cron: '0 9 * * 1' # Every Monday 09:00 UTC + workflow_dispatch: + +concurrency: + group: dependency-update + cancel-in-progress: false + +permissions: + contents: write + pull-requests: write + +jobs: + check-and-pr: + name: Check dependencies & open PR + runs-on: ubuntu-latest + outputs: + has_updates: ${{ steps.check.outputs.has_updates }} + pr_branch: ${{ steps.pr.outputs.branch }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.x" + + - name: Install tools + run: | + sudo apt-get update + sudo apt-get install -y jq curl bats bc shellcheck dmidecode fio \ + iproute2 lshw util-linux ethtool hwloc pciutils numactl smartmontools + pip install pre-commit + + - name: Check for updates + id: check + run: | + report=$(bash scripts/check-updates.sh --json 2>/dev/null) + echo "$report" > update-report.json + count=$(echo "$report" | jq '.summary.updates_available') + echo "update_count=$count" >> "$GITHUB_OUTPUT" + if [ "$count" -gt 0 ]; then + echo "has_updates=true" >> "$GITHUB_OUTPUT" + echo "### Dependencies: $count update(s) available" >> "$GITHUB_STEP_SUMMARY" + jq -r '.dependencies[] | select(.update_available==true) | "- **\(.name)**: \(.current_version) → \(.latest_version)"' update-report.json >> "$GITHUB_STEP_SUMMARY" + else + echo "has_updates=false" >> "$GITHUB_OUTPUT" + echo "All dependencies up to date." 
>> "$GITHUB_STEP_SUMMARY" + fi + + - name: Apply updates + if: steps.check.outputs.has_updates == 'true' + run: bash scripts/check-updates.sh --apply + + - name: Validate — lint + if: steps.check.outputs.has_updates == 'true' + run: make lint + + - name: Validate — unit tests + if: steps.check.outputs.has_updates == 'true' + run: make test + + - name: Validate — static checks + if: steps.check.outputs.has_updates == 'true' + run: make static-checks + + - name: Validate — smoke run + if: steps.check.outputs.has_updates == 'true' + run: sudo bash scripts/run-all.sh --smoke --ci + + - name: Create or update PR + id: pr + if: steps.check.outputs.has_updates == 'true' + env: + GH_TOKEN: ${{ github.token }} + run: | + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + + BRANCH="deps/auto-update-$(date +%Y%m%d)" + + # Check for existing open dependency update PR + existing_branch=$(gh pr list --label dependencies --state open \ + --json headRefName -q '.[0].headRefName // empty' 2>/dev/null || true) + + if [ -n "$existing_branch" ]; then + BRANCH="$existing_branch" + git checkout -B "$BRANCH" + else + git checkout -b "$BRANCH" + fi + echo "branch=$BRANCH" >> "$GITHUB_OUTPUT" + + git add -A + + # Build commit message + updates_list=$(jq -r '.dependencies[] | select(.update_available==true) | "- \(.name): \(.current_version) → \(.latest_version)"' update-report.json) + git commit -m "deps: update dependencies ($(date +%Y-%m-%d))" -m "$updates_list" || true + + git push -u origin "$BRANCH" --force-with-lease + + # Build PR body + pr_body=$(cat <<'BODY_END' + ## Automated Dependency Update + + | Dependency | Current | Latest | Category | + |------------|---------|--------|----------| + BODY_END + ) + table_rows=$(jq -r '.dependencies[] | select(.update_available==true) | "| \(.name) | \(.current_version) | \(.latest_version) | \(.category) |"' update-report.json) + pr_body="${pr_body} + ${table_rows} + 
+ ### CI Validation + - [x] Lint (\`make lint\`) + - [x] Unit tests (\`make test\`) + - [x] Static checks (\`make static-checks\`) + - [x] Smoke run (\`run-all.sh --smoke --ci\`) + - [ ] **GPU validation** — merge only after a quick run on a GPU host + + > Auto-generated by \`scripts/check-updates.sh\` via scheduled CI." + + if [ -n "$existing_branch" ]; then + pr_number=$(gh pr list --head "$existing_branch" --state open --json number -q '.[0].number' 2>/dev/null || true) + if [ -n "$pr_number" ]; then + gh pr edit "$pr_number" --body "$pr_body" + echo "Updated existing PR #${pr_number}" + fi + else + gh pr create \ + --title "deps: update dependencies ($(date +%Y-%m-%d))" \ + --body "$pr_body" \ + --label "dependencies" || echo "PR creation failed (label may not exist)" + fi + + - name: Upload update report + if: always() + uses: actions/upload-artifact@v4 + with: + name: dependency-update-report + path: update-report.json + if-no-files-found: ignore + + gpu-validate: + name: GPU validation (self-hosted) + needs: check-and-pr + if: needs.check-and-pr.outputs.has_updates == 'true' && vars.HPC_ENABLE_GPU_CI == '1' + runs-on: [self-hosted, linux, x64, gpu, nvidia] + steps: + - name: Checkout update branch + uses: actions/checkout@v4 + with: + ref: ${{ needs.check-and-pr.outputs.pr_branch }} + + - name: Run quick on GPU host + run: sudo bash scripts/run-all.sh --quick --ci + + - name: Post GPU results to PR + if: always() + env: + GH_TOKEN: ${{ github.token }} + run: | + pr_number=$(gh pr list --head "${{ needs.check-and-pr.outputs.pr_branch }}" \ + --state open --json number -q '.[0].number' 2>/dev/null || true) + result_file="/var/log/hpc-bench/results/run-all.json" + if [ -n "$pr_number" ] && [ -f "$result_file" ]; then + acceptance=$(jq -r '.acceptance // "unknown"' "$result_file") + gh pr comment "$pr_number" --body "### GPU Validation: ${acceptance} + Quick-mode results from self-hosted GPU runner. +
Full results JSON + + \`\`\`json + $(jq '.' "$result_file") + \`\`\` +
" + fi + + - name: Upload GPU results + if: always() + uses: actions/upload-artifact@v4 + with: + name: gpu-validation-results + path: /var/log/hpc-bench/results + if-no-files-found: warn diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 865b5ad..63e805c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,7 +5,7 @@ default_language_version: repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v6.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -23,14 +23,14 @@ repos: - id: check-shebang-scripts-are-executable - repo: https://github.com/scop/pre-commit-shfmt - rev: v3.8.0-1 + rev: v3.12.0-2 hooks: - id: shfmt args: ['-i', '4', '-ci', '-sr'] exclude: '^src/' - repo: https://github.com/shellcheck-py/shellcheck-py - rev: v0.9.0.6 + rev: v0.11.0.1 hooks: - id: shellcheck args: ['-s', 'bash', '-S', 'error'] diff --git a/AGENTS.md b/AGENTS.md index 81c2a8b..48b18af 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -9,14 +9,27 @@ This is a pure-Bash HPC benchmarking CLI suite (no web services, databases, or b | Task | Command | |------|---------| | Lint (pre-commit: shfmt + shellcheck) | `make lint` | -| Unit tests (BATS, 98 tests) | `make test` | +| Unit tests (BATS, 126 tests) | `make test` | | All quality gates | `make check` | | CI static checks | `make static-checks` | | Smoke run (bootstrap + inventory + report, ~5s) | `sudo bash scripts/run-all.sh --smoke --ci` | | Quick run (short benchmarks) | `sudo bash scripts/run-all.sh --quick --ci` | +| Check dependency updates | `make check-updates` | +| Preview dependency updates | `bash scripts/check-updates.sh --apply --dry-run` | + See `Makefile` for all targets and `README.md` / `SKILL.md` for full documentation. +### Dependency update system + +`scripts/check-updates.sh` tracks 14 external dependencies (container images, NVIDIA packages, upstream repos, pre-commit hooks) via `specs/dependencies.json`. 
It queries nvcr.io, Docker Hub, GitHub, and NVIDIA apt repos. Key modes: +- `--json` for CI consumption +- `--apply` to update version pins in source files +- `--apply --dry-run` to preview without modifying +- `--category CAT` to filter (container_image, nvidia_package, pre_commit_hook, upstream_source) + +The weekly GitHub Actions workflow (`.github/workflows/dependency-update.yml`) runs this automatically, validates with lint/tests/smoke, and opens a PR. CUDA↔driver compatibility constraints are checked before applying. + ### Gotchas - `pre-commit` is installed as a user package (`pip install --user`). Ensure `$HOME/.local/bin` is on `PATH` (the update script handles this). diff --git a/CHANGELOG.md b/CHANGELOG.md index 71808b9..74a2a1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,23 @@ All notable changes to the HPC Bench Suite are documented here. Version number i ## Version History +### Dependency Update System (2026-02-26) + +1. **New: `scripts/check-updates.sh`** — Automated dependency update checker. + - Tracks 14 external dependencies across 4 categories: container images (hpc-benchmarks, intel-hpckit), NVIDIA packages (driver, CUDA toolkit, DCGM, NCCL, Fabric Manager, Container Toolkit), upstream benchmark sources (gpu-burn, nccl-tests, nvbandwidth), and pre-commit hooks. + - Queries nvcr.io registry, Docker Hub, GitHub API, and NVIDIA apt repo. + - Modes: `--json` (machine-readable), `--apply` (update source files), `--apply --dry-run` (preview), `--category` (filter). + - CUDA↔driver compatibility constraints validated before applying updates. + - Post-apply validation (`bash -n` on `.sh`, `jq` on `.json`) with auto-revert on failure. + - Update history logged to `specs/update-history.json`. +2. **New: `specs/dependencies.json`** — Version manifest (single source of truth for tracked dependencies). +3. **New: `specs/update-history.json`** — Audit log for dependency updates. +4. 
**New: `.github/workflows/dependency-update.yml`** — Weekly GitHub Actions workflow (Mondays 09:00 UTC) that checks for updates, applies them, validates with lint/tests/smoke, and opens a PR. Optional GPU validation on self-hosted runner. +5. **New: `.github/dependabot.yml`** — Monthly auto-updates for GitHub Actions versions. +6. **New: `tests/check_updates.bats`** — 28 BATS tests for manifest schema, cross-checks, constraints, and script behavior. +7. **Makefile** — Added `check-updates` target. +8. **`.pre-commit-config.yaml`** — Updated hooks: pre-commit-hooks v4.5.0→v6.0.0, shfmt v3.8.0-1→v3.12.0-2, shellcheck-py v0.9.0.6→v0.11.0.1. + ### V1.10 Changes (2026-02-14) 1. **scripts/run-all.sh** — Added CI mode: diff --git a/Makefile b/Makefile index 883624d..7220aa9 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help lint shellcheck test static-checks check smoke quick ci-smoke ci-quick report-html version +.PHONY: help lint shellcheck test static-checks check smoke quick ci-smoke ci-quick report-html check-updates version # ── Meta ── @@ -19,6 +19,9 @@ help: @echo " make ci-quick Run quick with --ci (compact output for CI)" @echo " make report-html Generate optional HTML report from HPC_RESULTS_DIR (default /var/log/hpc-bench/results)" @echo "" + @echo "Maintenance:" + @echo " make check-updates Check tracked dependencies for newer versions" + @echo "" @echo "Info:" @echo " make version Show suite version" @echo "" @@ -66,6 +69,13 @@ report-html: if [ ! -d "$$results" ]; then echo "Results dir not found: $$results. 
Run the suite first or set HPC_RESULTS_DIR."; exit 1; fi; \ python3 reporting/generate_html_report.py -i "$$results" -o "$$results/report.html" +# ── Maintenance ── + +check-updates: + @command -v jq >/dev/null 2>&1 || { echo "jq not found."; exit 1; } + @command -v curl >/dev/null 2>&1 || { echo "curl not found."; exit 1; } + bash scripts/check-updates.sh + # ── Info ── version: diff --git a/README.md b/README.md index 4a123ca..e890936 100644 --- a/README.md +++ b/README.md @@ -191,6 +191,7 @@ HPC_RESULTS_DIR=/path/to/results bash scripts/report.sh │ ├── bootstrap.sh # Bootstrap and dependency install │ ├── run-all.sh # Master orchestrator (all phases) │ ├── report.sh # Report generator +│ ├── check-updates.sh # Dependency update checker (see below) │ ├── inventory.sh # General / CPU inventory │ ├── gpu-inventory.sh │ ├── topology.sh @@ -211,10 +212,15 @@ HPC_RESULTS_DIR=/path/to/results bash scripts/report.sh │ ├── filesystem-diag.sh │ ├── thermal-power.sh │ └── security-scan.sh -└── src/ # Bundled benchmark sources - ├── stream.c # STREAM memory benchmark - ├── gpu-burn/ # GPU burn-in (CUDA) - └── nccl-tests/ # Minimal NCCL test binaries +├── specs/ +│ ├── modules.json # Module manifest (single source of truth) +│ ├── dependencies.json # Tracked external dependency versions +│ └── update-history.json # Dependency update audit log +├── src/ # Bundled benchmark sources +│ ├── stream.c # STREAM memory benchmark +│ ├── gpu-burn/ # GPU burn-in (CUDA) +│ └── nccl-tests/ # Minimal NCCL test binaries +└── tests/ # BATS unit and integration tests ``` ## Linting and pre-commit @@ -243,6 +249,22 @@ make static-checks - Static gate runs `scripts/ci-static-checks.sh` (`bash -n` + `pre-commit run --all-files`) - Ubuntu VM job runs `run-all.sh --smoke --ci` and `run-all.sh --quick --ci` - Optional GPU job runs on self-hosted runners labeled `self-hosted,linux,x64,gpu,nvidia` and is enabled by repo variable `HPC_ENABLE_GPU_CI=1` +- **Dependency updates:** 
`.github/workflows/dependency-update.yml` runs weekly (Mondays 09:00 UTC) to check all 14 tracked dependencies for updates, apply them, validate with lint/tests/smoke, and open a PR. Manual trigger: `gh workflow run "Dependency Update Check"`. See **Dependency tracking** below. +- **Dependabot:** `.github/dependabot.yml` auto-updates GitHub Actions versions monthly. + +## Dependency tracking + +The suite tracks 14 external dependencies (container images, NVIDIA packages, upstream benchmark sources, pre-commit hooks) in `specs/dependencies.json`. Check for updates: + +```bash +make check-updates # Human-readable report +bash scripts/check-updates.sh --json # Machine-readable JSON +bash scripts/check-updates.sh --apply --dry-run # Preview what would change +bash scripts/check-updates.sh --apply # Apply updates to source files +bash scripts/check-updates.sh --category nvidia_package # Check one category +``` + +The checker queries nvcr.io, Docker Hub, GitHub, and the NVIDIA apt repo. It validates CUDA↔driver compatibility constraints before applying, runs `bash -n` on modified files after applying, and logs all changes to `specs/update-history.json`. 
## Concurrency and locking diff --git a/SKILL.md b/SKILL.md index d2b5475..7a0ae89 100644 --- a/SKILL.md +++ b/SKILL.md @@ -48,16 +48,19 @@ bench/ │ ├── security-scan.sh # Phase 4: SSH audit, services, SUID, kernel │ ├── report.sh # Phase 5: generates report.md from results │ ├── run-all.sh # Orchestrator (runs all phases) -│ └── ci-static-checks.sh # CI-only: shellcheck + syntax checks +│ ├── ci-static-checks.sh # CI-only: shellcheck + syntax checks +│ └── check-updates.sh # Dependency update checker (not a module) ├── specs/ │ ├── modules.json # Module manifest (single source of truth) -│ └── hardware-specs.json # GPU spec lookup data +│ ├── dependencies.json # Tracked external dependency versions +│ └── update-history.json # Dependency update audit log ├── src/ # Bundled sources (gpu-burn, nccl-tests, STREAM) ├── tests/ │ ├── helpers.bash # Shared BATS test helpers │ ├── common_helpers.bats # Unit tests for lib/common.sh │ ├── report_helpers.bats # Unit tests for lib/report-common.sh -│ └── module_integration.bats # Integration tests (syntax, manifest, source-gate) +│ ├── module_integration.bats # Integration tests (syntax, manifest, source-gate) +│ └── check_updates.bats # Tests for dependency checker + manifest ├── .editorconfig # Formatting rules ├── .pre-commit-config.yaml # Pre-commit hooks (shfmt, shellcheck) ├── Makefile # Quality gates: make lint, make test, make smoke @@ -118,6 +121,7 @@ bench/ - `make check` — runs lint + test + static checks. - `make smoke` — runs `run-all.sh --smoke` end-to-end. - `make quick` — runs `run-all.sh --quick` end-to-end. +- `make check-updates` — checks 14 tracked dependencies for upstream updates. 
### JSON contract @@ -180,3 +184,6 @@ Things an AI agent should **never** do: | Dev conventions, JSON contract, config guide | [docs/DEVELOPMENT.md](docs/DEVELOPMENT.md) | | Contributor guidelines | [CONTRIBUTING.md](CONTRIBUTING.md) | | Quality gates | [Makefile](Makefile) | +| Dependency version manifest | [specs/dependencies.json](specs/dependencies.json) | +| Dependency update checker | [scripts/check-updates.sh](scripts/check-updates.sh) | +| Dependency update history | [specs/update-history.json](specs/update-history.json) | diff --git a/scripts/check-updates.sh b/scripts/check-updates.sh new file mode 100755 index 0000000..bee8f2c --- /dev/null +++ b/scripts/check-updates.sh @@ -0,0 +1,944 @@ +#!/usr/bin/env bash +# check-updates.sh — Check tracked dependencies for available updates +# Usage: bash scripts/check-updates.sh [--json] [--apply] [--dry-run] [--category CAT] [--help] +# +# Reads specs/dependencies.json, queries upstream sources, and reports +# which dependencies have newer versions available. +# +# Not a benchmark module — does not source lib/common.sh or emit module JSON. +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "$0")/.." 
&& pwd)" +MANIFEST="${ROOT_DIR}/specs/dependencies.json" +VERSION_FILE="${ROOT_DIR}/VERSION" +HISTORY_FILE="${ROOT_DIR}/specs/update-history.json" + +# ── Defaults ── +OUTPUT_JSON=false +APPLY=false +DRY_RUN=false +FILTER_CATEGORY="" +CURL_TIMEOUT=10 + +# ── Colors (disabled when not a terminal or when NO_COLOR is set) ── +if [ -t 1 ] && [ -z "${NO_COLOR:-}" ]; then + C_GREEN='\033[0;32m' + C_YELLOW='\033[0;33m' + C_RED='\033[0;31m' + C_CYAN='\033[0;36m' + C_RESET='\033[0m' +else + C_GREEN="" C_YELLOW="" C_RED="" C_CYAN="" C_RESET="" +fi + +# ── Logging ── +log() { echo -e "[check-updates] $*"; } +log_ok() { echo -e "[check-updates] ${C_GREEN}✓${C_RESET} $*"; } +log_update() { echo -e "[check-updates] ${C_YELLOW}↑${C_RESET} $*"; } +log_fail() { echo -e "[check-updates] ${C_RED}✗${C_RESET} $*"; } +log_info() { echo -e "[check-updates] ${C_CYAN}ℹ${C_RESET} $*"; } + +# ── Usage ── +usage() { + cat <<'EOF' +Usage: bash scripts/check-updates.sh [OPTIONS] + +Check tracked dependencies for available upstream updates. 
+ +Options: + --json Output machine-readable JSON report to stdout + --apply Apply available updates to source files + --dry-run Preview what --apply would change (use with --apply) + --category CAT Check only one category: + container_image, upstream_source, pre_commit_hook, + nvidia_package + --help Show this help + +Environment: + GITHUB_TOKEN GitHub API token for higher rate limits (5000/hr vs 60/hr) + NGC_API_KEY NGC API key (currently unused; reserved for future auth) + NVIDIA_REPO_OS Ubuntu version for NVIDIA apt repo (default: auto-detect or 2204) + NO_COLOR Disable colored output + +Examples: + bash scripts/check-updates.sh # Human-readable report + bash scripts/check-updates.sh --json # JSON report + bash scripts/check-updates.sh --apply # Check and apply updates + bash scripts/check-updates.sh --category pre_commit_hook # Check one category + bash scripts/check-updates.sh --apply --dry-run # Preview changes +EOF +} + +# ── Argument parsing ── +parse_args() { + while [ $# -gt 0 ]; do + case "$1" in + --json) OUTPUT_JSON=true ;; + --apply) APPLY=true ;; + --dry-run) DRY_RUN=true ;; + --category) + shift + FILTER_CATEGORY="${1:-}" + if [ -z "$FILTER_CATEGORY" ]; then + echo "Error: --category requires a value" >&2 + exit 1 + fi + ;; + --help) + usage + exit 0 + ;; + *) + echo "Unknown option: $1" >&2 + usage >&2 + exit 1 + ;; + esac + shift + done +} + +# ── Prerequisites ── +require_tools() { + local missing="" + for tool in jq curl; do + if ! 
command -v "$tool" &> /dev/null; then + missing="${missing} ${tool}" + fi + done + if [ -n "$missing" ]; then + echo "Error: required tools not found:${missing}" >&2 + exit 1 + fi +} + +# ── GitHub API helper ── +github_api() { + local endpoint="$1" + local -a curl_args=( + -s --connect-timeout "$CURL_TIMEOUT" --max-time 30 + -H "Accept: application/vnd.github+json" + ) + if [ -n "${GITHUB_TOKEN:-}" ]; then + curl_args+=(-H "Authorization: Bearer ${GITHUB_TOKEN}") + fi + curl "${curl_args[@]}" "https://api.github.com${endpoint}" 2>/dev/null +} + +# ── Generic curl helper ── +api_get() { + local url="$1" + shift + curl -s --connect-timeout "$CURL_TIMEOUT" --max-time 30 "$@" "$url" 2>/dev/null +} + +# ═══════════════════════════════════════════════════════════════════ +# Check methods — each returns JSON: {"latest":"...", "extra":"..."} +# On failure returns: {"error":"..."} +# ═══════════════════════════════════════════════════════════════════ + +check_nvcr_registry() { + local image="$1" + local tag_filter="${2:-}" + + local token + token=$(api_get "https://nvcr.io/proxy_auth?scope=repository:${image}:pull" \ + | jq -r '.token // empty') + if [ -z "$token" ]; then + echo '{"error":"failed to obtain registry token"}' + return 0 + fi + + local tags_json + tags_json=$(api_get "https://nvcr.io/v2/${image}/tags/list" \ + -H "Authorization: Bearer ${token}") + if [ -z "$tags_json" ] || ! 
echo "$tags_json" | jq -e '.tags' &> /dev/null; then + echo '{"error":"failed to fetch tags from registry"}' + return 0 + fi + + local latest + if [ -n "$tag_filter" ]; then + latest=$(echo "$tags_json" | jq -r --arg f "$tag_filter" \ + '[.tags[] | select(test($f))] | sort | last // empty') + else + latest=$(echo "$tags_json" | jq -r '.tags | sort | last // empty') + fi + + if [ -z "$latest" ]; then + echo '{"error":"no tags matched filter"}' + return 0 + fi + jq -n --arg l "$latest" '{"latest":$l}' +} + +check_dockerhub() { + local repo="$1" + local tag_filter="${2:-}" + + local resp + resp=$(api_get "https://hub.docker.com/v2/repositories/${repo}/tags/?page_size=100&ordering=-last_updated") + if [ -z "$resp" ] || ! echo "$resp" | jq -e '.results' &> /dev/null; then + echo '{"error":"failed to fetch tags from Docker Hub"}' + return 0 + fi + + local latest + if [ -n "$tag_filter" ]; then + latest=$(echo "$resp" | jq -r --arg f "$tag_filter" \ + '[.results[].name | select(test($f))] | sort | last // empty') + else + latest=$(echo "$resp" | jq -r '.results[0].name // empty') + fi + + if [ -z "$latest" ]; then + echo '{"error":"no tags matched filter"}' + return 0 + fi + jq -n --arg l "$latest" '{"latest":$l}' +} + +check_github_releases() { + local repo="$1" + + local resp + resp=$(github_api "/repos/${repo}/releases/latest") + + local tag + tag=$(echo "$resp" | jq -r '.tag_name // empty') + if [ -z "$tag" ]; then + local msg + msg=$(echo "$resp" | jq -r '.message // "no releases found"') + echo "{\"error\":$(echo "$msg" | jq -Rs .)}" + return 0 + fi + + local date + date=$(echo "$resp" | jq -r '.published_at // empty') + jq -n --arg l "$tag" --arg d "$date" '{"latest":$l,"published":$d}' +} + +check_github_tags() { + local repo="$1" + + local resp + resp=$(github_api "/repos/${repo}/tags?per_page=1") + + local tag + tag=$(echo "$resp" | jq -r '.[0].name // empty' 2>/dev/null) + if [ -z "$tag" ]; then + echo '{"error":"no tags found"}' + return 0 + fi + jq -n --arg l 
"$tag" '{"latest":$l}' +} + +check_github_commits() { + local repo="$1" + + local resp + resp=$(github_api "/repos/${repo}/commits?per_page=1") + + local sha date + sha=$(echo "$resp" | jq -r '.[0].sha // empty' 2>/dev/null) + if [ -z "$sha" ]; then + echo '{"error":"failed to fetch commits"}' + return 0 + fi + sha="${sha:0:8}" + date=$(echo "$resp" | jq -r '.[0].commit.committer.date // empty' 2>/dev/null) + jq -n --arg s "$sha" --arg d "$date" '{"latest":$s,"commit_date":$d}' +} + +# ═══════════════════════════════════════════════════════════════════ +# NVIDIA apt repo — download once, cache for the run +# ═══════════════════════════════════════════════════════════════════ + +_NVIDIA_PACKAGES_CACHE="" + +_nvidia_repo_os() { + if [ -n "${NVIDIA_REPO_OS:-}" ]; then + echo "$NVIDIA_REPO_OS" + return + fi + if [ -f /etc/os-release ]; then + # shellcheck disable=SC1091 + local ver_id + ver_id=$(. /etc/os-release && echo "${VERSION_ID:-}") + if [ -n "$ver_id" ]; then + echo "$ver_id" | tr -d '.' + return + fi + fi + echo "2204" +} + +_ensure_nvidia_packages_cache() { + if [ -n "$_NVIDIA_PACKAGES_CACHE" ] && [ -f "$_NVIDIA_PACKAGES_CACHE" ]; then + return 0 + fi + local os_ver + os_ver=$(_nvidia_repo_os) + local url="https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${os_ver}/x86_64/Packages" + _NVIDIA_PACKAGES_CACHE=$(mktemp /tmp/hpc-bench-nvidia-packages.XXXXXX) + if ! api_get "$url" -o "$_NVIDIA_PACKAGES_CACHE" --max-time 60; then + rm -f "$_NVIDIA_PACKAGES_CACHE" + _NVIDIA_PACKAGES_CACHE="" + return 1 + fi + if [ ! -s "$_NVIDIA_PACKAGES_CACHE" ]; then + rm -f "$_NVIDIA_PACKAGES_CACHE" + _NVIDIA_PACKAGES_CACHE="" + return 1 + fi + return 0 +} + +_cleanup_nvidia_cache() { + [ -n "$_NVIDIA_PACKAGES_CACHE" ] && rm -f "$_NVIDIA_PACKAGES_CACHE" || true +} +trap '_cleanup_nvidia_cache' EXIT + +check_nvidia_apt_repo() { + local dep_json="$1" + + if ! 
_ensure_nvidia_packages_cache; then + echo '{"error":"failed to download NVIDIA apt repo index"}' + return 0 + fi + + local pattern version_extract + pattern=$(echo "$dep_json" | jq -r '.source.package_pattern') + version_extract=$(echo "$dep_json" | jq -r '.source.version_extract') + + case "$version_extract" in + highest_match) + local latest + latest=$(grep -oP "$pattern" "$_NVIDIA_PACKAGES_CACHE" \ + | grep -oP '\d+$' | sort -nr | head -1) + if [ -z "$latest" ]; then + echo '{"error":"no matching packages in repo"}' + return 0 + fi + jq -n --arg l "$latest" '{"latest":$l}' + ;; + cuda_major_minor) + local latest_pkg + latest_pkg=$(grep -oP "$pattern" "$_NVIDIA_PACKAGES_CACHE" \ + | sed 's/Package: cuda-toolkit-//' \ + | sort -t- -k1,1nr -k2,2nr | head -1) + if [ -z "$latest_pkg" ]; then + echo '{"error":"no CUDA toolkit packages in repo"}' + return 0 + fi + local latest + latest=$(echo "$latest_pkg" | tr '-' '.') + jq -n --arg l "$latest" '{"latest":$l}' + ;; + package_version) + local latest + latest=$(grep -A1 "$pattern" "$_NVIDIA_PACKAGES_CACHE" \ + | grep '^Version:' | sed 's/^Version: //' \ + | sed 's/^[0-9]*://' | sort -Vr | head -1) + if [ -z "$latest" ]; then + echo '{"error":"package not found in repo"}' + return 0 + fi + jq -n --arg l "$latest" '{"latest":$l}' + ;; + nccl_version) + local raw + raw=$(grep -A1 "$pattern" "$_NVIDIA_PACKAGES_CACHE" \ + | grep '^Version:' | sed 's/^Version: //' | sort -Vr | head -1) + if [ -z "$raw" ]; then + echo '{"error":"libnccl2 not found in repo"}' + return 0 + fi + local latest + latest=$(echo "$raw" | grep -oP '^[0-9]+\.[0-9]+\.[0-9]+') + jq -n --arg l "$latest" --arg r "$raw" '{"latest":$l,"full_version":$r}' + ;; + *) + echo "{\"error\":\"unknown version_extract: ${version_extract}\"}" + ;; + esac +} + +# ═══════════════════════════════════════════════════════════════════ +# Dispatch: run the right check method for a dependency +# ═══════════════════════════════════════════════════════════════════ + +run_check() 
{ + local dep_json="$1" + + local method image repo tag_filter + method=$(echo "$dep_json" | jq -r '.source.check_method') + + case "$method" in + nvcr_registry) + image=$(echo "$dep_json" | jq -r '.source.image') + tag_filter=$(echo "$dep_json" | jq -r '.source.tag_filter // empty') + check_nvcr_registry "$image" "$tag_filter" + ;; + dockerhub) + repo=$(echo "$dep_json" | jq -r '.source.repo') + tag_filter=$(echo "$dep_json" | jq -r '.source.tag_filter // empty') + check_dockerhub "$repo" "$tag_filter" + ;; + github_releases) + repo=$(echo "$dep_json" | jq -r '.source.repo') + check_github_releases "$repo" + ;; + github_tags) + repo=$(echo "$dep_json" | jq -r '.source.repo') + check_github_tags "$repo" + ;; + github_commits) + repo=$(echo "$dep_json" | jq -r '.source.repo') + check_github_commits "$repo" + ;; + nvidia_apt_repo) + check_nvidia_apt_repo "$dep_json" + ;; + *) + echo "{\"error\":\"unknown check method: ${method}\"}" + ;; + esac +} + +# ═══════════════════════════════════════════════════════════════════ +# Version comparison +# ═══════════════════════════════════════════════════════════════════ + +has_update() { + local current="$1" latest="$2" category="$3" + + # upstream_source deps track HEAD — always "informational", never "update" + if [ "$category" = "upstream_source" ]; then + echo "false" + return 0 + fi + # Simple string comparison: different = update available + if [ "$current" != "$latest" ] && [ -n "$latest" ]; then + echo "true" + else + echo "false" + fi +} + +# ═══════════════════════════════════════════════════════════════════ +# Version utilities +# ═══════════════════════════════════════════════════════════════════ + +version_gte() { + local a="$1" b="$2" + [ "$(printf '%s\n%s' "$a" "$b" | sort -V | head -1)" = "$b" ] +} + +version_major() { + echo "${1%%.*}" | tr -cd '0-9' +} + +# ═══════════════════════════════════════════════════════════════════ +# Constraint checking +# 
═══════════════════════════════════════════════════════════════════ + +check_constraints() { + local dep_json="$1" latest_version="$2" + + local constraints + constraints=$(echo "$dep_json" | jq -c '.constraints // []') + local n_constraints + n_constraints=$(echo "$constraints" | jq 'length') + + if [ "$n_constraints" -eq 0 ]; then + echo '[]' + return 0 + fi + + local warnings="[]" + local ci + for ((ci = 0; ci < n_constraints; ci++)); do + local constraint + constraint=$(echo "$constraints" | jq -c ".[$ci]") + local req_dep req_desc + req_dep=$(echo "$constraint" | jq -r '.requires') + req_desc=$(echo "$constraint" | jq -r '.description // ""') + + local current_req_ver + current_req_ver=$(jq -r --arg n "$req_dep" \ + '.dependencies[] | select(.name==$n) | .current_version' "$MANIFEST") + + local latest_major + latest_major=$(version_major "$latest_version") + + local min_required + min_required=$(echo "$constraint" | jq -r --arg m "$latest_major" \ + '.minimum_version[$m] // empty') + + if [ -z "$min_required" ]; then + warnings=$(echo "$warnings" | jq \ + --arg dep "$req_dep" --arg desc "$req_desc" \ + --arg major "$latest_major" \ + '. + [{"requires":$dep,"status":"unknown","message":"no constraint data for major version "+$major,"description":$desc}]') + continue + fi + + local status="ok" + if version_gte "$current_req_ver" "$min_required"; then + status="ok" + else + status="incompatible" + fi + + warnings=$(echo "$warnings" | jq \ + --arg dep "$req_dep" --arg min "$min_required" \ + --arg cur "$current_req_ver" --arg st "$status" --arg desc "$req_desc" \ + '. 
+ [{"requires":$dep,"minimum":$min,"current":$cur,"status":$st,"description":$desc}]') + done + + echo "$warnings" +} + +# ═══════════════════════════════════════════════════════════════════ +# Post-apply validation +# ═══════════════════════════════════════════════════════════════════ + +validate_modified_files() { + local -a files=("$@") + local all_ok=true + + for file in "${files[@]}"; do + [ -z "$file" ] && continue + local full_path="${ROOT_DIR}/${file}" + [ ! -f "$full_path" ] && continue + + case "$file" in + *.sh) + if ! bash -n "$full_path" 2>/dev/null; then + log_fail "Post-apply validation failed: ${file} has bash syntax errors" + git -C "$ROOT_DIR" checkout -- "$file" 2>/dev/null || true + all_ok=false + fi + ;; + *.json) + if ! jq . "$full_path" > /dev/null 2>&1; then + log_fail "Post-apply validation failed: ${file} has invalid JSON" + git -C "$ROOT_DIR" checkout -- "$file" 2>/dev/null || true + all_ok=false + fi + ;; + *.yaml | *.yml) + if command -v python3 &> /dev/null; then + if ! python3 -c "import yaml; yaml.safe_load(open('${full_path}'))" 2>/dev/null; then + log_fail "Post-apply validation failed: ${file} has invalid YAML" + git -C "$ROOT_DIR" checkout -- "$file" 2>/dev/null || true + all_ok=false + fi + fi + ;; + esac + done + + if [ "$all_ok" = true ]; then + return 0 + else + return 1 + fi +} + +# ═══════════════════════════════════════════════════════════════════ +# Update history +# ═══════════════════════════════════════════════════════════════════ + +append_update_history() { + local updates_json="$1" + local check_date="$2" + + if [ "$(echo "$updates_json" | jq 'length')" -eq 0 ]; then + return 0 + fi + + local history="[]" + if [ -f "$HISTORY_FILE" ]; then + history=$(jq '.' "$HISTORY_FILE" 2>/dev/null || echo '[]') + fi + + local tmp + tmp=$(mktemp) + jq --arg d "$check_date" --argjson u "$updates_json" \ + '. 
+ [{"date": $d, "updates": $u}]' <<< "$history" > "$tmp" \ + && mv "$tmp" "$HISTORY_FILE" +} + +# ═══════════════════════════════════════════════════════════════════ +# Apply logic +# ═══════════════════════════════════════════════════════════════════ + +apply_container_image() { + local name="$1" current="$2" latest="$3" + + if [ "$name" = "hpc-benchmarks" ]; then + if [ "$DRY_RUN" = true ]; then + log_info " conf/defaults.sh: HPL_IMAGE ${current} → ${latest}, HPL_IMAGE_ALT → ${current}" + return 0 + fi + local file="${ROOT_DIR}/conf/defaults.sh" + local old_alt + old_alt=$(jq -r '.dependencies[] | select(.name=="hpc-benchmarks") | .current_alt_version' "$MANIFEST") + sed -i "s|hpc-benchmarks:${current}}|hpc-benchmarks:${latest}}|" "$file" + sed -i "s|hpc-benchmarks:${old_alt}}|hpc-benchmarks:${current}}|" "$file" + MODIFIED_FILES+=("conf/defaults.sh") + local tmp + tmp=$(mktemp) + jq --arg v "$latest" --arg a "$current" \ + '(.dependencies[] | select(.name=="hpc-benchmarks")).current_version = $v + | (.dependencies[] | select(.name=="hpc-benchmarks")).current_alt_version = $a' \ + "$MANIFEST" > "$tmp" && mv "$tmp" "$MANIFEST" + MODIFIED_FILES+=("specs/dependencies.json") + log_info "Updated conf/defaults.sh: HPL_IMAGE → ${latest}, HPL_IMAGE_ALT → ${current}" + fi + + if [ "$name" = "intel-hpckit" ]; then + if [ "$DRY_RUN" = true ]; then + log_info " scripts/hpl-cpu.sh: intel/hpckit ${current} → ${latest}" + return 0 + fi + local file="${ROOT_DIR}/scripts/hpl-cpu.sh" + sed -i "s|intel/hpckit:[^\"]*|intel/hpckit:${latest}|" "$file" + MODIFIED_FILES+=("scripts/hpl-cpu.sh") + local tmp + tmp=$(mktemp) + jq --arg v "$latest" \ + '(.dependencies[] | select(.name=="intel-hpckit")).current_version = $v' \ + "$MANIFEST" > "$tmp" && mv "$tmp" "$MANIFEST" + MODIFIED_FILES+=("specs/dependencies.json") + log_info "Updated scripts/hpl-cpu.sh: intel/hpckit → ${latest}" + fi +} + +apply_precommit_hooks() { + if [ "$DRY_RUN" = true ]; then + log_info " .pre-commit-config.yaml: 
would run pre-commit autoupdate" + return 0 + fi + if ! command -v pre-commit &> /dev/null; then + log_fail "pre-commit not installed — cannot apply pre-commit hook updates" + return 1 + fi + + log_info "Running pre-commit autoupdate..." + (cd "$ROOT_DIR" && pre-commit autoupdate) + MODIFIED_FILES+=(".pre-commit-config.yaml") + + local config="${ROOT_DIR}/.pre-commit-config.yaml" + local tmp + tmp=$(mktemp) + local hooks_rev shfmt_rev shellcheck_rev + hooks_rev=$(grep -A1 'pre-commit/pre-commit-hooks' "$config" | grep 'rev:' | awk '{print $2}') + shfmt_rev=$(grep -A1 'scop/pre-commit-shfmt' "$config" | grep 'rev:' | awk '{print $2}') + shellcheck_rev=$(grep -A1 'shellcheck-py/shellcheck-py' "$config" | grep 'rev:' | awk '{print $2}') + + jq --arg h "${hooks_rev:-}" --arg s "${shfmt_rev:-}" --arg sc "${shellcheck_rev:-}" ' + (if $h != "" then (.dependencies[] | select(.name=="pre-commit-hooks")).current_version = $h else . end) + | (if $s != "" then (.dependencies[] | select(.name=="pre-commit-shfmt")).current_version = $s else . end) + | (if $sc != "" then (.dependencies[] | select(.name=="shellcheck-py")).current_version = $sc else . 
end) + ' "$MANIFEST" > "$tmp" && mv "$tmp" "$MANIFEST" + MODIFIED_FILES+=("specs/dependencies.json") + + log_info "Updated .pre-commit-config.yaml and synced manifest" +} + +apply_nvidia_package() { + local name="$1" current="$2" latest="$3" + local file="${ROOT_DIR}/scripts/bootstrap.sh" + + if [ "$name" = "nvidia-driver" ]; then + if [ "$DRY_RUN" = true ]; then + log_info " scripts/bootstrap.sh: driver fallback ${current} → ${latest}" + return 0 + fi + sed -i "s|nvidia-driver-${current}-server|nvidia-driver-${latest}-server|" "$file" + MODIFIED_FILES+=("scripts/bootstrap.sh") + local tmp + tmp=$(mktemp) + jq --arg v "$latest" \ + '(.dependencies[] | select(.name=="nvidia-driver")).current_version = $v' \ + "$MANIFEST" > "$tmp" && mv "$tmp" "$MANIFEST" + MODIFIED_FILES+=("specs/dependencies.json") + log_info "Updated bootstrap.sh: driver fallback → nvidia-driver-${latest}-server" + elif [ "$name" = "cuda-toolkit" ]; then + if [ "$DRY_RUN" = true ]; then + log_info " scripts/bootstrap.sh: CUDA fallback ${current} → ${latest}" + return 0 + fi + sed -i "s|_cuda_runtime:-${current}|_cuda_runtime:-${latest}|g" "$file" + MODIFIED_FILES+=("scripts/bootstrap.sh") + local tmp + tmp=$(mktemp) + jq --arg v "$latest" \ + '(.dependencies[] | select(.name=="cuda-toolkit")).current_version = $v' \ + "$MANIFEST" > "$tmp" && mv "$tmp" "$MANIFEST" + MODIFIED_FILES+=("specs/dependencies.json") + log_info "Updated bootstrap.sh: CUDA fallback → ${latest}" + else + if [ "$DRY_RUN" = true ]; then + log_info " manifest: ${name} → ${latest} (report-only)" + return 0 + fi + local tmp + tmp=$(mktemp) + jq --arg n "$name" --arg v "$latest" \ + '(.dependencies[] | select(.name==$n)).current_version = $v' \ + "$MANIFEST" > "$tmp" && mv "$tmp" "$MANIFEST" + MODIFIED_FILES+=("specs/dependencies.json") + log_info "Updated manifest: ${name} → ${latest} (report-only, no file changes)" + fi +} + +# ═══════════════════════════════════════════════════════════════════ +# Main +# 
═══════════════════════════════════════════════════════════════════ + +main() { + parse_args "$@" + require_tools + + if [ ! -f "$MANIFEST" ]; then + echo "Error: manifest not found: ${MANIFEST}" >&2 + exit 1 + fi + + if [ "$DRY_RUN" = true ] && [ "$APPLY" = false ]; then + echo "Error: --dry-run requires --apply" >&2 + exit 1 + fi + + local suite_version="unknown" + [ -f "$VERSION_FILE" ] && suite_version=$(cat "$VERSION_FILE") + local check_date + check_date=$(date -u +%Y-%m-%dT%H:%M:%SZ) + local dep_count + dep_count=$(jq '.dependencies | length' "$MANIFEST") + + if [ "$OUTPUT_JSON" = false ]; then + log "HPC Bench Suite v${suite_version} — Dependency Update Check" + log "Date: ${check_date}" + log "Manifest: ${MANIFEST} (${dep_count} dependencies)" + log "─────────────────────────────────────────────────────" + fi + + # Collect results as a JSON array + local results="[]" + local count_update=0 count_ok=0 count_failed=0 count_constraint_warnings=0 + local apply_containers="" apply_precommit=false apply_nvidia="" + declare -a MODIFIED_FILES=() + + local i + for ((i = 0; i < dep_count; i++)); do + local dep + dep=$(jq -c ".dependencies[$i]" "$MANIFEST") + + local name category current + name=$(echo "$dep" | jq -r '.name') + category=$(echo "$dep" | jq -r '.category') + current=$(echo "$dep" | jq -r '.current_version') + + # Category filter + if [ -n "$FILTER_CATEGORY" ] && [ "$category" != "$FILTER_CATEGORY" ]; then + continue + fi + + # Run the check + local check_result + check_result=$(run_check "$dep") + + local error latest + error=$(echo "$check_result" | jq -r '.error // empty') + + if [ -n "$error" ]; then + count_failed=$((count_failed + 1)) + if [ "$OUTPUT_JSON" = false ]; then + log_fail "$(printf '%-24s check failed: %s' "$name" "$error")" + fi + results=$(echo "$results" | jq --arg n "$name" --arg c "$category" \ + --arg cv "$current" --arg e "$error" \ + '. 
+ [{"name":$n,"category":$c,"current_version":$cv,"status":"check_failed","error":$e}]') + continue + fi + + latest=$(echo "$check_result" | jq -r '.latest // empty') + local update_available + update_available=$(has_update "$current" "$latest" "$category") + + local targets + targets=$(echo "$dep" | jq -c '.update_targets // []') + + # Check constraints for this dependency + local cw_json="[]" + if [ "$update_available" = "true" ]; then + cw_json=$(check_constraints "$dep" "$latest") + fi + + if [ "$update_available" = "true" ]; then + count_update=$((count_update + 1)) + if [ "$OUTPUT_JSON" = false ]; then + log_update "$(printf '%-24s %s → %s' "$name" "$current" "$latest")" + local n_cw + n_cw=$(echo "$cw_json" | jq 'length') + local cwi + for ((cwi = 0; cwi < n_cw; cwi++)); do + local cw_req cw_min cw_cur cw_status + cw_req=$(echo "$cw_json" | jq -r ".[$cwi].requires") + cw_min=$(echo "$cw_json" | jq -r ".[$cwi].minimum") + cw_cur=$(echo "$cw_json" | jq -r ".[$cwi].current") + cw_status=$(echo "$cw_json" | jq -r ".[$cwi].status") + if [ "$cw_status" = "incompatible" ]; then + log_fail " ⚠ requires ${cw_req} ≥ ${cw_min} (current: ${cw_cur} — INCOMPATIBLE)" + count_constraint_warnings=$((count_constraint_warnings + 1)) + elif [ "$cw_status" = "ok" ]; then + log_ok " ⚠ requires ${cw_req} ≥ ${cw_min} (current: ${cw_cur} — OK)" + fi + done + fi + # Queue for --apply + if [ "$category" = "container_image" ]; then + apply_containers="${apply_containers} ${name}:${current}:${latest}" + elif [ "$category" = "pre_commit_hook" ]; then + apply_precommit=true + elif [ "$category" = "nvidia_package" ]; then + apply_nvidia="${apply_nvidia} ${name}:${current}:${latest}" + fi + else + count_ok=$((count_ok + 1)) + if [ "$OUTPUT_JSON" = false ]; then + local extra="" + if [ "$category" = "upstream_source" ]; then + local commit_date published + commit_date=$(echo "$check_result" | jq -r '.commit_date // empty') + published=$(echo "$check_result" | jq -r '.published // empty') + 
if [ -n "$commit_date" ]; then + extra=" (latest commit: ${latest}, ${commit_date%%T*})" + elif [ -n "$published" ]; then + extra=" (latest release: ${latest}, ${published%%T*})" + elif [ -n "$latest" ]; then + extra=" (latest tag: ${latest})" + fi + fi + log_ok "$(printf '%-24s %s%s' "$name" "$current" "$extra")" + fi + fi + + results=$(echo "$results" | jq \ + --arg n "$name" --arg c "$category" --arg cv "$current" \ + --arg lv "$latest" --argjson ua "$update_available" --argjson t "$targets" \ + --argjson cr "$check_result" --argjson cw "$cw_json" \ + '. + [{"name":$n,"category":$c,"current_version":$cv,"latest_version":$lv, + "update_available":$ua,"update_targets":$t,"check_detail":$cr, + "constraint_warnings":$cw}]') + done + + local total=$((count_update + count_ok + count_failed)) + + if [ "$OUTPUT_JSON" = false ]; then + log "─────────────────────────────────────────────────────" + log "Summary: ${count_update} updates available, ${count_ok} up-to-date, ${count_failed} check failed" + fi + + # JSON output + if [ "$OUTPUT_JSON" = true ]; then + jq -n \ + --arg date "$check_date" --arg ver "$suite_version" \ + --argjson total "$total" --argjson updates "$count_update" \ + --argjson ok "$count_ok" --argjson failed "$count_failed" \ + --argjson cw_total "$count_constraint_warnings" \ + --argjson deps "$results" \ + '{ + check_date: $date, + suite_version: $ver, + summary: {total: $total, updates_available: $updates, up_to_date: $ok, check_failed: $failed, constraint_warnings: $cw_total}, + dependencies: $deps + }' + fi + + # Apply updates + if [ "$APPLY" = true ] && [ "$count_update" -gt 0 ]; then + if [ "$OUTPUT_JSON" = false ]; then + echo "" + if [ "$DRY_RUN" = true ]; then + log "Dry run — showing what --apply would change:" + else + log "Applying updates..." 
+ fi + fi + + local applied_updates="[]" + + for entry in $apply_containers; do + local cname ccurrent clatest + cname="${entry%%:*}" + entry="${entry#*:}" + ccurrent="${entry%%:*}" + clatest="${entry#*:}" + apply_container_image "$cname" "$ccurrent" "$clatest" + applied_updates=$(echo "$applied_updates" | jq \ + --arg n "$cname" --arg f "$ccurrent" --arg t "$clatest" \ + '. + [{"name":$n,"from":$f,"to":$t}]') + done + + for entry in $apply_nvidia; do + local nname ncurrent nlatest + nname="${entry%%:*}" + entry="${entry#*:}" + ncurrent="${entry%%:*}" + nlatest="${entry#*:}" + apply_nvidia_package "$nname" "$ncurrent" "$nlatest" + applied_updates=$(echo "$applied_updates" | jq \ + --arg n "$nname" --arg f "$ncurrent" --arg t "$nlatest" \ + '. + [{"name":$n,"from":$f,"to":$t}]') + done + + if [ "$apply_precommit" = true ]; then + apply_precommit_hooks + applied_updates=$(echo "$applied_updates" | jq \ + '. + [{"name":"pre-commit-hooks","from":"(multiple)","to":"latest"}]') + fi + + if [ "$DRY_RUN" = false ]; then + # Post-apply validation + local unique_files + unique_files=$(printf '%s\n' "${MODIFIED_FILES[@]}" | sort -u) + local -a files_array + mapfile -t files_array <<< "$unique_files" + if validate_modified_files "${files_array[@]}"; then + if [ "$OUTPUT_JSON" = false ]; then + log_ok "Post-apply validation passed" + fi + else + if [ "$OUTPUT_JSON" = false ]; then + log_fail "Post-apply validation failed — reverted broken files" + fi + return 1 + fi + + # Update history + append_update_history "$applied_updates" "$check_date" + + if [ "$OUTPUT_JSON" = false ]; then + log "Done. Review changes with: git diff" + fi + else + if [ "$OUTPUT_JSON" = false ]; then + log_info "Dry run complete — no files were modified." + fi + fi + elif [ "$APPLY" = true ] && [ "$count_update" -eq 0 ]; then + if [ "$OUTPUT_JSON" = false ]; then + log_info "Nothing to apply — all dependencies are up to date." 
+ fi + fi + + return 0 +} + +main "$@" diff --git a/specs/dependencies.json b/specs/dependencies.json new file mode 100644 index 0000000..4700b0f --- /dev/null +++ b/specs/dependencies.json @@ -0,0 +1,191 @@ +{ + "schema_version": 1, + "description": "Tracked external dependencies for automated update checking. See scripts/check-updates.sh.", + "dependencies": [ + { + "name": "hpc-benchmarks", + "category": "container_image", + "current_version": "24.03", + "current_alt_version": "23.10", + "source": { + "check_method": "nvcr_registry", + "registry": "nvcr.io", + "image": "nvidia/hpc-benchmarks", + "tag_filter": "^[0-9]{2}\\.[0-9]{2}$" + }, + "update_targets": [ + "conf/defaults.sh" + ], + "notes": "NVIDIA HPC Benchmarks container for HPL-MxP. On update, current primary becomes alt." + }, + { + "name": "intel-hpckit", + "category": "container_image", + "current_version": "latest", + "source": { + "check_method": "dockerhub", + "repo": "intel/hpckit", + "tag_filter": "^[0-9]{4}\\.[0-9]+\\.[0-9]+-devel-ubuntu22\\.04$" + }, + "update_targets": [ + "scripts/hpl-cpu.sh" + ], + "notes": "Intel HPC toolkit for CPU HPL. Currently uses :latest; checker reports pinnable alternatives." + }, + { + "name": "gpu-burn", + "category": "upstream_source", + "current_version": "HEAD", + "source": { + "check_method": "github_commits", + "repo": "wilicc/gpu-burn" + }, + "update_targets": [], + "notes": "No releases or tags. Cloned at HEAD. Report-only — no auto-apply." + }, + { + "name": "nccl-tests", + "category": "upstream_source", + "current_version": "HEAD", + "source": { + "check_method": "github_tags", + "repo": "NVIDIA/nccl-tests" + }, + "update_targets": [], + "notes": "Has tags (e.g. v2.17.9) but suite clones HEAD. Report-only." + }, + { + "name": "nvbandwidth", + "category": "upstream_source", + "current_version": "HEAD", + "source": { + "check_method": "github_releases", + "repo": "NVIDIA/nvbandwidth" + }, + "update_targets": [], + "notes": "Has GitHub releases (e.g. v0.8). 
Suite clones HEAD. Report-only." + }, + { + "name": "pre-commit-hooks", + "category": "pre_commit_hook", + "current_version": "v6.0.0", + "source": { + "check_method": "github_releases", + "repo": "pre-commit/pre-commit-hooks" + }, + "update_targets": [ + ".pre-commit-config.yaml" + ] + }, + { + "name": "pre-commit-shfmt", + "category": "pre_commit_hook", + "current_version": "v3.12.0-2", + "source": { + "check_method": "github_tags", + "repo": "scop/pre-commit-shfmt" + }, + "update_targets": [ + ".pre-commit-config.yaml" + ] + }, + { + "name": "shellcheck-py", + "category": "pre_commit_hook", + "current_version": "v0.11.0.1", + "source": { + "check_method": "github_tags", + "repo": "shellcheck-py/shellcheck-py" + }, + "update_targets": [ + ".pre-commit-config.yaml" + ] + }, + { + "name": "nvidia-driver", + "category": "nvidia_package", + "current_version": "580", + "source": { + "check_method": "nvidia_apt_repo", + "package_pattern": "^Package: nvidia-driver-([0-9]+)$", + "version_extract": "highest_match" + }, + "update_targets": [ + "scripts/bootstrap.sh" + ], + "notes": "Fallback driver series in bootstrap.sh. Updated with -server suffix preserved." 
+ }, + { + "name": "cuda-toolkit", + "category": "nvidia_package", + "current_version": "12.0", + "source": { + "check_method": "nvidia_apt_repo", + "package_pattern": "^Package: cuda-toolkit-([0-9]+)-([0-9]+)$", + "version_extract": "cuda_major_minor" + }, + "update_targets": [ + "scripts/bootstrap.sh" + ], + "notes": "Fallback CUDA runtime version in bootstrap.sh (_cuda_runtime:-M.N).", + "constraints": [ + { + "requires": "nvidia-driver", + "minimum_version": { + "13": "580", + "12": "525", + "11": "450" + }, + "description": "CUDA major version requires minimum driver series" + } + ] + }, + { + "name": "datacenter-gpu-manager", + "category": "nvidia_package", + "current_version": "3.3.9", + "source": { + "check_method": "nvidia_apt_repo", + "package_pattern": "^Package: datacenter-gpu-manager$", + "version_extract": "package_version" + }, + "update_targets": [], + "notes": "Bootstrap installs latest. Report-only; current_version tracks latest known." + }, + { + "name": "libnccl2", + "category": "nvidia_package", + "current_version": "2.29.3", + "source": { + "check_method": "nvidia_apt_repo", + "package_pattern": "^Package: libnccl2$", + "version_extract": "nccl_version" + }, + "update_targets": [], + "notes": "Bootstrap installs latest. Version stripped of CUDA suffix (e.g. 2.29.3-1+cuda13.1 → 2.29.3)." + }, + { + "name": "nvidia-fabricmanager", + "category": "nvidia_package", + "current_version": "575", + "source": { + "check_method": "nvidia_apt_repo", + "package_pattern": "^Package: nvidia-fabricmanager-([0-9]+)$", + "version_extract": "highest_match" + }, + "update_targets": [], + "notes": "Bootstrap dynamically matches driver major. Report-only." + }, + { + "name": "nvidia-container-toolkit", + "category": "nvidia_package", + "current_version": "v1.18.2", + "source": { + "check_method": "github_releases", + "repo": "NVIDIA/nvidia-container-toolkit" + }, + "update_targets": [], + "notes": "Bootstrap installs from stable repo. Report-only." 
+ } + ] +} diff --git a/specs/update-history.json b/specs/update-history.json new file mode 100644 index 0000000..fe51488 --- /dev/null +++ b/specs/update-history.json @@ -0,0 +1 @@ +[] diff --git a/tests/check_updates.bats b/tests/check_updates.bats new file mode 100755 index 0000000..20a67ba --- /dev/null +++ b/tests/check_updates.bats @@ -0,0 +1,331 @@ +#!/usr/bin/env bats +# Tests for specs/dependencies.json manifest and scripts/check-updates.sh. +# Run: bats tests/check_updates.bats + +load helpers + +# ── Test setup ── +setup() { + setup_test_env "test-check-updates" + SCRIPT="${HPC_BENCH_ROOT}/scripts/check-updates.sh" + MANIFEST="${HPC_BENCH_ROOT}/specs/dependencies.json" + DEFAULTS="${HPC_BENCH_ROOT}/conf/defaults.sh" + PRECOMMIT_CFG="${HPC_BENCH_ROOT}/.pre-commit-config.yaml" +} + +teardown() { + teardown_test_env +} + +# ═══════════════════════════════════════════ +# Manifest schema validation +# ═══════════════════════════════════════════ + +@test "deps-manifest: dependencies.json is valid JSON" { + jq . 
"$MANIFEST" >/dev/null 2>&1 +} + +@test "deps-manifest: has schema_version field" { + local ver + ver=$(jq -r '.schema_version' "$MANIFEST") + [ "$ver" != "null" ] && [ -n "$ver" ] +} + +@test "deps-manifest: every dependency has required fields" { + local bad="" + local count + count=$(jq '.dependencies | length' "$MANIFEST") + for ((i = 0; i < count; i++)); do + local name category version method + name=$(jq -r ".dependencies[$i].name" "$MANIFEST") + category=$(jq -r ".dependencies[$i].category" "$MANIFEST") + version=$(jq -r ".dependencies[$i].current_version" "$MANIFEST") + method=$(jq -r ".dependencies[$i].source.check_method" "$MANIFEST") + if [ "$name" = "null" ] || [ "$category" = "null" ] || \ + [ "$version" = "null" ] || [ "$method" = "null" ]; then + bad="${bad} index=${i}(${name:-?})" + fi + done + [ -z "$bad" ] || _fail "Dependencies missing required fields:${bad}" +} + +@test "deps-manifest: no duplicate dependency names" { + local dupes + dupes=$(jq -r '.dependencies[].name' "$MANIFEST" | sort | uniq -d) + [ -z "$dupes" ] || _fail "Duplicate dependency names: $dupes" +} + +@test "deps-manifest: all check_method values are recognized" { + local known="nvcr_registry dockerhub github_releases github_tags github_commits nvidia_apt_repo" + local bad="" + local count + count=$(jq '.dependencies | length' "$MANIFEST") + for ((i = 0; i < count; i++)); do + local method + method=$(jq -r ".dependencies[$i].source.check_method" "$MANIFEST") + local found=false + for k in $known; do + [ "$method" = "$k" ] && found=true && break + done + if [ "$found" = false ]; then + bad="${bad} ${method}" + fi + done + [ -z "$bad" ] || _fail "Unknown check methods:${bad}" +} + +@test "deps-manifest: container_image deps have update_targets" { + local bad="" + local count + count=$(jq '.dependencies | length' "$MANIFEST") + for ((i = 0; i < count; i++)); do + local cat targets + cat=$(jq -r ".dependencies[$i].category" "$MANIFEST") + if [ "$cat" = "container_image" ]; then + 
targets=$(jq ".dependencies[$i].update_targets | length" "$MANIFEST") + if [ "$targets" -eq 0 ]; then + local name + name=$(jq -r ".dependencies[$i].name" "$MANIFEST") + bad="${bad} ${name}" + fi + fi + done + [ -z "$bad" ] || _fail "Container image deps without update_targets:${bad}" +} + +@test "deps-manifest: upstream_source deps have empty update_targets" { + local bad="" + local count + count=$(jq '.dependencies | length' "$MANIFEST") + for ((i = 0; i < count; i++)); do + local cat targets + cat=$(jq -r ".dependencies[$i].category" "$MANIFEST") + if [ "$cat" = "upstream_source" ]; then + targets=$(jq ".dependencies[$i].update_targets | length" "$MANIFEST") + if [ "$targets" -ne 0 ]; then + local name + name=$(jq -r ".dependencies[$i].name" "$MANIFEST") + bad="${bad} ${name}" + fi + fi + done + [ -z "$bad" ] || _fail "Upstream source deps should have empty update_targets:${bad}" +} + +# ═══════════════════════════════════════════ +# Cross-check: manifest vs actual files +# ═══════════════════════════════════════════ + +@test "deps-crosscheck: HPL_IMAGE version in defaults.sh matches manifest" { + local manifest_ver + manifest_ver=$(jq -r '.dependencies[] | select(.name=="hpc-benchmarks") | .current_version' "$MANIFEST") + [ -n "$manifest_ver" ] + grep -q "hpc-benchmarks:${manifest_ver}" "$DEFAULTS" +} + +@test "deps-crosscheck: HPL_IMAGE_ALT version in defaults.sh matches manifest" { + local manifest_alt + manifest_alt=$(jq -r '.dependencies[] | select(.name=="hpc-benchmarks") | .current_alt_version' "$MANIFEST") + [ -n "$manifest_alt" ] + grep -q "hpc-benchmarks:${manifest_alt}" "$DEFAULTS" +} + +@test "deps-crosscheck: pre-commit-hooks rev matches manifest" { + local manifest_ver + manifest_ver=$(jq -r '.dependencies[] | select(.name=="pre-commit-hooks") | .current_version' "$MANIFEST") + [ -n "$manifest_ver" ] + grep -A1 'pre-commit/pre-commit-hooks' "$PRECOMMIT_CFG" | grep -q "rev: ${manifest_ver}" +} + +@test "deps-crosscheck: pre-commit-shfmt rev matches 
manifest" { + local manifest_ver + manifest_ver=$(jq -r '.dependencies[] | select(.name=="pre-commit-shfmt") | .current_version' "$MANIFEST") + [ -n "$manifest_ver" ] + grep -A1 'scop/pre-commit-shfmt' "$PRECOMMIT_CFG" | grep -q "rev: ${manifest_ver}" +} + +@test "deps-crosscheck: shellcheck-py rev matches manifest" { + local manifest_ver + manifest_ver=$(jq -r '.dependencies[] | select(.name=="shellcheck-py") | .current_version' "$MANIFEST") + [ -n "$manifest_ver" ] + grep -A1 'shellcheck-py/shellcheck-py' "$PRECOMMIT_CFG" | grep -q "rev: ${manifest_ver}" +} + +# ═══════════════════════════════════════════ +# Script validation +# ═══════════════════════════════════════════ + +@test "check-updates: script passes bash -n" { + bash -n "$SCRIPT" +} + +@test "check-updates: --help exits 0 and prints usage" { + run bash "$SCRIPT" --help + [ "$status" -eq 0 ] + [[ "$output" == *"Usage:"* ]] + [[ "$output" == *"--json"* ]] + [[ "$output" == *"--apply"* ]] +} + +@test "check-updates: unknown flag exits non-zero" { + run bash "$SCRIPT" --bogus-flag + [ "$status" -ne 0 ] +} + +@test "check-updates: --category with missing value exits non-zero" { + run bash "$SCRIPT" --category + [ "$status" -ne 0 ] +} + +# ═══════════════════════════════════════════ +# NVIDIA package manifest validation +# ═══════════════════════════════════════════ + +@test "deps-manifest: nvidia_package deps with update_targets have target files" { + local bad="" + local count + count=$(jq '.dependencies | length' "$MANIFEST") + for ((i = 0; i < count; i++)); do + local cat targets_len + cat=$(jq -r ".dependencies[$i].category" "$MANIFEST") + if [ "$cat" = "nvidia_package" ]; then + targets_len=$(jq ".dependencies[$i].update_targets | length" "$MANIFEST") + for ((j = 0; j < targets_len; j++)); do + local tgt + tgt=$(jq -r ".dependencies[$i].update_targets[$j]" "$MANIFEST") + if [ ! 
-f "${HPC_BENCH_ROOT}/${tgt}" ]; then + local name + name=$(jq -r ".dependencies[$i].name" "$MANIFEST") + bad="${bad} ${name}:${tgt}" + fi + done + fi + done + [ -z "$bad" ] || _fail "Update target files not found:${bad}" +} + +@test "deps-manifest: nvidia_apt_repo deps have required source fields" { + local bad="" + local count + count=$(jq '.dependencies | length' "$MANIFEST") + for ((i = 0; i < count; i++)); do + local method + method=$(jq -r ".dependencies[$i].source.check_method" "$MANIFEST") + if [ "$method" = "nvidia_apt_repo" ]; then + local pattern extract + pattern=$(jq -r ".dependencies[$i].source.package_pattern" "$MANIFEST") + extract=$(jq -r ".dependencies[$i].source.version_extract" "$MANIFEST") + if [ "$pattern" = "null" ] || [ "$extract" = "null" ]; then + local name + name=$(jq -r ".dependencies[$i].name" "$MANIFEST") + bad="${bad} ${name}" + fi + fi + done + [ -z "$bad" ] || _fail "nvidia_apt_repo deps missing package_pattern/version_extract:${bad}" +} + +@test "deps-crosscheck: nvidia-driver fallback in bootstrap.sh matches manifest" { + local manifest_ver + manifest_ver=$(jq -r '.dependencies[] | select(.name=="nvidia-driver") | .current_version' "$MANIFEST") + [ -n "$manifest_ver" ] + grep -q "nvidia-driver-${manifest_ver}-server" "${HPC_BENCH_ROOT}/scripts/bootstrap.sh" +} + +@test "deps-crosscheck: CUDA fallback in bootstrap.sh matches manifest" { + local manifest_ver + manifest_ver=$(jq -r '.dependencies[] | select(.name=="cuda-toolkit") | .current_version' "$MANIFEST") + [ -n "$manifest_ver" ] + grep -q "_cuda_runtime:-${manifest_ver}" "${HPC_BENCH_ROOT}/scripts/bootstrap.sh" +} + +@test "deps-manifest: total dependency count is 14" { + local count + count=$(jq '.dependencies | length' "$MANIFEST") + [ "$count" -eq 14 ] || _fail "Expected 14 dependencies, got ${count}" +} + +# ═══════════════════════════════════════════ +# Phase 4: Constraints, dry-run, history +# ═══════════════════════════════════════════ + +@test "deps-manifest: 
constraints reference valid dependency names" { + local all_names + all_names=$(jq -r '[.dependencies[].name] | join(" ")' "$MANIFEST") + local bad="" + local count + count=$(jq '.dependencies | length' "$MANIFEST") + for ((i = 0; i < count; i++)); do + local n_constraints + n_constraints=$(jq ".dependencies[$i].constraints // [] | length" "$MANIFEST") + for ((ci = 0; ci < n_constraints; ci++)); do + local req + req=$(jq -r ".dependencies[$i].constraints[$ci].requires" "$MANIFEST") + local found=false + for n in $all_names; do + [ "$n" = "$req" ] && found=true && break + done + if [ "$found" = false ]; then + local dname + dname=$(jq -r ".dependencies[$i].name" "$MANIFEST") + bad="${bad} ${dname}→${req}" + fi + done + done + [ -z "$bad" ] || _fail "Constraints reference unknown deps:${bad}" +} + +@test "deps-manifest: constraints have required fields" { + local bad="" + local count + count=$(jq '.dependencies | length' "$MANIFEST") + for ((i = 0; i < count; i++)); do + local n_constraints + n_constraints=$(jq ".dependencies[$i].constraints // [] | length" "$MANIFEST") + for ((ci = 0; ci < n_constraints; ci++)); do + local req min_ver desc + req=$(jq -r ".dependencies[$i].constraints[$ci].requires" "$MANIFEST") + min_ver=$(jq -r ".dependencies[$i].constraints[$ci].minimum_version" "$MANIFEST") + desc=$(jq -r ".dependencies[$i].constraints[$ci].description" "$MANIFEST") + if [ "$req" = "null" ] || [ "$min_ver" = "null" ] || [ "$desc" = "null" ]; then + local dname + dname=$(jq -r ".dependencies[$i].name" "$MANIFEST") + bad="${bad} ${dname}[$ci]" + fi + done + done + [ -z "$bad" ] || _fail "Constraints missing required fields:${bad}" +} + +@test "check-updates: --dry-run without --apply exits non-zero" { + run bash "$SCRIPT" --dry-run + [ "$status" -ne 0 ] + [[ "$output" == *"--dry-run requires --apply"* ]] +} + +@test "check-updates: --help shows dry-run option" { + run bash "$SCRIPT" --help + [ "$status" -eq 0 ] + [[ "$output" == *"--dry-run"* ]] +} + +@test 
"update-history: initial file is valid JSON array" { + local history_file="${HPC_BENCH_ROOT}/specs/update-history.json" + [ -f "$history_file" ] + local val + val=$(jq 'type' "$history_file") + [ "$val" = '"array"' ] +} + +@test "deps-manifest: cuda-toolkit has constraints for nvidia-driver" { + local has_constraint + has_constraint=$(jq '.dependencies[] | select(.name=="cuda-toolkit") | .constraints[]? | select(.requires=="nvidia-driver") | .requires' "$MANIFEST") + [ -n "$has_constraint" ] || _fail "cuda-toolkit should have a constraint on nvidia-driver" +} + +@test "deps-manifest: cuda-toolkit constraint has CUDA 13 driver minimum" { + local min_13 + min_13=$(jq -r '.dependencies[] | select(.name=="cuda-toolkit") | .constraints[] | select(.requires=="nvidia-driver") | .minimum_version["13"] // empty' "$MANIFEST") + [ -n "$min_13" ] || _fail "cuda-toolkit should have minimum driver for CUDA 13" + [ "$min_13" -ge 500 ] || _fail "CUDA 13 minimum driver ${min_13} seems too low" +}