Skip to content
Merged
92 changes: 92 additions & 0 deletions .ci/run_test_suite.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ case $TestSuite in
"evmfallbacksuite")
CMAKE_OPTIONS="$CMAKE_OPTIONS -DZEN_ENABLE_SPEC_TEST=ON -DZEN_ENABLE_ASSEMBLYSCRIPT_TEST=ON -DZEN_ENABLE_EVM=ON -DZEN_ENABLE_LIBEVM=ON -DZEN_ENABLE_JIT_FALLBACK_TEST=ON"
;;
"benchmarksuite")
CMAKE_OPTIONS="$CMAKE_OPTIONS -DZEN_ENABLE_EVM=ON -DZEN_ENABLE_LIBEVM=ON -DZEN_ENABLE_SINGLEPASS_JIT=OFF -DZEN_ENABLE_MULTIPASS_JIT=ON -DZEN_ENABLE_JIT_PRECOMPILE_FALLBACK=ON"
;;
esac

case $CPU_EXCEPTION_TYPE in
Expand All @@ -97,6 +100,10 @@ if [[ $TestSuite == "evmonetestsuite" ]]; then
STACK_TYPES=("-DZEN_ENABLE_VIRTUAL_STACK=ON")
fi

if [[ $TestSuite == "benchmarksuite" ]]; then
STACK_TYPES=("-DZEN_ENABLE_VIRTUAL_STACK=ON")
fi

# Make the freshly built binaries under build/ resolvable for the test
# runners invoked below.
export PATH=$PATH:$PWD/build
# Snapshot the suite-level CMake options; presumably each stack-type
# iteration restores from this before appending its own flag — the reset
# happens below this view, TODO confirm.
CMAKE_OPTIONS_ORIGIN="$CMAKE_OPTIONS"

Expand Down Expand Up @@ -163,5 +170,90 @@ for STACK_TYPE in ${STACK_TYPES[@]}; do
# Run the EVM test harness against the freshly built dtvm, then the
# dedicated fallback-execution test binary produced by this build.
python3 tools/run_evm_tests.py -r build/dtvm $EXTRA_EXE_OPTIONS
./build/evmFallbackExecutionTests
;;
"benchmarksuite")
# Clone evmone and run performance regression check
EVMONE_DIR="evmone"
if [ ! -d "$EVMONE_DIR" ]; then
git clone --depth 1 --recurse-submodules -b for_test https://github.com/DTVMStack/evmone.git $EVMONE_DIR
fi

BENCHMARK_THRESHOLD=${BENCHMARK_THRESHOLD:-0.15}
BENCHMARK_MODE=${BENCHMARK_MODE:-multipass}
BENCHMARK_SUMMARY_FILE=${BENCHMARK_SUMMARY_FILE:-/tmp/perf_summary.md}

cp build/lib/* $EVMONE_DIR/

cd $EVMONE_DIR
Comment on lines +184 to +186
Copy link

Copilot AI Feb 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

set -e is enabled, so cp build/lib/* $EVMONE_DIR/ will fail the whole benchmark run if the glob doesn’t match (or if build/lib contains non-regular files). Since only libdtvmapi.so is needed for the benchmark runner, consider copying that specific file (and/or enabling nullglob / adding an explicit existence check) to make the CI step more robust.

Suggested change
cp build/lib/* $EVMONE_DIR/
cd $EVMONE_DIR
if [ -f "build/lib/libdtvmapi.so" ]; then
cp build/lib/libdtvmapi.so "$EVMONE_DIR/"
else
echo "Error: build/lib/libdtvmapi.so not found; cannot run benchmarks." >&2
exit 1
fi
cd "$EVMONE_DIR"

Copilot uses AI. Check for mistakes.

cp ../tools/check_performance_regression.py ./

if [ ! -f "build/bin/evmone-bench" ]; then
cmake -S . -B build -DEVMONE_TESTING=ON -DCMAKE_BUILD_TYPE=Release
cmake --build build --parallel -j 16
fi

BASELINE_CACHE=${BENCHMARK_BASELINE_CACHE:-}

if [ -n "$BASELINE_CACHE" ] && [ -f "$BASELINE_CACHE" ]; then
# Cached baseline available -- only run current benchmarks.
echo "Using cached baseline: $BASELINE_CACHE"
python3 check_performance_regression.py \
--baseline "$BASELINE_CACHE" \
--threshold "$BENCHMARK_THRESHOLD" \
--output-summary "$BENCHMARK_SUMMARY_FILE" \
--lib ./libdtvmapi.so \
--mode "$BENCHMARK_MODE" \
--benchmark-dir test/evm-benchmarks/benchmarks
elif [ -n "$BENCHMARK_BASELINE_LIB" ]; then
# No cache -- run baseline benchmarks with the pre-built
# baseline library, then run current benchmarks and compare.
echo "Running baseline benchmarks with library from base branch..."
cp "$BENCHMARK_BASELINE_LIB"/libdtvmapi.so ./libdtvmapi.so
SAVE_PATH=${BASELINE_CACHE:-/tmp/perf_baseline.json}
python3 check_performance_regression.py \
--save-baseline "$SAVE_PATH" \
--lib ./libdtvmapi.so \
--mode "$BENCHMARK_MODE" \
--benchmark-dir test/evm-benchmarks/benchmarks

echo "Running current benchmarks with PR library..."
cp ../build/lib/libdtvmapi.so ./libdtvmapi.so
python3 check_performance_regression.py \
--baseline "$SAVE_PATH" \
--threshold "$BENCHMARK_THRESHOLD" \
--output-summary "$BENCHMARK_SUMMARY_FILE" \
--lib ./libdtvmapi.so \
--mode "$BENCHMARK_MODE" \
--benchmark-dir test/evm-benchmarks/benchmarks
elif [ -n "$BENCHMARK_SAVE_BASELINE" ]; then
echo "Saving performance baseline..."
python3 check_performance_regression.py \
--save-baseline "$BENCHMARK_SAVE_BASELINE" \
--output-summary "$BENCHMARK_SUMMARY_FILE" \
--lib ./libdtvmapi.so \
--mode "$BENCHMARK_MODE" \
--benchmark-dir test/evm-benchmarks/benchmarks
elif [ -n "$BENCHMARK_BASELINE_FILE" ]; then
echo "Checking performance regression against baseline..."
python3 check_performance_regression.py \
--baseline "$BENCHMARK_BASELINE_FILE" \
--threshold "$BENCHMARK_THRESHOLD" \
--output-summary "$BENCHMARK_SUMMARY_FILE" \
--lib ./libdtvmapi.so \
--mode "$BENCHMARK_MODE" \
--benchmark-dir test/evm-benchmarks/benchmarks
else
echo "Running benchmark suite without comparison..."
python3 check_performance_regression.py \
--save-baseline benchmark_results.json \
--output-summary "$BENCHMARK_SUMMARY_FILE" \
--lib ./libdtvmapi.so \
--mode "$BENCHMARK_MODE" \
--benchmark-dir test/evm-benchmarks/benchmarks
cat benchmark_results.json
fi

cd ..
;;
esac
done
166 changes: 166 additions & 0 deletions .github/workflows/dtvm_evm_test_x86.yml
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,169 @@ jobs:
export ENABLE_GAS_METER=true

bash .ci/run_test_suite.sh

performance_regression_check:
name: Performance Regression Check (${{ matrix.mode }})
if: github.event_name == 'pull_request'
runs-on: ubuntu-latest
permissions:
contents: read
pull-requests: write
issues: write
strategy:
fail-fast: false
matrix:
mode: [interpreter, multipass]
container:
image: dtvmdev1/dtvm-dev-x64:main
steps:
- name: Check out code
uses: actions/checkout@v3
with:
submodules: "true"
fetch-depth: 0

- name: Setup git safe directory
run: |
echo "Configuring git safe directory: ${{ github.workspace }}"
git config --global --add safe.directory /__w/DTVM/DTVM

- name: Code Format Check
run: |
./tools/format.sh check

- name: Restore baseline cache
id: baseline-cache
uses: actions/cache@v4
with:
path: /tmp/perf_baseline_${{ matrix.mode }}.json
key: perf-baseline-${{ matrix.mode }}-${{ github.event.pull_request.base.sha }}

- name: Build baseline library (${{ github.base_ref }})
if: steps.baseline-cache.outputs.cache-hit != 'true'
run: |
echo "Building baseline library from branch: ${{ github.base_ref }}"

export LLVM_SYS_150_PREFIX=/opt/llvm15
export LLVM_DIR=$LLVM_SYS_150_PREFIX/lib/cmake/llvm
export PATH=$LLVM_SYS_150_PREFIX/bin:$PATH

git stash push -u -m "perf-check-stash"
git checkout ${{ github.base_ref }}

cmake -S . -B build \
-DCMAKE_BUILD_TYPE=Release \
-DZEN_ENABLE_SINGLEPASS_JIT=OFF \
-DZEN_ENABLE_MULTIPASS_JIT=ON \
-DZEN_ENABLE_EVM=ON \
-DZEN_ENABLE_LIBEVM=ON \
-DZEN_ENABLE_JIT_PRECOMPILE_FALLBACK=ON \
-DZEN_ENABLE_CPU_EXCEPTION=ON \
-DZEN_ENABLE_VIRTUAL_STACK=ON
cmake --build build -j 16

mkdir -p /tmp/baseline_lib
cp build/lib/* /tmp/baseline_lib/

rm -rf build
git checkout ${{ github.sha }}
git stash pop || true

- name: Build current PR and check regression
id: perf-check
run: |
echo "Building PR branch: ${{ github.sha }}"

export LLVM_SYS_150_PREFIX=/opt/llvm15
export LLVM_DIR=$LLVM_SYS_150_PREFIX/lib/cmake/llvm
export PATH=$LLVM_SYS_150_PREFIX/bin:$PATH

rm -rf build evmone

export CMAKE_BUILD_TARGET=Release
export ENABLE_ASAN=false
export RUN_MODE=multipass
export ENABLE_LAZY=false
export ENABLE_MULTITHREAD=true
export TestSuite=benchmarksuite
export CPU_EXCEPTION_TYPE='cpu'
export BENCHMARK_MODE=${{ matrix.mode }}
export BENCHMARK_THRESHOLD=0.20
export BENCHMARK_BASELINE_CACHE=/tmp/perf_baseline_${{ matrix.mode }}.json
export BENCHMARK_BASELINE_LIB=/tmp/baseline_lib
export BENCHMARK_SUMMARY_FILE=/tmp/perf_summary_${{ matrix.mode }}.md

bash .ci/run_test_suite.sh
continue-on-error: true

- name: Write Performance Summary
if: always()
run: |
MODE="${{ matrix.mode }}"
OUTCOME="${{ steps.perf-check.outcome }}"
SUMMARY_FILE="/tmp/perf_summary_${MODE}.md"
if [ "$OUTCOME" = "success" ]; then
echo "✅ **Performance Check Passed (${MODE})**" >> $GITHUB_STEP_SUMMARY
else
echo "⚠️ **Performance Regression Detected (${MODE})**" >> $GITHUB_STEP_SUMMARY
fi
echo "" >> $GITHUB_STEP_SUMMARY
if [ -f "$SUMMARY_FILE" ]; then
cat "$SUMMARY_FILE" >> $GITHUB_STEP_SUMMARY
else
echo "_No benchmark summary available._" >> $GITHUB_STEP_SUMMARY
fi

- name: Save performance artifacts
if: always()
run: |
mkdir -p /tmp/perf-artifacts
echo "${{ github.event.pull_request.number }}" > /tmp/perf-artifacts/pr_number
echo "${{ steps.perf-check.outcome }}" > /tmp/perf-artifacts/outcome
cp "/tmp/perf_summary_${{ matrix.mode }}.md" /tmp/perf-artifacts/summary.md 2>/dev/null || \
echo "_No benchmark summary available._" > /tmp/perf-artifacts/summary.md

- name: Upload performance results
if: always()
uses: actions/upload-artifact@v4
with:
name: perf-results-${{ matrix.mode }}
path: /tmp/perf-artifacts/
retention-days: 7

- name: Comment on PR
if: always()
uses: actions/github-script@v6
Copy link

Copilot AI Feb 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actions/github-script@v6 runs on the deprecated Node 16 runtime; GitHub Actions is removing Node 16 support, which can break this step in the future. Consider bumping to actions/github-script@v7 (Node 20) to avoid upcoming CI failures.

Suggested change
uses: actions/github-script@v6
uses: actions/github-script@v7

Copilot uses AI. Check for mistakes.
continue-on-error: true
with:
script: |
const fs = require('fs');
const mode = '${{ matrix.mode }}';
const passed = '${{ steps.perf-check.outcome }}' === 'success';
let summary = '';
try {
summary = fs.readFileSync(`/tmp/perf_summary_${mode}.md`, 'utf8');
} catch (e) {
summary = '_No benchmark summary available._';
}
const icon = passed ? '✅' : '⚠️';
const title = passed
? `Performance Check Passed (${mode})`
: `Performance Regression Detected (${mode})`;
const body = `${icon} **${title}**\n\n${summary}`;
try {
await github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: body
});
} catch (error) {
core.warning(`Could not comment on PR (expected for fork PRs): ${error.message}. Results are available in the job summary above.`);
}

- name: Fail on regression
if: steps.perf-check.outcome == 'failure'
run: |
echo "::error::Performance regression detected in ${{ matrix.mode }} mode. See logs for details."
exit 1
96 changes: 96 additions & 0 deletions .github/workflows/perf_pr_comment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
name: Post Performance Check Results

on:
workflow_run:
workflows: ["DTVM-EVM test CI in x86-64"]
types:
- completed

permissions:
  pull-requests: write
  # Required: the workflow calls github.rest.issues.* (list/update/create
  # comment), which needs the issues scope on fine-grained GITHUB_TOKENs.
  issues: write
Copy link

Copilot AI Feb 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The workflow calls github.rest.issues.* (list/update/create comment), but the top-level permissions: does not grant issues: write. On fine-grained GITHUB_TOKEN permissions this can lead to 403s and the workflow failing to post comments. Please add issues: write (or switch to PR-specific APIs that work with only pull-requests: write).

Suggested change
pull-requests: write
pull-requests: write
issues: write

Copilot uses AI. Check for mistakes.
actions: read

jobs:
comment:
if: github.event.workflow_run.event == 'pull_request'
runs-on: ubuntu-latest
steps:
- name: Download interpreter results
uses: actions/download-artifact@v4
with:
name: perf-results-interpreter
path: /tmp/perf-interpreter
github-token: ${{ secrets.GITHUB_TOKEN }}
run-id: ${{ github.event.workflow_run.id }}
continue-on-error: true

- name: Download multipass results
uses: actions/download-artifact@v4
with:
name: perf-results-multipass
path: /tmp/perf-multipass
github-token: ${{ secrets.GITHUB_TOKEN }}
run-id: ${{ github.event.workflow_run.id }}
continue-on-error: true

- name: Post PR comment
uses: actions/github-script@v6
with:
script: |
const fs = require('fs');
let prNumber = null;
let body = '<!-- perf-check-results -->\n## ⚡ Performance Regression Check Results\n\n';
let hasResults = false;

for (const mode of ['interpreter', 'multipass']) {
const dir = `/tmp/perf-${mode}`;
try {
if (!prNumber) {
prNumber = parseInt(fs.readFileSync(`${dir}/pr_number`, 'utf8').trim());
}
const outcome = fs.readFileSync(`${dir}/outcome`, 'utf8').trim();
const summary = fs.readFileSync(`${dir}/summary.md`, 'utf8');
Comment on lines +45 to +52
Copy link

Copilot AI Feb 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This workflow trusts pr_number from artifacts generated by a pull_request run. For fork PRs, artifact contents are attacker-controlled; with this workflow’s write token, a malicious PR can set pr_number to any issue/PR and cause the workflow to spam or overwrite comments elsewhere in the repo. Please derive the PR number from github.event.workflow_run.pull_requests (and/or verify the artifact PR number matches that list) before posting/updating comments.

Copilot uses AI. Check for mistakes.
const passed = outcome === 'success';
const icon = passed ? '✅' : '⚠️';
const title = passed
? `Performance Check Passed (${mode})`
: `Performance Regression Detected (${mode})`;
body += `### ${icon} ${title}\n\n${summary}\n\n---\n\n`;
hasResults = true;
} catch (e) {
core.info(`No results for ${mode}: ${e.message}`);
}
}

if (!prNumber || !hasResults) {
core.info('No performance results to post');
return;
}

const { data: comments } = await github.rest.issues.listComments({
issue_number: prNumber,
owner: context.repo.owner,
repo: context.repo.repo,
});

const existing = comments.find(c =>
c.body && c.body.includes('<!-- perf-check-results -->')
);

if (existing) {
await github.rest.issues.updateComment({
comment_id: existing.id,
owner: context.repo.owner,
repo: context.repo.repo,
body: body.trim()
});
core.info(`Updated existing comment ${existing.id}`);
} else {
await github.rest.issues.createComment({
issue_number: prNumber,
owner: context.repo.owner,
repo: context.repo.repo,
body: body.trim()
});
core.info('Created new PR comment');
}
Loading
Loading