# Add competitive eval harness with measured scorecards #5

# Workflow name and the events that start it.
name: Benchmark Suite

on:
  # Manual runs.
  workflow_dispatch:

  # Pushes to main, limited to the paths listed below.
  push:
    branches: [main]
    paths:
      - "eval/benchmark-suite.json"
      - "eval/ground-truth/**"
      - "scripts/run-benchmark-suite.ps1"
      - ".github/workflows/benchmark-suite.yml"

  # Pull requests targeting main, limited to the paths listed below.
  pull_request:
    branches: [main]
    paths:
      - "scripts/run-benchmark-suite.ps1"
      - "scripts/export-benchmark-diffs.ps1"
      - "scripts/score-competitive-benchmark.py"
      - "eval/competitor-scope.json"
      - "scripts/enrich-benchmark-sonarcloud.py"
      - "scripts/enrich-benchmark-semgrep.py"
      - "scripts/benchmark_lib.py"
      - "eval/ground-truth/**"
      # Both files below are also in the push filter. Listing them here makes
      # a pull request that changes only one of them run the suite before
      # merge instead of for the first time on main.
      - "eval/benchmark-suite.json"
      - ".github/workflows/benchmark-suite.yml"
jobs:
  # Single Windows job: export diffs, run the CI fixtures, then run the
  # scorecard checks. Steps run in order; a failing step stops the job.
  benchmark-suite:
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v6

      - uses: actions/setup-dotnet@v5
        with:
          dotnet-version: "8.0.x"

      - name: Export diffs
        shell: pwsh
        env:
          GH_TOKEN: ${{ github.token }}
        run: ./scripts/export-benchmark-diffs.ps1 -CiOnly

      # The suite run and the scoring run are separate steps on purpose.
      # A pwsh step reports only the exit code of its last command, so when
      # both commands shared one step a non-zero exit code from the suite
      # script was overwritten by the scorer's exit code.
      - name: Run CI fixtures
        shell: pwsh
        env:
          GH_TOKEN: ${{ github.token }}
        run: ./scripts/run-benchmark-suite.ps1 -CiOnly

      # GH_TOKEN is kept so the scorer sees the same environment it had when
      # it shared a step with the suite run.
      - name: Score fixtures with ground truth
        shell: pwsh
        env:
          GH_TOKEN: ${{ github.token }}
        run: python scripts/score-competitive-benchmark.py --all-with-ground-truth --check

      - name: Scorecard check
        shell: pwsh
        run: python scripts/score-competitive-benchmark.py --fixture stackexchange-redis-pr-2995 --check