Benchmark #838
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Benchmark workflow: runs after the "Publish" workflow completes, or on manual dispatch. | |
| name: Benchmark | |
| on: | |
| # Fires when a run of the "Publish" workflow completes; each job below checks the run's conclusion itself. | |
| workflow_run: | |
| workflows: ["Publish"] | |
| types: [completed] | |
| # Manual trigger; only the embedding-benchmark job's condition accepts this event. | |
| workflow_dispatch: | |
| inputs: | |
| version: | |
| description: 'Version to benchmark for embedding-only ("dev" for local, or semver like "2.4.0" for npm)' | |
| required: false | |
| default: "dev" | |
| # No token permissions at workflow level; each job declares the ones it needs. | |
| permissions: {} | |
| jobs: | |
| # ── Record benchmark history for the just-published release ── | |
| # | |
| # The build/query/incremental/resolution benchmarks are measured during the | |
| # Publish workflow's pre-publish-benchmark gate (against the just-built | |
| # native artifact). That job uploads the modified history files as an | |
| # artifact only when the regression guard passes — meaning a publish that | |
| # would have regressed is aborted before reaching npm, and no PR is opened | |
| # for an un-published release. This job consumes that artifact and opens a | |
| # single PR with the updates. | |
| record-benchmarks: | |
| runs-on: ubuntu-latest | |
| # Runs only for a workflow_run event whose Publish run succeeded and was not itself triggered by a push. | |
| if: >- | |
| github.event_name == 'workflow_run' && | |
| github.event.workflow_run.conclusion == 'success' && | |
| github.event.workflow_run.event != 'push' | |
| # contents/pull-requests: write are needed by the later steps that push a branch and open a PR. | |
| # actions: read is presumably what allows downloading artifacts from another run — confirm. | |
| permissions: | |
| actions: read | |
| contents: write | |
| pull-requests: write | |
| steps: | |
| # Check out main with full history (fetch-depth: 0). | |
| - uses: actions/checkout@v6 | |
| with: | |
| fetch-depth: 0 | |
| ref: main | |
| token: ${{ secrets.GITHUB_TOKEN }} | |
| # Both artifacts are pulled from the triggering Publish run (run-id), not from this run. | |
| - name: Download benchmark history artifact | |
| uses: actions/download-artifact@v8 | |
| with: | |
| name: benchmark-files | |
| run-id: ${{ github.event.workflow_run.id }} | |
| github-token: ${{ secrets.GITHUB_TOKEN }} | |
| - name: Download benchmark JSON results | |
| uses: actions/download-artifact@v8 | |
| with: | |
| name: benchmark-results-json | |
| run-id: ${{ github.event.workflow_run.id }} | |
| github-token: ${{ secrets.GITHUB_TOKEN }} | |
| # Derive the release version from the highest vX.Y.Z tag (tags containing "dev" are excluded). | |
| # Output: steps.version.outputs.version — the tag without its leading "v". | |
| - name: Determine release version | |
| id: version | |
| run: | | |
| TAG=$(git tag --sort=-version:refname --list 'v[0-9]*.[0-9]*.[0-9]*' | grep -v dev | head -1) | |
| # Without a tag the version would be empty and the PR step would create a "chore/bench-v-…" branch; fail loudly instead. | |
| if [ -z "$TAG" ]; then | |
| echo "::error::No release tag matching vX.Y.Z found — cannot determine release version" | |
| exit 1 | |
| fi | |
| VERSION="${TAG#v}" | |
| echo "version=$VERSION" >> "$GITHUB_OUTPUT" | |
| # Report whether the working tree differs from HEAD for the benchmark docs: | |
| # either a tracked file was modified or a new file appeared under generated/benchmarks/. | |
| # Output: steps.changes.outputs.changed — "true" or "false". | |
| - name: Check for changes | |
| id: changes | |
| run: | | |
| if git diff --quiet HEAD -- generated/benchmarks/ README.md 2>/dev/null && [ -z "$(git ls-files --others --exclude-standard generated/benchmarks/)" ]; then | |
| CHANGED=false | |
| else | |
| CHANGED=true | |
| fi | |
| echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" | |
| # Commit the refreshed benchmark history on a new branch and open a PR against main, | |
| # unless a PR with the same title is already open. | |
| - name: Commit and push via PR | |
| if: steps.changes.outputs.changed == 'true' | |
| env: | |
| GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| VERSION: ${{ steps.version.outputs.version }} | |
| run: | | |
| TITLE="docs: update performance benchmarks (${VERSION})" | |
| # Look for an existing PR before touching git so a re-run does not push an orphan branch. | |
| # -x requires the whole title line to match, not just a substring. | |
| if gh pr list --state open --json title --jq ".[].title" | grep -qxF "$TITLE"; then | |
| echo "::notice::PR already open for '$TITLE' — skipping" | |
| exit 0 | |
| fi | |
| git config user.name "github-actions[bot]" | |
| git config user.email "github-actions[bot]@users.noreply.github.com" | |
| BRANCH="chore/bench-v${VERSION}-$(date +%Y%m%d-%H%M%S)" | |
| git checkout -b "$BRANCH" | |
| git add generated/benchmarks/BUILD-BENCHMARKS.md generated/benchmarks/QUERY-BENCHMARKS.md generated/benchmarks/INCREMENTAL-BENCHMARKS.md README.md | |
| # The change check watches all of generated/benchmarks/, but only the files above are staged; | |
| # stop cleanly when none of them changed instead of failing on an empty commit. | |
| if git diff --cached --quiet; then | |
| echo "::notice::No staged benchmark changes — skipping PR" | |
| exit 0 | |
| fi | |
| git commit -m "$TITLE" | |
| git push origin "$BRANCH" | |
| gh pr create \ | |
| --base main \ | |
| --head "$BRANCH" \ | |
| --title "$TITLE" \ | |
| --body "Automated benchmark history update for **${VERSION}** from publish run [#${{ github.event.workflow_run.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.event.workflow_run.id }}). These numbers were measured during the pre-publish gate and passed the regression guard before npm publish proceeded." | |
| # Engine-parity gate: surfaces wasm/native divergence as a red workflow | |
| # status (does not block — publish has already completed). Runs after | |
| # the doc PR is created so the PR still records data even when parity | |
| # regresses. | |
| # NOTE(review): this job has no setup-node step, so `node` is whatever the runner image provides — confirm that is intended. | |
| # NOTE(review): benchmark-result.json is presumably unpacked from the benchmark-results-json artifact — verify the file name. | |
| - name: Engine parity gate | |
| run: node scripts/benchmark-parity-gate.mjs benchmark-result.json | |
| # ── Embedding benchmark (post-publish, npm-installed package) ── | |
| # | |
| # Embeddings have no regression guard and take 2.5+ hours to run, so they | |
| # cannot fit in the pre-publish path. They run after a successful publish | |
| # against the npm-installed package and open their own PR. | |
| embedding-benchmark: | |
| runs-on: ubuntu-latest | |
| # 7 models x 30 min each = 210 min worst-case; symbols are sampled to 1500 so | |
| # typical runtime is ~23 min/model ≈ 160 min + setup headroom | |
| timeout-minutes: 240 | |
| # Manual dispatch always qualifies; otherwise the triggering Publish run must have | |
| # succeeded and must not have been started by a push. | |
| if: >- | |
| github.event_name == 'workflow_dispatch' || | |
| (github.event.workflow_run.conclusion == 'success' && | |
| github.event.workflow_run.event != 'push') | |
| # contents/pull-requests: write are needed by the final step that pushes a branch and opens a PR. | |
| permissions: | |
| actions: read | |
| contents: write | |
| pull-requests: write | |
| steps: | |
| # Check out main with full history (fetch-depth: 0). | |
| - uses: actions/checkout@v6 | |
| with: | |
| fetch-depth: 0 | |
| ref: main | |
| token: ${{ secrets.GITHUB_TOKEN }} | |
| # Node 22 with the npm download cache enabled. | |
| - uses: actions/setup-node@v6 | |
| with: | |
| node-version: "22" | |
| cache: "npm" | |
| # Install is capped at 20 minutes so a hung install cannot consume the job's 240-minute budget. | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: npm install --prefer-offline --no-audit --no-fund | |
| # Decide what to benchmark and expose it as step outputs: | |
| #   source  = "npm" (published package) or "local" (this checkout) | |
| #   version = release version, or "dev" for a local run | |
| # A workflow_run trigger uses the highest vX.Y.Z tag; a manual dispatch uses the "version" input. | |
| # The event name and the free-form input are passed through env instead of being | |
| # interpolated into the script text, so the input can never be parsed as shell code. | |
| - name: Determine benchmark mode | |
| id: mode | |
| env: | |
| EVENT_NAME: ${{ github.event_name }} | |
| REQUESTED_VERSION: ${{ inputs.version }} | |
| run: | | |
| if [ "$EVENT_NAME" = "workflow_run" ]; then | |
| TAG=$(git tag --sort=-version:refname --list 'v[0-9]*.[0-9]*.[0-9]*' | grep -v dev | head -1) | |
| VERSION="${TAG#v}" | |
| echo "source=npm" >> "$GITHUB_OUTPUT" | |
| echo "version=$VERSION" >> "$GITHUB_OUTPUT" | |
| elif [ "$REQUESTED_VERSION" = "dev" ] || [ -z "$REQUESTED_VERSION" ]; then | |
| echo "source=local" >> "$GITHUB_OUTPUT" | |
| echo "version=dev" >> "$GITHUB_OUTPUT" | |
| else | |
| echo "source=npm" >> "$GITHUB_OUTPUT" | |
| echo "version=$REQUESTED_VERSION" >> "$GITHUB_OUTPUT" | |
| fi | |
| # Skip the benchmark when EMBEDDING-BENCHMARKS.md already contains an entry for this version. | |
| # "dev" runs are never skipped. Output: steps.existing.outputs.skip — "true" or "false". | |
| # VERSION arrives via env: it can originate from the free-form dispatch input, so it must not be | |
| # interpolated into the script text. | |
| - name: Check for existing benchmark | |
| id: existing | |
| env: | |
| VERSION: ${{ steps.mode.outputs.version }} | |
| run: | | |
| # Escape dots so "2.4.0" is matched literally by the regex below. | |
| VERSION_RE="${VERSION//./\\.}" | |
| if [ "$VERSION" = "dev" ]; then | |
| echo "skip=false" >> "$GITHUB_OUTPUT" | |
| elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/benchmarks/EMBEDDING-BENCHMARKS.md 2>/dev/null; then | |
| echo "Benchmark for $VERSION already exists in EMBEDDING-BENCHMARKS.md — skipping" | |
| echo "skip=true" >> "$GITHUB_OUTPUT" | |
| else | |
| echo "skip=false" >> "$GITHUB_OUTPUT" | |
| fi | |
| # Poll the npm registry until the target version is visible: 20 attempts, 30 s apart (about 10 minutes). | |
| # Fails the job if the package never appears. | |
| # VERSION arrives via env: it can originate from the free-form dispatch input, so it must not be | |
| # interpolated into the script text. | |
| - name: Wait for npm propagation | |
| if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm' | |
| env: | |
| VERSION: ${{ steps.mode.outputs.version }} | |
| run: | | |
| echo "Waiting for @optave/codegraph@${VERSION} on npm..." | |
| for i in $(seq 1 20); do | |
| if npm view "@optave/codegraph@${VERSION}" version 2>/dev/null; then | |
| echo "Package available on npm" | |
| exit 0 | |
| fi | |
| echo " Attempt $i/20 — not yet available, waiting 30s..." | |
| sleep 30 | |
| done | |
| echo "::error::Package @optave/codegraph@${VERSION} not found on npm after 10 minutes" | |
| exit 1 | |
| # Cache ~/.cache/huggingface between runs. The key changes whenever files under | |
| # src/domain/search/ change; restore-keys falls back to an earlier cache with the same OS prefix. | |
| - name: Cache HuggingFace models | |
| if: steps.existing.outputs.skip != 'true' | |
| uses: actions/cache@v5 | |
| with: | |
| path: ~/.cache/huggingface | |
| key: hf-models-${{ runner.os }}-${{ hashFiles('src/domain/search/**') }} | |
| restore-keys: hf-models-${{ runner.os }}- | |
| # Build the codegraph graph for this checkout (".") before benchmarking. | |
| - name: Build graph | |
| if: steps.existing.outputs.skip != 'true' | |
| run: npx codegraph build . | |
| # Run scripts/embedding-benchmark.ts and capture its stdout as embedding-benchmark-result.json. | |
| # The version and source come in through env (BENCH_*) rather than being interpolated into the script, | |
| # because the version can originate from the free-form dispatch input. | |
| # NOTE(review): the job-level comment estimates ~160 min typical and 210 min worst-case for the | |
| # benchmark, yet this step is capped at 160 min — confirm the cap is intended. | |
| - name: Run embedding benchmark | |
| if: steps.existing.outputs.skip != 'true' | |
| timeout-minutes: 160 | |
| env: | |
| HF_TOKEN: ${{ secrets.HF_TOKEN }} | |
| BENCH_VERSION: ${{ steps.mode.outputs.version }} | |
| BENCH_SOURCE: ${{ steps.mode.outputs.source }} | |
| run: | | |
| # Node 23+ accepts --strip-types; older releases need the experimental flag name. | |
| STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')") | |
| # Build the argument list as an array so each value stays a single word. | |
| ARGS=(--version "$BENCH_VERSION") | |
| if [ "$BENCH_SOURCE" = "npm" ]; then | |
| ARGS+=(--npm) | |
| fi | |
| node "$STRIP_FLAG" --import ./scripts/ts-resolve-loader.js scripts/embedding-benchmark.ts "${ARGS[@]}" > embedding-benchmark-result.json | |
| # Feed the JSON result to scripts/update-embedding-report.ts. | |
| # Presumably this rewrites generated/benchmarks/EMBEDDING-BENCHMARKS.md (the file the later | |
| # change check watches) — verify against the script. | |
| - name: Update embedding report | |
| if: steps.existing.outputs.skip != 'true' | |
| run: | | |
| STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')") | |
| node $STRIP_FLAG scripts/update-embedding-report.ts embedding-benchmark-result.json | |
| # Keep the raw JSON result as a workflow artifact. | |
| - name: Upload embedding result | |
| if: steps.existing.outputs.skip != 'true' | |
| uses: actions/upload-artifact@v7 | |
| with: | |
| name: embedding-benchmark-result | |
| path: embedding-benchmark-result.json | |
| # Report whether EMBEDDING-BENCHMARKS.md differs from HEAD, either as a modified | |
| # tracked file or as a newly created (untracked) one. | |
| # Output: steps.changes.outputs.changed — "true" or "false". | |
| - name: Check for changes | |
| if: steps.existing.outputs.skip != 'true' | |
| id: changes | |
| run: | | |
| if git diff --quiet HEAD -- generated/benchmarks/EMBEDDING-BENCHMARKS.md 2>/dev/null && [ -z "$(git ls-files --others --exclude-standard generated/benchmarks/EMBEDDING-BENCHMARKS.md)" ]; then | |
| CHANGED=false | |
| else | |
| CHANGED=true | |
| fi | |
| echo "changed=$CHANGED" >> "$GITHUB_OUTPUT" | |
| # Commit the updated embedding report on a new branch and open a PR against main, | |
| # unless a PR with the same title is already open. | |
| - name: Commit and push via PR | |
| if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true' | |
| env: | |
| GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| VERSION: ${{ steps.mode.outputs.version }} | |
| run: | | |
| TITLE="docs: update embedding benchmarks (${VERSION})" | |
| # Look for an existing PR before touching git so a re-run does not push an orphan branch. | |
| # -x requires the whole title line to match, not just a substring. | |
| if gh pr list --state open --json title --jq ".[].title" | grep -qxF "$TITLE"; then | |
| echo "::notice::PR already open for '$TITLE' — skipping" | |
| exit 0 | |
| fi | |
| git config user.name "github-actions[bot]" | |
| git config user.email "github-actions[bot]@users.noreply.github.com" | |
| # "dev" runs get an unversioned branch name; releases carry the version. | |
| if [ "$VERSION" = "dev" ]; then | |
| BRANCH="chore/embedding-bench-dev-$(date +%Y%m%d-%H%M%S)" | |
| else | |
| BRANCH="chore/embedding-bench-v${VERSION}-$(date +%Y%m%d-%H%M%S)" | |
| fi | |
| git checkout -b "$BRANCH" | |
| git add generated/benchmarks/EMBEDDING-BENCHMARKS.md | |
| git commit -m "$TITLE" | |
| git push origin "$BRANCH" | |
| gh pr create \ | |
| --base main \ | |
| --head "$BRANCH" \ | |
| --title "$TITLE" \ | |
| --body "Automated embedding benchmark update for **${VERSION}** from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})." | |