From b61a91abf2737e853b205cb8e3e48ee702b66621 Mon Sep 17 00:00:00 2001
From: huntharo <harold@pwrdrvr.com>
Date: Thu, 12 Mar 2026 07:27:10 -0400
Subject: [PATCH 1/5] test: add cluster performance benchmark

---
 .github/workflows/ci.yml                      |  14 +
 package.json                                  |   1 +
 packages/api-core/package.json                |   1 +
 .../api-core/src/cluster/perf-baseline.json   |  30 ++
 .../api-core/src/cluster/perf.integration.ts  | 397 ++++++++++++++++++
 5 files changed, 443 insertions(+)
 create mode 100644 packages/api-core/src/cluster/perf-baseline.json
 create mode 100644 packages/api-core/src/cluster/perf.integration.ts

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 347006f..7785df6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -57,6 +57,20 @@ jobs:
       - name: Run workspace tests
         run: pnpm test
 
+  cluster-perf:
+    name: Cluster Perf
+    runs-on: ubuntu-latest
+    needs: install-deps
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+
+      - name: Install Node.js and dependencies
+        uses: ./.github/actions/configure-nodejs
+
+      - name: Run cluster performance integration test
+        run: pnpm test:cluster-perf
+
   cli-smoke:
     name: CLI Smoke
     runs-on: ubuntu-latest
diff --git a/package.json b/package.json
index 1c63108..5d5e047 100644
--- a/package.json
+++ b/package.json
@@ -28,6 +28,7 @@
     "neighbors": "node ./apps/cli/bin/ghcrawl.js neighbors",
     "tui": "node ./apps/cli/bin/ghcrawl.js tui",
     "serve": "node ./apps/cli/bin/ghcrawl.js serve",
+    "test:cluster-perf": "pnpm --filter @ghcrawl/api-core test:cluster-perf",
     "pack:smoke": "node ./scripts/pack-smoke.mjs",
     "release:apply-version": "node ./scripts/apply-release-version.mjs",
     "typecheck": "pnpm build && pnpm -r typecheck",
diff --git a/packages/api-core/package.json b/packages/api-core/package.json
index b94a0cd..819efe9 100644
--- a/packages/api-core/package.json
+++ b/packages/api-core/package.json
@@ -40,6 +40,7 @@
   "scripts": {
     "build": "tsc -p tsconfig.build.json",
     "prepack": "pnpm --filter @ghcrawl/api-contract build && pnpm run build",
+    "test:cluster-perf": "tsx --tsconfig tsconfig.test.json src/cluster/perf.integration.ts",
     "typecheck": "tsc -p tsconfig.json --noEmit",
     "test": "tsx --tsconfig tsconfig.test.json --test src/*.test.ts src/**/*.test.ts"
   },
diff --git a/packages/api-core/src/cluster/perf-baseline.json b/packages/api-core/src/cluster/perf-baseline.json
new file mode 100644
index 0000000..6fccc88
--- /dev/null
+++ b/packages/api-core/src/cluster/perf-baseline.json
@@ -0,0 +1,30 @@
+{
+  "schemaVersion": 1,
+  "fixture": {
+    "clusterCount": 64,
+    "threadsPerCluster": 8,
+    "clusterBlockWidth": 4,
+    "noiseDimensions": 32,
+    "sourceKinds": [
+      "title",
+      "body",
+      "dedupe_summary"
+    ],
+    "k": 7,
+    "minScore": 0.82
+  },
+  "benchmark": {
+    "warmupRuns": 1,
+    "runsPerSample": 3,
+    "minSamples": 5,
+    "maxSamples": 12,
+    "maxTotalMs": 10000
+  },
+  "baseline": {
+    "fixtureMedianMs": 239.1,
+    "projectedOpenclawMs": 600000
+  },
+  "thresholds": {
+    "maxRegressionPercent": 50
+  }
+}
diff --git a/packages/api-core/src/cluster/perf.integration.ts b/packages/api-core/src/cluster/perf.integration.ts
new file mode 100644
index 0000000..f6d021c
--- /dev/null
+++ b/packages/api-core/src/cluster/perf.integration.ts
@@ -0,0 +1,397 @@
+import assert from 'node:assert/strict';
+import fs from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+import { performance } from 'node:perf_hooks';
+import { fileURLToPath } from 'node:url';
+
+import { GHCrawlService } from '../service.js';
+
+type EmbeddingSourceKind = 'title' | 'body' | 'dedupe_summary'; // values the seeder writes to document_embeddings.source_kind
+
+type PerfBaseline = { // shape this benchmark expects perf-baseline.json to have
+  schemaVersion: number;
+  fixture: {
+    clusterCount: number; // synthetic clusters to seed; each run is also asserted to produce exactly this many clusters
+    threadsPerCluster: number; // threads seeded into each synthetic cluster
+    clusterBlockWidth: number; // embedding slots reserved per cluster (at most the first four are populated)
+    noiseDimensions: number; // trailing embedding slots filled with deterministic noise
+    sourceKinds: EmbeddingSourceKind[]; // one embedding row is seeded per thread per source kind
+    k: number; // forwarded to clusterRepository as `k`
+    minScore: number; // forwarded to clusterRepository as `minScore`
+  };
+  benchmark: {
+    warmupRuns: number; // cluster runs executed before sampling; their durations are not recorded
+    runsPerSample: number; // cluster runs timed together as one sample
+    minSamples: number; // samples required before the time budget may end sampling
+    maxSamples: number; // hard cap on the number of collected samples
+    maxTotalMs: number; // elapsed-time budget, checked after each sample once minSamples is reached
+  };
+  baseline: {
+    fixtureMedianMs: number; // reference median for the fixture; <= 0 is treated as "not recorded yet"
+    projectedOpenclawMs: number; // reference duration that gets scaled by (measured median / baseline median)
+  };
+  thresholds: {
+    maxRegressionPercent: number; // a delta above this percentage fails the run outside bootstrap mode
+  };
+};
+
+type PerfRunResult = { // values computed by measureBenchmark for one benchmark invocation
+  sampleDurationsMs: number[]; // wall-clock duration of each collected sample
+  medianMs: number; // median of sampleDurationsMs
+  baselineMedianMs: number; // recorded baseline median, or medianMs itself when no baseline is recorded
+  deltaMs: number; // medianMs - baselineMedianMs
+  deltaPercent: number; // deltaMs relative to baselineMedianMs, in percent (0 when the baseline is not positive)
+  projectedOpenclawMs: number; // baseline projection scaled by medianMs / baselineMedianMs
+  projectedBaselineOpenclawMs: number; // projectedOpenclawMs as stored in the baseline file
+  projectedDeltaMs: number; // projectedOpenclawMs - projectedBaselineOpenclawMs
+  projectedDeltaPercent: number; // projectedDeltaMs relative to projectedBaselineOpenclawMs, in percent
+  samples: number; // sampleDurationsMs.length
+  runsPerSample: number; // copied from the baseline's benchmark settings
+  threadCount: number; // clusterCount * threadsPerCluster
+  sourceKinds: EmbeddingSourceKind[]; // copied from the baseline's fixture settings
+  maxRegressionPercent: number; // copied from the baseline's thresholds
+};
+
+const BASELINE_PATH = fileURLToPath(new URL('./perf-baseline.json', import.meta.url));
+
+function loadBaseline(): PerfBaseline { // reads and parses the JSON file at BASELINE_PATH
+  return JSON.parse(fs.readFileSync(BASELINE_PATH, 'utf8')) as PerfBaseline; // unchecked cast: the parsed shape is not validated here
+}
+
+function shouldBootstrapBaseline(): boolean { // bootstrap mode is opted into through the environment
+  return process.env.GHCRAWL_CLUSTER_PERF_BOOTSTRAP === '1'; // only the exact string '1' enables it
+}
+
+/** Formats milliseconds as "N.N ms", "N.NN s" or "Nm N.Ns" ('n/a' if not finite); negative values keep their sign. */
+function formatDurationMs(durationMs: number): string {
+  if (!Number.isFinite(durationMs)) return 'n/a';
+  // Choose the unit from the magnitude so large negative deltas (speed-ups) are not stuck in the "ms" branch.
+  const sign = durationMs < 0 ? '-' : '';
+  const absoluteMs = Math.abs(durationMs);
+  if (absoluteMs < 1000) return `${sign}${absoluteMs.toFixed(1)} ms`;
+  const totalSeconds = absoluteMs / 1000;
+  if (totalSeconds < 60) return `${sign}${totalSeconds.toFixed(2)} s`;
+  const minutes = Math.floor(totalSeconds / 60);
+  const seconds = totalSeconds - minutes * 60;
+  return `${sign}${minutes}m ${seconds.toFixed(1)}s`;
+}
+
+/** Renders a percentage with one decimal place; strictly positive values get an explicit "+" prefix. */
+function formatPercent(value: number): string {
+  return `${value > 0 ? '+' : ''}${value.toFixed(1)}%`;
+}
+
+/** Returns the median of `values` without mutating the input; an empty list yields 0 instead of NaN. */
+function median(values: number[]): number {
+  if (values.length === 0) return 0;
+  const sorted = [...values].sort((left, right) => left - right);
+  const middle = Math.floor(sorted.length / 2);
+  // Even length: average the two central values; odd length: take the central one.
+  return sorted.length % 2 === 0 ? (sorted[middle - 1] + sorted[middle]) / 2 : sorted[middle];
+}
+
+function createGitHubStub(): GHCrawlService['github'] { // stand-in GitHub client: every method resolves immediately with an empty value
+  return {
+    checkAuth: async () => undefined,
+    getRepo: async () => ({}),
+    listRepositoryIssues: async () => [],
+    getIssue: async () => ({}),
+    getPull: async () => ({}),
+    listIssueComments: async () => [],
+    listPullReviews: async () => [],
+    listPullReviewComments: async () => [],
+  };
+}
+
+function createService(dbPath: string): GHCrawlService { // builds a GHCrawlService on the database at dbPath with fixed config values and the stubbed GitHub client
+  return new GHCrawlService({
+    config: {
+      workspaceRoot: process.cwd(),
+      configDir: path.dirname(dbPath), // config paths point at the directory that holds the database file
+      configPath: path.join(path.dirname(dbPath), 'config.json'),
+      configFileExists: true,
+      dbPath,
+      dbPathSource: 'config',
+      apiPort: 5179,
+      githubToken: 'ghp_testtoken1234567890', // dummy value; the GitHub client used here is the local stub
+      githubTokenSource: 'config',
+      secretProvider: 'plaintext',
+      tuiPreferences: {},
+      openaiApiKeySource: 'none',
+      summaryModel: 'gpt-5-mini',
+      embedModel: 'text-embedding-3-large', // same model name the seeder writes on every embedding row
+      embedBatchSize: 2,
+      embedConcurrency: 2,
+      embedMaxUnread: 4,
+      openSearchIndex: 'ghcrawl-threads',
+    },
+    github: createGitHubStub(),
+  });
+}
+
+/** Maps `seed` through one LCG step (x * 1664525 + 1013904223, as uint32) to a value in [-0.0125, 0.0125]. */
+function deterministicNoise(seed: number): number {
+  return (((Math.imul(seed, 1664525) + 1013904223) >>> 0) / 0xffffffff - 0.5) * 0.025;
+}
+
+/** Builds the synthetic embedding for one (cluster, thread, source kind) triple: cluster block, source marker slot, then deterministic noise. */
+function buildDeterministicEmbedding(params: {
+  clusterIndex: number;
+  threadOffset: number;
+  sourceIndex: number;
+  clusterCount: number;
+  clusterBlockWidth: number;
+  noiseDimensions: number;
+  sourceKinds: EmbeddingSourceKind[];
+}): number[] {
+  const { clusterIndex, threadOffset, sourceIndex, clusterBlockWidth, noiseDimensions } = params;
+  const blockRegionSize = params.clusterCount * clusterBlockWidth;
+  const vector = new Array<number>(blockRegionSize + noiseDimensions + params.sourceKinds.length).fill(0);
+  const sourceBias = 0.02 * (sourceIndex + 1);
+  const memberBias = 0.01 * ((threadOffset % 5) + 1);
+  // Cluster block: the first slot is always 1; up to three further slots are filled when the block is wide enough.
+  const blockStart = clusterIndex * clusterBlockWidth;
+  vector[blockStart] = 1;
+  const trailingWeights = [0.72 + sourceBias, 0.48 + memberBias, 0.28 + sourceBias + memberBias];
+  trailingWeights.forEach((weight, position) => {
+    if (clusterBlockWidth > position + 1) vector[blockStart + position + 1] = weight;
+  });
+  // Source marker slot, located right after all cluster blocks.
+  vector[blockRegionSize + sourceIndex] = 0.12 + sourceBias;
+
+  const noiseStart = blockRegionSize + params.sourceKinds.length;
+  for (let slot = 0; slot < noiseDimensions; slot += 1) {
+    vector[noiseStart + slot] = deterministicNoise(clusterIndex * 10_000 + threadOffset * 100 + sourceIndex * 10 + slot);
+  }
+  return vector;
+}
+
+function seedBenchmarkDatabase(dbPath: string, baseline: PerfBaseline): void { // fills the database at dbPath with one repository, the fixture threads, and one embedding per thread per source kind
+  const service = createService(dbPath);
+  const threadCount = baseline.fixture.clusterCount * baseline.fixture.threadsPerCluster; // expected number of thread rows, checked after seeding
+  const now = '2026-03-12T12:00:00Z'; // fixed timestamp reused for every timestamp column
+
+  try {
+    service.db
+      .prepare(
+        `insert into repositories (id, owner, name, full_name, github_repo_id, raw_json, updated_at)
+         values (?, ?, ?, ?, ?, ?, ?)`,
+      )
+      .run(1, 'openclaw', 'openclaw', 'openclaw/openclaw', '1', '{}', now);
+
+    const insertThread = service.db.prepare(
+      `insert into threads (
+        id, repo_id, github_id, number, kind, state, title, body, author_login, author_type, html_url,
+        labels_json, assignees_json, raw_json, content_hash, is_draft, created_at_gh, updated_at_gh, closed_at_gh,
+        merged_at_gh, first_pulled_at, last_pulled_at, updated_at
+      ) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
+    );
+    const insertEmbedding = service.db.prepare(
+      `insert into document_embeddings (thread_id, source_kind, model, dimensions, content_hash, embedding_json, created_at, updated_at)
+       values (?, ?, ?, ?, ?, ?, ?, ?)`,
+    );
+    // Thread ids are assigned sequentially (1-based) in cluster-major order.
+    for (let clusterIndex = 0; clusterIndex < baseline.fixture.clusterCount; clusterIndex += 1) {
+      for (let threadOffset = 0; threadOffset < baseline.fixture.threadsPerCluster; threadOffset += 1) {
+        const threadId = clusterIndex * baseline.fixture.threadsPerCluster + threadOffset + 1;
+        const threadNumber = 10_000 + threadId;
+        const kind = threadOffset % 3 === 0 ? 'pull_request' : 'issue'; // every third thread of a cluster is a pull request
+        insertThread.run(
+          threadId,
+          1,
+          `gh-${threadId}`,
+          threadNumber,
+          kind,
+          'open',
+          `Cluster ${clusterIndex + 1} thread ${threadOffset + 1}`,
+          `Deterministic benchmark fixture body for cluster ${clusterIndex + 1}, thread ${threadOffset + 1}.`,
+          `user${(threadId % 17) + 1}`,
+          'User',
+          `https://github.com/openclaw/openclaw/${kind === 'issue' ? 'issues' : 'pull'}/${threadNumber}`,
+          '[]',
+          '[]',
+          '{}',
+          `hash-${threadId}`,
+          0,
+          now,
+          now,
+          null,
+          null,
+          now,
+          now,
+          now,
+        );
+
+        for (const [sourceIndex, sourceKind] of baseline.fixture.sourceKinds.entries()) {
+          const embedding = buildDeterministicEmbedding({
+            clusterIndex,
+            threadOffset,
+            sourceIndex,
+            clusterCount: baseline.fixture.clusterCount,
+            clusterBlockWidth: baseline.fixture.clusterBlockWidth,
+            noiseDimensions: baseline.fixture.noiseDimensions,
+            sourceKinds: baseline.fixture.sourceKinds,
+          });
+          insertEmbedding.run(
+            threadId,
+            sourceKind,
+            'text-embedding-3-large',
+            embedding.length,
+            `hash-${threadId}-${sourceKind}`,
+            JSON.stringify(embedding), // the vector is stored as a JSON array string
+            now,
+            now,
+          );
+        }
+      }
+    }
+
+    const countRow = service.db.prepare('select count(*) as count from threads').get() as { count: number };
+    assert.equal(threadCount, countRow.count); // sanity check that every thread insert landed
+  } finally {
+    service.close(); // always release the service, even if seeding throws
+  }
+}
+
+/**
+ * Opens a service on `dbPath`, runs one `clusterRepository` pass for openclaw/openclaw with the
+ * fixture's `k` and `minScore`, and reports how long that call took together with the cluster
+ * and edge counts it returned. The service is closed afterwards, even when clustering throws.
+ */
+async function runSingleCluster(dbPath: string, baseline: PerfBaseline): Promise<{ durationMs: number; clusters: number; edges: number }> {
+  const service = createService(dbPath);
+  try {
+    const clockStart = performance.now();
+    const { k, minScore } = baseline.fixture;
+    const outcome = await service.clusterRepository({ owner: 'openclaw', repo: 'openclaw', k, minScore });
+    return { durationMs: performance.now() - clockStart, clusters: outcome.clusters, edges: outcome.edges };
+  } finally {
+    service.close();
+  }
+}
+
+async function measureBenchmark(baseline: PerfBaseline): Promise<PerfRunResult> { // seeds one fixture DB, times cluster rebuilds on fresh copies of it, and compares the median sample with the baseline
+  if (baseline.baseline.fixtureMedianMs <= 0 && !shouldBootstrapBaseline()) {
+    throw new Error(
+      `Cluster perf baseline is not set in ${BASELINE_PATH}. Run the benchmark once, then record fixtureMedianMs before enforcing regressions.`,
+    );
+  }
+
+  const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'ghcrawl-cluster-perf-')); // removed in the finally block below
+  const seedDbPath = path.join(tempRoot, 'seed.sqlite');
+  try {
+    seedBenchmarkDatabase(seedDbPath, baseline);
+
+    const warmupRuns = baseline.benchmark.warmupRuns;
+    const runsPerSample = baseline.benchmark.runsPerSample;
+    const sampleDurationsMs: number[] = [];
+    const benchmarkStartedAt = performance.now(); // NOTE(review): started before the warmup loop, so warmup time counts toward maxTotalMs; confirm this is intended
+    let runCounter = 0;
+    // Warmup: each run gets its own copy of the seed DB; results are sanity-checked and durations discarded.
+    for (let warmupIndex = 0; warmupIndex < warmupRuns; warmupIndex += 1) {
+      const warmupDbPath = path.join(tempRoot, `warmup-${warmupIndex}.sqlite`);
+      fs.copyFileSync(seedDbPath, warmupDbPath);
+      const warmupResult = await runSingleCluster(warmupDbPath, baseline);
+      assert.equal(warmupResult.clusters, baseline.fixture.clusterCount);
+      assert.ok(warmupResult.edges > baseline.fixture.clusterCount);
+    }
+    // Sampling: a sample is the wall time of runsPerSample rebuilds, including the DB copy and service open/close.
+    while (sampleDurationsMs.length < baseline.benchmark.maxSamples) {
+      const sampleStartedAt = performance.now();
+      for (let runIndex = 0; runIndex < runsPerSample; runIndex += 1) {
+        const runDbPath = path.join(tempRoot, `run-${runCounter}.sqlite`);
+        runCounter += 1;
+        fs.copyFileSync(seedDbPath, runDbPath);
+        const result = await runSingleCluster(runDbPath, baseline); // the per-run durationMs returned here is not used; the whole sample is timed instead
+        assert.equal(result.clusters, baseline.fixture.clusterCount);
+        assert.ok(result.edges > baseline.fixture.clusterCount);
+      }
+      sampleDurationsMs.push(performance.now() - sampleStartedAt);
+
+      const elapsedMs = performance.now() - benchmarkStartedAt;
+      if (sampleDurationsMs.length >= baseline.benchmark.minSamples && elapsedMs >= baseline.benchmark.maxTotalMs) { // stop early only once minSamples exist and the time budget is spent
+        break;
+      }
+    }
+
+    const medianMs = median(sampleDurationsMs);
+    const baselineMedianMs = baseline.baseline.fixtureMedianMs > 0 ? baseline.baseline.fixtureMedianMs : medianMs; // no recorded baseline: compare the run against itself so the delta is zero
+    const deltaMs = medianMs - baselineMedianMs;
+    const deltaPercent = baselineMedianMs > 0 ? (deltaMs / baselineMedianMs) * 100 : 0;
+    const projectedOpenclawMs = baseline.baseline.projectedOpenclawMs * (medianMs / baselineMedianMs); // scales the reference projection by the measured slowdown or speed-up ratio
+    const projectedBaselineOpenclawMs = baseline.baseline.projectedOpenclawMs;
+    const projectedDeltaMs = projectedOpenclawMs - projectedBaselineOpenclawMs;
+    const projectedDeltaPercent = (projectedDeltaMs / projectedBaselineOpenclawMs) * 100; // NOTE(review): not guarded against a zero projectedOpenclawMs in the baseline file
+
+    return {
+      sampleDurationsMs,
+      medianMs,
+      baselineMedianMs,
+      deltaMs,
+      deltaPercent,
+      projectedOpenclawMs,
+      projectedBaselineOpenclawMs,
+      projectedDeltaMs,
+      projectedDeltaPercent,
+      samples: sampleDurationsMs.length,
+      runsPerSample,
+      threadCount: baseline.fixture.clusterCount * baseline.fixture.threadsPerCluster,
+      sourceKinds: baseline.fixture.sourceKinds,
+      maxRegressionPercent: baseline.thresholds.maxRegressionPercent,
+    };
+  } finally {
+    fs.rmSync(tempRoot, { recursive: true, force: true }); // deletes the seed DB and every per-run copy
+  }
+}
+
+function buildSummary(result: PerfRunResult): string { // renders the benchmark result as a markdown section
+  const status = result.deltaPercent > result.maxRegressionPercent ? 'FAIL' : 'PASS'; // same threshold rule main() uses to fail the run; bootstrap mode is not considered here
+  const sampleList = result.sampleDurationsMs.map((value) => formatDurationMs(value)).join(', ');
+  const bootstrapLine =
+    result.baselineMedianMs === result.medianMs // equal when measureBenchmark fell back to the measured median as its baseline
+      ? '- Bootstrap mode: using the current fixture median as the provisional baseline'
+      : null;
+  return [
+    '## Cluster Performance',
+    '',
+    `- Status: ${status}`,
+    `- Fixture median: ${formatDurationMs(result.medianMs)} (${result.samples} samples, ${result.runsPerSample} cluster rebuilds/sample)`,
+    `- Fixture baseline: ${formatDurationMs(result.baselineMedianMs)}`,
+    `- Fixture delta: ${formatDurationMs(result.deltaMs)} (${formatPercent(result.deltaPercent)})`,
+    `- Projected openclaw/openclaw duration: ${formatDurationMs(result.projectedOpenclawMs)}`,
+    `- Projected openclaw/openclaw baseline: ${formatDurationMs(result.projectedBaselineOpenclawMs)}`,
+    `- Projected delta: ${formatDurationMs(result.projectedDeltaMs)} (${formatPercent(result.projectedDeltaPercent)})`,
+    `- Regression threshold: ${formatPercent(result.maxRegressionPercent)}`,
+    `- Fixture shape: ${result.threadCount} threads x ${result.sourceKinds.length} source kinds`,
+    `- Sample durations: ${sampleList}`,
+    bootstrapLine, // null unless the bootstrap fallback applied; null entries are filtered out below
+    '',
+  ]
+    .filter((line): line is string => line !== null)
+    .join('\n');
+}
+
+async function main(): Promise<void> {
+  const baseline = loadBaseline();
+  const result = await measureBenchmark(baseline);
+  const summary = buildSummary(result);
+  const bootstrap = shouldBootstrapBaseline();
+
+  process.stdout.write(`${summary}\n`);
+  if (bootstrap) {
+    process.stdout.write(`Suggested fixtureMedianMs: ${result.medianMs.toFixed(1)}\n`); // printed only in bootstrap mode, as the candidate baseline value
+  }
+  const summaryPath = process.env.GITHUB_STEP_SUMMARY; // when set, the same markdown is appended to that file
+  if (summaryPath) {
+    fs.appendFileSync(summaryPath, `${summary}\n`);
+  }
+
+  if (!bootstrap && result.deltaPercent > result.maxRegressionPercent) {
+    throw new Error(
+      `Cluster perf regression exceeded threshold: ${formatPercent(result.deltaPercent)} > ${formatPercent(result.maxRegressionPercent)}`,
+    );
+  }
+}
+
+await main();

From 4a720fe054a554c75091766309a5576bd33cbd76 Mon Sep 17 00:00:00 2001
From: huntharo <harold@pwrdrvr.com>
Date: Thu, 12 Mar 2026 07:32:41 -0400
Subject: [PATCH 2/5] test: calibrate cluster perf baseline for ci

---
 packages/api-core/src/cluster/perf-baseline.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/api-core/src/cluster/perf-baseline.json b/packages/api-core/src/cluster/perf-baseline.json
index 6fccc88..2f2f324 100644
--- a/packages/api-core/src/cluster/perf-baseline.json
+++ b/packages/api-core/src/cluster/perf-baseline.json
@@ -21,7 +21,7 @@
     "maxTotalMs": 10000
   },
   "baseline": {
-    "fixtureMedianMs": 239.1,
+    "fixtureMedianMs": 535.1,
     "projectedOpenclawMs": 600000
   },
   "thresholds": {

From 5740046dac8f40eddbda2307ac59ef2c68ea1a32 Mon Sep 17 00:00:00 2001
From: huntharo <harold@pwrdrvr.com>
Date: Thu, 12 Mar 2026 07:35:38 -0400
Subject: [PATCH 3/5] docs: require pr actions follow-through

---
 AGENTS.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/AGENTS.md b/AGENTS.md
index 929414e..c503e7a 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -83,6 +83,12 @@ Common commands:
 
 If a change affects OpenAI-backed paths, avoid unnecessary live spend unless the user wants a real run.
 
+When you create or update a PR, follow through on GitHub Actions with `gh`:
+
+- monitor the PR checks after pushing
+- inspect failed job logs directly with `gh` instead of asking the user to paste errors back
+- keep fixing and pushing until the PR checks pass, unless the user explicitly wants to stop earlier
+
 ## Editing Guidance
 
 - Keep package boundaries intact:

From d4effd29bf0c072642456b3bee578dc329bb33fc Mon Sep 17 00:00:00 2001
From: huntharo <harold@pwrdrvr.com>
Date: Thu, 12 Mar 2026 09:28:07 -0400
Subject: [PATCH 4/5] ci: post sticky cluster perf pr comment

---
 .github/workflows/ci.yml                      | 52 +++++++++++++++++++
 .../api-core/src/cluster/perf.integration.ts  | 26 +++++++++-
 2 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7785df6..30535ad 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,6 +17,7 @@ on:
 
 permissions:
   contents: read
+  issues: write
 
 jobs:
   install-deps:
@@ -69,8 +70,59 @@ jobs:
         uses: ./.github/actions/configure-nodejs
 
       - name: Run cluster performance integration test
+        env:
+          GHCRAWL_CLUSTER_PERF_OUTPUT_PATH: ${{ runner.temp }}/cluster-perf.json
         run: pnpm test:cluster-perf
 
+      - name: Update PR cluster perf comment
+        if: ${{ always() && github.event_name == 'pull_request' }}
+        uses: actions/github-script@v8
+        env:
+          CLUSTER_PERF_OUTPUT_PATH: ${{ runner.temp }}/cluster-perf.json
+          CLUSTER_PERF_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+        with:
+          script: |
+            const fs = require('node:fs');
+            const outputPath = process.env.CLUSTER_PERF_OUTPUT_PATH;
+            const marker = '<!-- ghcrawl-cluster-perf -->';
+            const runUrl = process.env.CLUSTER_PERF_RUN_URL;
+            const sha = context.sha.slice(0, 7);
+            let summary = '## Cluster Performance\n\nBenchmark output was not produced. See the workflow run for details.';
+
+            if (outputPath && fs.existsSync(outputPath)) {
+              const parsed = JSON.parse(fs.readFileSync(outputPath, 'utf8'));
+              summary = parsed.summary;
+            }
+
+            const body = `${marker}\n${summary}\n_Run: [workflow run](${runUrl}) for \`${sha}\`_`;
+
+            const { data: comments } = await github.rest.issues.listComments({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              per_page: 100,
+            });
+
+            const existing = comments.find((comment) =>
+              comment.user?.login === 'github-actions[bot]' && comment.body?.includes(marker),
+            );
+
+            if (existing) {
+              await github.rest.issues.updateComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: existing.id,
+                body,
+              });
+            } else {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number,
+                body,
+              });
+            }
+
   cli-smoke:
     name: CLI Smoke
     runs-on: ubuntu-latest
diff --git a/packages/api-core/src/cluster/perf.integration.ts b/packages/api-core/src/cluster/perf.integration.ts
index f6d021c..a5796e9 100644
--- a/packages/api-core/src/cluster/perf.integration.ts
+++ b/packages/api-core/src/cluster/perf.integration.ts
@@ -372,11 +372,34 @@ function buildSummary(result: PerfRunResult): string {
     .join('\n');
 }
 
+/**
+ * Persists the machine-readable benchmark report.
+ *
+ * Does nothing unless GHCRAWL_CLUSTER_PERF_OUTPUT_PATH holds a non-empty path. Otherwise the
+ * parent directory is created if needed and a pretty-printed JSON document with the keys
+ * `status`, `bootstrap`, `summary` and `result` (in that order) is written to that path,
+ * terminated by a single newline.
+ */
+function writeOutput(result: PerfRunResult, summary: string, bootstrap: boolean): void {
+  const outputPath = process.env.GHCRAWL_CLUSTER_PERF_OUTPUT_PATH;
+  if (!outputPath) {
+    return;
+  }
+
+  // `status` applies the plain threshold rule and does not take bootstrap mode into account.
+  const status = result.deltaPercent > result.maxRegressionPercent ? 'FAIL' : 'PASS';
+  const report = { status, bootstrap, summary, result };
+
+  fs.mkdirSync(path.dirname(outputPath), { recursive: true });
+  fs.writeFileSync(outputPath, `${JSON.stringify(report, null, 2)}\n`);
+}
+
 async function main(): Promise<void> {
   const baseline = loadBaseline();
   const result = await measureBenchmark(baseline);
   const summary = buildSummary(result);
   const bootstrap = shouldBootstrapBaseline();
+  const shouldFail = !bootstrap && result.deltaPercent > result.maxRegressionPercent;
 
   process.stdout.write(`${summary}\n`);
   if (bootstrap) {
@@ -386,8 +409,9 @@ async function main(): Promise<void> {
   if (summaryPath) {
     fs.appendFileSync(summaryPath, `${summary}\n`);
   }
+  writeOutput(result, summary, bootstrap);
 
-  if (!bootstrap && result.deltaPercent > result.maxRegressionPercent) {
+  if (shouldFail) {
     throw new Error(
       `Cluster perf regression exceeded threshold: ${formatPercent(result.deltaPercent)} > ${formatPercent(result.maxRegressionPercent)}`,
     );

From b12b8dbb59b76a798874ed8a81ffe845e6da4c36 Mon Sep 17 00:00:00 2001
From: huntharo <harold@pwrdrvr.com>
Date: Thu, 12 Mar 2026 09:31:24 -0400
Subject: [PATCH 5/5] ci: move cluster perf pr comment to workflow run

---
 .github/workflows/ci.yml                   |  54 +--------
 .github/workflows/cluster-perf-comment.yml | 128 +++++++++++++++++++++
 2 files changed, 134 insertions(+), 48 deletions(-)
 create mode 100644 .github/workflows/cluster-perf-comment.yml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 30535ad..0e5f58d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,7 +17,6 @@ on:
 
 permissions:
   contents: read
-  issues: write
 
 jobs:
   install-deps:
@@ -74,54 +73,13 @@ jobs:
           GHCRAWL_CLUSTER_PERF_OUTPUT_PATH: ${{ runner.temp }}/cluster-perf.json
         run: pnpm test:cluster-perf
 
-      - name: Update PR cluster perf comment
-        if: ${{ always() && github.event_name == 'pull_request' }}
-        uses: actions/github-script@v8
-        env:
-          CLUSTER_PERF_OUTPUT_PATH: ${{ runner.temp }}/cluster-perf.json
-          CLUSTER_PERF_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+      - name: Upload cluster performance report artifact
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
         with:
-          script: |
-            const fs = require('node:fs');
-            const outputPath = process.env.CLUSTER_PERF_OUTPUT_PATH;
-            const marker = '<!-- ghcrawl-cluster-perf -->';
-            const runUrl = process.env.CLUSTER_PERF_RUN_URL;
-            const sha = context.sha.slice(0, 7);
-            let summary = '## Cluster Performance\n\nBenchmark output was not produced. See the workflow run for details.';
-
-            if (outputPath && fs.existsSync(outputPath)) {
-              const parsed = JSON.parse(fs.readFileSync(outputPath, 'utf8'));
-              summary = parsed.summary;
-            }
-
-            const body = `${marker}\n${summary}\n_Run: [workflow run](${runUrl}) for \`${sha}\`_`;
-
-            const { data: comments } = await github.rest.issues.listComments({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              issue_number: context.issue.number,
-              per_page: 100,
-            });
-
-            const existing = comments.find((comment) =>
-              comment.user?.login === 'github-actions[bot]' && comment.body?.includes(marker),
-            );
-
-            if (existing) {
-              await github.rest.issues.updateComment({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                comment_id: existing.id,
-                body,
-              });
-            } else {
-              await github.rest.issues.createComment({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                issue_number: context.issue.number,
-                body,
-              });
-            }
+          name: cluster-perf-report
+          path: ${{ runner.temp }}/cluster-perf.json
+          if-no-files-found: ignore
 
   cli-smoke:
     name: CLI Smoke
diff --git a/.github/workflows/cluster-perf-comment.yml b/.github/workflows/cluster-perf-comment.yml
new file mode 100644
index 0000000..172ea94
--- /dev/null
+++ b/.github/workflows/cluster-perf-comment.yml
@@ -0,0 +1,128 @@
+name: Cluster Perf PR Comment
+
+on:
+  workflow_dispatch:
+    inputs:
+      run_id:
+        description: CI workflow run ID to read the cluster perf artifact from
+        required: true
+        type: string
+      pr_number:
+        description: Pull request number to comment on
+        required: true
+        type: string
+  workflow_run:
+    workflows: ["CI"]
+    types: [completed]
+
+permissions:
+  actions: read
+  contents: read
+  issues: write
+
+jobs:
+  update-pr-comment:
+    name: Update PR Comment
+    runs-on: ubuntu-latest
+    steps:
+      - name: Resolve workflow run context
+        env:
+          GH_TOKEN: ${{ github.token }}
+          REPO: ${{ github.repository }}
+          EVENT_NAME: ${{ github.event_name }}
+          WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }}
+          WORKFLOW_RUN_EVENT: ${{ github.event.workflow_run.event }}
+          WORKFLOW_RUN_PR_NUMBER: ${{ github.event.workflow_run.pull_requests[0].number }}
+          WORKFLOW_RUN_URL: ${{ github.event.workflow_run.html_url }}
+          WORKFLOW_RUN_HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
+          INPUT_RUN_ID: ${{ inputs.run_id }}
+          INPUT_PR_NUMBER: ${{ inputs.pr_number }}
+        run: |
+          if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
+            run_id="$INPUT_RUN_ID"
+            pr_number="$INPUT_PR_NUMBER"
+            run_json="$(gh api "repos/${REPO}/actions/runs/${run_id}")"
+            run_url="$(printf '%s' "$run_json" | jq -r '.html_url')"
+            head_sha="$(printf '%s' "$run_json" | jq -r '.head_sha')"
+          else
+            if [ "$WORKFLOW_RUN_EVENT" != "pull_request" ] || [ -z "$WORKFLOW_RUN_PR_NUMBER" ] || [ "$WORKFLOW_RUN_PR_NUMBER" = "null" ]; then
+              echo "skip_comment=true" >> "$GITHUB_ENV"
+              exit 0
+            fi
+
+            run_id="$WORKFLOW_RUN_ID"
+            pr_number="$WORKFLOW_RUN_PR_NUMBER"
+            run_url="$WORKFLOW_RUN_URL"
+            head_sha="$WORKFLOW_RUN_HEAD_SHA"
+          fi
+
+          echo "skip_comment=false" >> "$GITHUB_ENV"
+          echo "cluster_perf_run_id=${run_id}" >> "$GITHUB_ENV"
+          echo "cluster_perf_pr_number=${pr_number}" >> "$GITHUB_ENV"
+          echo "cluster_perf_run_url=${run_url}" >> "$GITHUB_ENV"
+          echo "cluster_perf_head_sha=${head_sha}" >> "$GITHUB_ENV"
+
+      - name: Download cluster perf report artifact
+        if: ${{ env.skip_comment != 'true' }}
+        env:
+          GH_TOKEN: ${{ github.token }}
+          REPO: ${{ github.repository }}
+          RUN_ID: ${{ env.cluster_perf_run_id }}
+        run: |
+          artifact_id="$(gh api "repos/${REPO}/actions/runs/${RUN_ID}/artifacts" --jq '.artifacts[] | select(.name == "cluster-perf-report") | .id' | head -n1)"
+          if [ -z "${artifact_id}" ]; then
+            echo "artifact_found=false" >> "$GITHUB_ENV"
+            exit 0
+          fi
+
+          gh api "repos/${REPO}/actions/artifacts/${artifact_id}/zip" > "$RUNNER_TEMP/cluster-perf-report.zip"
+          mkdir -p "$RUNNER_TEMP/cluster-perf-report"
+          unzip -o "$RUNNER_TEMP/cluster-perf-report.zip" -d "$RUNNER_TEMP/cluster-perf-report"
+          echo "artifact_found=true" >> "$GITHUB_ENV"
+          echo "cluster_perf_report_path=$RUNNER_TEMP/cluster-perf-report/cluster-perf.json" >> "$GITHUB_ENV"
+
+      - name: Create or update sticky PR comment
+        if: ${{ env.skip_comment != 'true' && env.artifact_found == 'true' }}
+        env:
+          GH_TOKEN: ${{ github.token }}
+          REPO: ${{ github.repository }}
+          PR_NUMBER: ${{ env.cluster_perf_pr_number }}
+          REPORT_PATH: ${{ env.cluster_perf_report_path }}
+          RUN_URL: ${{ env.cluster_perf_run_url }}
+          HEAD_SHA: ${{ env.cluster_perf_head_sha }}
+        run: |
+          node <<'EOF'
+          const fs = require('node:fs');
+          const { execFileSync } = require('node:child_process');
+
+          const marker = '<!-- ghcrawl-cluster-perf -->';
+          const repo = process.env.REPO;
+          const prNumber = process.env.PR_NUMBER;
+          const reportPath = process.env.REPORT_PATH;
+          const runUrl = process.env.RUN_URL;
+          const headSha = (process.env.HEAD_SHA || '').slice(0, 7);
+
+          const report = JSON.parse(fs.readFileSync(reportPath, 'utf8'));
+          const body = `${marker}\n${report.summary}\n_Run: [workflow run](${runUrl}) for \`${headSha}\`_`;
+
+          // Ask for the largest page: with the default page size of 30 the sticky comment can be missed on busy PRs and a duplicate posted.
+          const commentsJson = execFileSync('gh', ['api', `repos/${repo}/issues/${prNumber}/comments?per_page=100`], { encoding: 'utf8' });
+          const comments = JSON.parse(commentsJson);
+          const existing = comments.find(
+            (comment) => comment.user?.login === 'github-actions[bot]' && typeof comment.body === 'string' && comment.body.includes(marker),
+          );
+
+          if (existing) {
+            execFileSync(
+              'gh',
+              ['api', '--method', 'PATCH', `repos/${repo}/issues/comments/${existing.id}`, '-f', `body=${body}`],
+              { stdio: 'inherit' },
+            );
+          } else {
+            execFileSync(
+              'gh',
+              ['api', '--method', 'POST', `repos/${repo}/issues/${prNumber}/comments`, '-f', `body=${body}`],
+              { stdio: 'inherit' },
+            );
+          }
+          EOF