From 8729f033ea78385cedf9c1444eb284cf6c7bb637 Mon Sep 17 00:00:00 2001
From: anirudh <anirudh05snair@gmail.com>
Date: Sat, 11 Apr 2026 22:58:17 +0530
Subject: [PATCH] feat(cli): add --ref benchmark reference context output

---
 README.md                            |  11 +++
 package.json                         |   3 +-
 scripts/self-scan-stable.ts          |   6 +-
 scripts/update-reference-baseline.ts | 143 +++++++++++++++++++++++++++
 src/cli.ts                           |  11 ++-
 src/core/types.ts                    |   6 +-
 src/node-entry.ts                    |   1 +
 src/reference-baseline.ts            |  56 +++++++++++
 src/reference-context.ts             |  71 +++++++++++++
 src/reporters/json.ts                |   2 +-
 src/reporters/lint.ts                |   2 +-
 src/reporters/text.ts                |  12 ++-
 tests/fixtures-regression.test.ts    |  17 +++-
 tests/reference-context.test.ts      |  95 ++++++++++++++++++
 tests/smoke.test.ts                  |  14 ++-
 15 files changed, 437 insertions(+), 13 deletions(-)
 create mode 100644 scripts/update-reference-baseline.ts
 create mode 100644 src/reference-baseline.ts
 create mode 100644 src/reference-context.ts
 create mode 100644 tests/reference-context.test.ts

diff --git a/README.md b/README.md
index 44b0e04..35f282e 100644
--- a/README.md
+++ b/README.md
@@ -55,6 +55,12 @@ Scan the current repo in lint mode:
 slop-scan scan . --lint
 ```
 
+Scan the current repo with pinned benchmark reference context:
+
+```bash
+slop-scan scan . --ref
+```
+
 Scan another repo and get JSON:
 
 ```bash
@@ -128,6 +134,7 @@ Current checks focus on patterns that often show up in unreviewed generated code
   - findings / function
 - top file hotspots
 - top directory hotspots
+- side-by-side pinned benchmark reference context with `--ref`
 - grouped lint-style findings with `--lint`
 - full-fidelity findings with evidence in `--json`
 
@@ -148,6 +155,10 @@ The repo ships with a **pinned, recreatable benchmark set** comparing known AI-g
 
 **Blended score** = geometric mean of the six normalized-metric ratios versus the mature OSS cohort medians, then rescaled so the mature OSS cohort median is **1.00**. Higher means a repo is consistently noisier across the benchmark dimensions.
 
+Use `--ref` with the default text output to compare a scanned repo's normalized metrics side by side with the pinned cohort medians.
+
+The CLI reference context is packaged as `src/reference-baseline.ts`, generated from the pinned benchmark snapshot with `bun run reference:update`.
+
 ### Cohort medians
 
 | Metric              | AI median | Mature OSS median |     Ratio |
diff --git a/package.json b/package.json
index 2b45b6a..3207a1e 100644
--- a/package.json
+++ b/package.json
@@ -48,10 +48,11 @@
     "test": "bun test",
     "prepare": "husky",
     "prepack": "bun run build",
+    "reference:update": "bun run scripts/update-reference-baseline.ts",
     "benchmark:fetch": "bun run scripts/benchmark-fetch.ts",
     "benchmark:scan": "bun run scripts/benchmark-scan.ts",
     "benchmark:report": "bun run scripts/benchmark-report.ts",
-    "benchmark:update": "bun run benchmark:fetch && bun run benchmark:scan && bun run benchmark:report"
+    "benchmark:update": "bun run benchmark:fetch && bun run benchmark:scan && bun run benchmark:report && bun run reference:update"
   },
   "dependencies": {
     "globby": "^16.2.0",
diff --git a/scripts/self-scan-stable.ts b/scripts/self-scan-stable.ts
index 3f20d41..84ee101 100644
--- a/scripts/self-scan-stable.ts
+++ b/scripts/self-scan-stable.ts
@@ -36,6 +36,7 @@ const STABLE_PACKAGE_PATH = path.resolve("node_modules/slop-scan-stable/package.
 const STABLE_BIN_PATH = path.resolve("node_modules/slop-scan-stable/bin/slop-scan.js");
 const UPDATE_FLAG = "--update";
 const SCORE_EPSILON = 1e-9;
+const STABLE_SCAN_MAX_BUFFER = 10 * 1024 * 1024;
 
 function countRuleHits(report: ScanReport): Record<string, number> {
   const counts = new Map<string, number>();
@@ -61,13 +62,14 @@ function runStableScan(): ScanReport {
   const result = spawnSync("node", [STABLE_BIN_PATH, "scan", ".", "--json"], {
     cwd: process.cwd(),
     encoding: "utf8",
+    maxBuffer: STABLE_SCAN_MAX_BUFFER,
   });
 
   if (result.status !== 0) {
-    if (result.stdout.length > 0) {
+    if (result.stdout?.length > 0) {
       console.log(result.stdout.trimEnd());
     }
-    if (result.stderr.length > 0) {
+    if (result.stderr?.length > 0) {
       console.error(result.stderr.trimEnd());
     }
 
diff --git a/scripts/update-reference-baseline.ts b/scripts/update-reference-baseline.ts
new file mode 100644
index 0000000..a330706
--- /dev/null
+++ b/scripts/update-reference-baseline.ts
@@ -0,0 +1,143 @@
+import { mkdir, readFile, writeFile } from "node:fs/promises";
+import path from "node:path";
+import {
+  DEFAULT_BENCHMARK_SET_PATH,
+  loadBenchmarkSet,
+  resolveProjectPath,
+} from "../src/benchmarks/manifest";
+import type { BenchmarkSnapshot } from "../src/benchmarks/types";
+import { getOption } from "./lib/get-option";
+
+interface ReferenceBaselinePayload { // data rendered into src/reference-baseline.ts
+  benchmarkSetId: string;
+  benchmarkSetName: string;
+  generatedAt: string; // copied from the snapshot's generatedAt field
+  analyzerVersion: string;
+  configMode: "default";
+  cohorts: {
+    explicitAi: { // filled from snapshot.cohorts["explicit-ai"]
+      label: string;
+      repoCount: number;
+      medians: BenchmarkSnapshot["cohorts"]["explicit-ai"]["medians"];
+      blendedScoreMedian: number | null;
+    };
+    matureOss: { // filled from snapshot.cohorts["mature-oss"]
+      label: string;
+      repoCount: number;
+      medians: BenchmarkSnapshot["cohorts"]["mature-oss"]["medians"];
+      blendedScoreMedian: number | null;
+    };
+  };
+}
+
+function renderReferenceBaseline(payload: ReferenceBaselinePayload): string {
+  const explicitAi = payload.cohorts.explicitAi;
+  const matureOss = payload.cohorts.matureOss;
+  // Each array entry below becomes one line of the generated TypeScript module text.
+  return [
+    'import type { NormalizedMetrics } from "./core/types";',
+    "",
+    "export interface ReferenceBenchmarkCohort {",
+    "  label: string;",
+    "  repoCount: number;",
+    "  medians: NormalizedMetrics;",
+    "  blendedScoreMedian: number | null;",
+    "}",
+    "",
+    "export interface ReferenceBaseline {",
+    "  benchmarkSetId: string;",
+    "  benchmarkSetName: string;",
+    "  generatedAt: string;",
+    "  analyzerVersion: string;",
+    '  configMode: "default";',
+    "  cohorts: {",
+    "    explicitAi: ReferenceBenchmarkCohort;",
+    "    matureOss: ReferenceBenchmarkCohort;",
+    "  };",
+    "}",
+    "",
+    "export const DEFAULT_REFERENCE_BASELINE = {",
+    `  benchmarkSetId: ${JSON.stringify(payload.benchmarkSetId)},`,
+    `  benchmarkSetName: ${JSON.stringify(payload.benchmarkSetName)},`,
+    `  generatedAt: ${JSON.stringify(payload.generatedAt)},`,
+    `  analyzerVersion: ${JSON.stringify(payload.analyzerVersion)},`,
+    `  configMode: ${JSON.stringify(payload.configMode)},`,
+    "  cohorts: {",
+    "    explicitAi: {",
+    `      label: ${JSON.stringify(explicitAi.label)},`,
+    `      repoCount: ${explicitAi.repoCount},`,
+    "      medians: {",
+    renderMetrics(explicitAi.medians, "        "),
+    "      },",
+    `      blendedScoreMedian: ${formatNullableNumber(explicitAi.blendedScoreMedian)},`,
+    "    },",
+    "    matureOss: {",
+    `      label: ${JSON.stringify(matureOss.label)},`,
+    `      repoCount: ${matureOss.repoCount},`,
+    "      medians: {",
+    renderMetrics(matureOss.medians, "        "),
+    "      },",
+    `      blendedScoreMedian: ${formatNullableNumber(matureOss.blendedScoreMedian)},`,
+    "    },",
+    "  },",
+    "} satisfies ReferenceBaseline;",
+    "",
+  ].join("\n");
+}
+
+function formatNullableNumber(value: number | null): string {
+  return value !== null ? `${value}` : "null"; // null is emitted as the literal text "null"
+}
+
+function renderMetrics(
+  metrics: BenchmarkSnapshot["cohorts"]["explicit-ai"]["medians"],
+  indent: string,
+): string {
+  // Emits one indented `key: value,` line per metric, always in the order listed here.
+  const metricKeys = [
+    "scorePerFile", "scorePerKloc", "scorePerFunction",
+    "findingsPerFile", "findingsPerKloc", "findingsPerFunction",
+  ] as const;
+  return metricKeys
+    .map((key) => `${indent}${key}: ${formatNullableNumber(metrics[key])},`)
+    .join("\n");
+}
+
+const manifestPath = getOption(process.argv.slice(2), "--manifest", DEFAULT_BENCHMARK_SET_PATH);
+const benchmarkSet = await loadBenchmarkSet(manifestPath);
+const snapshotPath = resolveProjectPath(benchmarkSet.artifacts.snapshotPath);
+const targetPath = resolveProjectPath("src/reference-baseline.ts");
+const snapshot = JSON.parse(await readFile(snapshotPath, "utf8")) as BenchmarkSnapshot;
+// Refuse to generate from a snapshot whose id differs from the loaded benchmark set.
+if (snapshot.benchmarkSetId !== benchmarkSet.id) {
+  throw new Error(
+    `Benchmark snapshot id mismatch: expected ${benchmarkSet.id}, got ${snapshot.benchmarkSetId}`,
+  );
+}
+// NOTE(review): the parsed snapshot is only type-asserted above; its shape is not validated.
+const baseline: ReferenceBaselinePayload = {
+  benchmarkSetId: snapshot.benchmarkSetId,
+  benchmarkSetName: snapshot.benchmarkSetName,
+  generatedAt: snapshot.generatedAt,
+  analyzerVersion: snapshot.analyzerVersion,
+  configMode: snapshot.configMode,
+  cohorts: {
+    explicitAi: {
+      label: "Explicit AI median",
+      repoCount: snapshot.cohorts["explicit-ai"].repoCount,
+      medians: snapshot.cohorts["explicit-ai"].medians,
+      blendedScoreMedian: snapshot.cohorts["explicit-ai"].blendedScoreMedian,
+    },
+    matureOss: {
+      label: "Mature OSS median",
+      repoCount: snapshot.cohorts["mature-oss"].repoCount,
+      medians: snapshot.cohorts["mature-oss"].medians,
+      blendedScoreMedian: snapshot.cohorts["mature-oss"].blendedScoreMedian,
+    },
+  },
+};
+// Create the target directory if it is missing, then write the rendered module text.
+await mkdir(path.dirname(targetPath), { recursive: true });
+await writeFile(targetPath, renderReferenceBaseline(baseline));
+
+console.log(`Wrote reference baseline to ${targetPath}`);
diff --git a/src/cli.ts b/src/cli.ts
index 559480e..20d2da3 100644
--- a/src/cli.ts
+++ b/src/cli.ts
@@ -16,6 +16,7 @@ export function formatHelp(): string {
     "  -h, --help          Show help",
     "  --json              Output results as JSON",
     "  --lint              Output results in lint format",
+    "  --ref               Include pinned benchmark reference context in text output",
     "  --ignore <pattern>  Glob pattern to ignore (repeatable)",
     "",
     "Examples:",
@@ -29,6 +30,7 @@ export interface CliArgs {
   help: boolean;
   json: boolean;
   lint: boolean;
+  ref: boolean;
   ignore: string[];
   command: string | undefined;
   target: string;
@@ -41,6 +43,7 @@ export function parseCliArgs(argv: string[]): CliArgs {
       help: { type: "boolean", short: "h", default: false },
       json: { type: "boolean", default: false },
       lint: { type: "boolean", default: false },
+      ref: { type: "boolean", default: false },
       ignore: { type: "string", multiple: true, default: [] },
     },
     allowPositionals: true,
@@ -53,6 +56,7 @@ export function parseCliArgs(argv: string[]): CliArgs {
     help: values.help,
     json: values.json,
     lint: values.lint,
+    ref: values.ref,
     ignore: values.ignore,
     command: command,
     target: target,
@@ -78,6 +82,11 @@ export async function run(argv: string[]): Promise<number> {
     return 1;
   }
 
+  if (args.ref && (args.json || args.lint)) {
+    console.error("--ref can only be used with default text output.");
+    return 1;
+  }
+
   const rootDir = path.resolve(args.target);
   const loadedConfig = await loadConfigFile(rootDir);
   const config = loadedConfig.config;
@@ -93,7 +102,7 @@ export async function run(argv: string[]): Promise<number> {
 
   const result = await analyzeRepository(rootDir, config, registry);
   const reporter = registry.getReporter(args.json ? "json" : args.lint ? "lint" : "text");
-  const output = await reporter.render(result);
+  const output = await reporter.render(result, { reference: args.ref });
 
   if (output.length > 0) {
     console.log(output);
diff --git a/src/core/types.ts b/src/core/types.ts
index ade4210..1cfa97b 100644
--- a/src/core/types.ts
+++ b/src/core/types.ts
@@ -106,7 +106,11 @@ export interface RulePlugin extends ProviderBase {
 
 export interface ReporterPlugin {
   id: string;
-  render(result: AnalysisResult): Promise<string> | string;
+  render(result: AnalysisResult, options?: ReporterOptions): Promise<string> | string;
+}
+
+export interface ReporterOptions {
+  reference?: boolean;
 }
 
 export interface AnalyzerRuntime {
diff --git a/src/node-entry.ts b/src/node-entry.ts
index b0f12f8..6f8f1bd 100644
--- a/src/node-entry.ts
+++ b/src/node-entry.ts
@@ -28,6 +28,7 @@ export type {
   LanguagePlugin,
   ProviderContext,
   ReporterPlugin,
+  ReporterOptions,
   RulePlugin,
   Scope,
 } from "./core/types";
diff --git a/src/reference-baseline.ts b/src/reference-baseline.ts
new file mode 100644
index 0000000..7ab8936
--- /dev/null
+++ b/src/reference-baseline.ts
@@ -0,0 +1,56 @@
+import type { NormalizedMetrics } from "./core/types";
+
+export interface ReferenceBenchmarkCohort {
+  label: string;
+  repoCount: number;
+  medians: NormalizedMetrics;
+  blendedScoreMedian: number | null;
+}
+
+export interface ReferenceBaseline {
+  benchmarkSetId: string;
+  benchmarkSetName: string;
+  generatedAt: string;
+  analyzerVersion: string;
+  configMode: "default";
+  cohorts: {
+    explicitAi: ReferenceBenchmarkCohort;
+    matureOss: ReferenceBenchmarkCohort;
+  };
+}
+
+export const DEFAULT_REFERENCE_BASELINE = {
+  benchmarkSetId: "known-ai-vs-solid-oss",
+  benchmarkSetName: "Known AI repos vs older solid OSS repos",
+  generatedAt: "2026-04-09T00:24:29.081Z",
+  analyzerVersion: "0.2.0",
+  configMode: "default",
+  cohorts: {
+    explicitAi: {
+      label: "Explicit AI median",
+      repoCount: 9,
+      medians: {
+        scorePerFile: 0.9875000000000002,
+        scorePerKloc: 9.510586363885691,
+        scorePerFunction: 0.2286514601096154,
+        findingsPerFile: 0.30851063829787234,
+        findingsPerKloc: 2.96198782293895,
+        findingsPerFunction: 0.0842173094081491,
+      },
+      blendedScoreMedian: 3.476442610225084,
+    },
+    matureOss: {
+      label: "Mature OSS median",
+      repoCount: 8,
+      medians: {
+        scorePerFile: 0.19086548662498448,
+        scorePerKloc: 4.422142801664107,
+        scorePerFunction: 0.09195482875096181,
+        findingsPerFile: 0.06947805977819406,
+        findingsPerKloc: 1.3961844005945103,
+        findingsPerFunction: 0.02817460317460317,
+      },
+      blendedScoreMedian: 0.9999999999999999,
+    },
+  },
+} satisfies ReferenceBaseline;
diff --git a/src/reference-context.ts b/src/reference-context.ts
new file mode 100644
index 0000000..090f2af
--- /dev/null
+++ b/src/reference-context.ts
@@ -0,0 +1,71 @@
+import type { AnalysisResult, NormalizedMetrics } from "./core/types";
+import { DEFAULT_REFERENCE_BASELINE } from "./reference-baseline";
+
+interface ReferenceMetric {
+  key: keyof NormalizedMetrics; // property looked up on each metrics object
+  label: string; // text shown in the first column of the table
+}
+// One table row is rendered per entry, in the order listed here.
+const REFERENCE_METRICS: ReferenceMetric[] = [
+  { key: "scorePerFile", label: "score / file" },
+  { key: "scorePerKloc", label: "score / KLOC" },
+  { key: "scorePerFunction", label: "score / function" },
+  { key: "findingsPerFile", label: "findings / file" },
+  { key: "findingsPerKloc", label: "findings / KLOC" },
+  { key: "findingsPerFunction", label: "findings / function" },
+];
+
+function divideOrNull(numerator: number | null, denominator: number | null): number | null {
+  // A ratio only exists when both operands are present and the divisor is non-zero.
+  if (numerator === null || denominator === null || denominator === 0) return null;
+  return numerator / denominator;
+}
+
+function formatMetric(value: number | null): string {
+  return value !== null ? value.toFixed(2) : "n/a"; // two decimals, or "n/a" when missing
+}
+
+function formatRatio(value: number | null): string {
+  return value !== null ? value.toFixed(2) + "x" : "n/a"; // e.g. 2 renders as "2.00x"
+}
+
+function formatRow(values: string[], widths: number[]): string {
+  // Pads each cell to its column width: the label column (index 0) is left-aligned,
+  // every other column is right-aligned. Columns without a width are not padded.
+  const pad = (cell: string, column: number): string =>
+    column === 0 ? cell.padEnd(widths[column] ?? 0) : cell.padStart(widths[column] ?? 0);
+  return values.map(pad).join("  ");
+}
+
+export function renderReferenceContext(result: AnalysisResult): string[] {
+  // Builds the reference table lines: one row per entry in REFERENCE_METRICS, comparing the
+  // scanned repo with the mature OSS and explicit AI medians of the pinned baseline.
+  const { benchmarkSetName, analyzerVersion, configMode, cohorts } = DEFAULT_REFERENCE_BASELINE;
+  const repoMetrics = result.summary.normalized;
+  const header = ["Metric", "This repo", "Mature median", "vs mature", "AI median"];
+  const rows: string[][] = [];
+  for (const { key, label } of REFERENCE_METRICS) {
+    const repoValue = repoMetrics[key];
+    const matureValue = cohorts.matureOss.medians[key];
+    rows.push([
+      label,
+      formatMetric(repoValue),
+      formatMetric(matureValue),
+      formatRatio(divideOrNull(repoValue, matureValue)),
+      formatMetric(cohorts.explicitAi.medians[key]),
+    ]);
+  }
+  // Every column is as wide as its widest cell, the header cell included.
+  const widths = header.map((heading, column) =>
+    rows.reduce((widest, row) => Math.max(widest, row[column]?.length ?? 0), heading.length),
+  );
+  const separator = widths.map((width) => "-".repeat(width));
+  const title = `Reference context: ${benchmarkSetName} (analyzer ${analyzerVersion}, ${configMode} config)`;
+  return [
+    "",
+    title,
+    formatRow(header, widths),
+    formatRow(separator, widths),
+    ...rows.map((row) => formatRow(row, widths)),
+  ];
+}
diff --git a/src/reporters/json.ts b/src/reporters/json.ts
index 0415941..23d618a 100644
--- a/src/reporters/json.ts
+++ b/src/reporters/json.ts
@@ -2,7 +2,7 @@ import type { AnalysisResult, ReporterPlugin } from "../core/types";
 
 export const jsonReporter: ReporterPlugin = {
   id: "json",
-  render(result: AnalysisResult): string {
+  render(result: AnalysisResult, _options): string {
     return JSON.stringify(
       {
         rootDir: result.rootDir,
diff --git a/src/reporters/lint.ts b/src/reporters/lint.ts
index 628c643..57629b5 100644
--- a/src/reporters/lint.ts
+++ b/src/reporters/lint.ts
@@ -107,7 +107,7 @@ function renderFinding(finding: Finding): string {
 
 export const lintReporter: ReporterPlugin = {
   id: "lint",
-  render(result: AnalysisResult): string {
+  render(result: AnalysisResult, _options): string {
     const renderedFindings = [...result.findings]
       .sort(compareFindings)
       .map(renderFinding)
diff --git a/src/reporters/text.ts b/src/reporters/text.ts
index bf01681..f183395 100644
--- a/src/reporters/text.ts
+++ b/src/reporters/text.ts
@@ -1,4 +1,5 @@
 import type { AnalysisResult, ReporterPlugin } from "../core/types";
+import { renderReferenceContext } from "../reference-context";
 
 function formatMetric(value: number | null): string {
   return value === null ? "n/a" : value.toFixed(2);
@@ -6,7 +7,7 @@ function formatMetric(value: number | null): string {
 
 export const textReporter: ReporterPlugin = {
   id: "text",
-  render(result: AnalysisResult): string {
+  render(result: AnalysisResult, options): string {
     const { summary } = result;
     const lines = [
       "slop-scan report",
@@ -24,11 +25,18 @@ export const textReporter: ReporterPlugin = {
       `- findings / file: ${formatMetric(summary.normalized.findingsPerFile)}`,
       `- findings / KLOC (logical): ${formatMetric(summary.normalized.findingsPerKloc)}`,
       `- findings / function: ${formatMetric(summary.normalized.findingsPerFunction)}`,
+    ];
+
+    if (options?.reference) {
+      lines.push(...renderReferenceContext(result));
+    }
+
+    lines.push(
       "",
       "Raw totals:",
       `- findings: ${summary.findingCount}`,
       `- repo score: ${summary.repoScore.toFixed(2)}`,
-    ];
+    );
 
     if (result.fileScores.length > 0) {
       lines.push("", "File hotspots:");
diff --git a/tests/fixtures-regression.test.ts b/tests/fixtures-regression.test.ts
index 9fbe55f..ac4abb6 100644
--- a/tests/fixtures-regression.test.ts
+++ b/tests/fixtures-regression.test.ts
@@ -1,6 +1,7 @@
 import { describe, expect, test } from "bun:test";
 import path from "node:path";
 import { spawnSync } from "node:child_process";
+import fs from "node:fs";
 import { analyzeRepository } from "../src/core/engine";
 import { DEFAULT_CONFIG } from "../src/config";
 import { createDefaultRegistry } from "../src/default-registry";
@@ -9,6 +10,16 @@ function fixturePath(name: string): string {
   return path.join(process.cwd(), "tests", "fixtures", "repos", name);
 }
 
+function getBunExe(): string {
+  // Probe the per-user Windows install only when USERPROFILE is set; an unset variable
+  // would otherwise make the lookup resolve ".bun/bin/bun.exe" against the working directory.
+  const userProfile = process.env.USERPROFILE;
+  const windowsBunPath = userProfile ? path.join(userProfile, ".bun", "bin", "bun.exe") : null;
+  if (windowsBunPath !== null && fs.existsSync(windowsBunPath)) return windowsBunPath;
+  // Otherwise fall back to "bun", which works when it is on PATH.
+  return "bun";
+}
+
 describe("fixture regression suite", () => {
   test("clean fixture stays quiet", async () => {
     const result = await analyzeRepository(
@@ -63,7 +74,7 @@ describe("fixture regression suite", () => {
 
   test("CLI JSON output matches the slop-heavy fixture summary", () => {
     const output = spawnSync(
-      "bun",
+      getBunExe(),
       ["run", "src/cli.ts", "scan", fixturePath("slop-heavy"), "--json"],
       {
         encoding: "utf8",
@@ -81,7 +92,7 @@ describe("fixture regression suite", () => {
 
   test("CLI lint output lists grouped rule hits with locations", () => {
     const output = spawnSync(
-      "bun",
+      getBunExe(),
       ["run", "src/cli.ts", "scan", fixturePath("slop-heavy"), "--lint"],
       {
         encoding: "utf8",
@@ -107,7 +118,7 @@ describe("fixture regression suite", () => {
 
   test("CLI rejects --json and --lint together", () => {
     const output = spawnSync(
-      "bun",
+      getBunExe(),
       ["run", "src/cli.ts", "scan", fixturePath("slop-heavy"), "--json", "--lint"],
       {
         encoding: "utf8",
diff --git a/tests/reference-context.test.ts b/tests/reference-context.test.ts
new file mode 100644
index 0000000..95c1063
--- /dev/null
+++ b/tests/reference-context.test.ts
@@ -0,0 +1,95 @@
+import { describe, expect, test } from "bun:test";
+import { readFileSync } from "node:fs";
+import path from "node:path";
+import type { BenchmarkSnapshot } from "../src/benchmarks/types";
+import type { AnalysisResult, NormalizedMetrics } from "../src/core/types";
+import { DEFAULT_REFERENCE_BASELINE } from "../src/reference-baseline";
+import { textReporter } from "../src/reporters/text";
+
+function doubleMetric(value: number | null): number | null {
+  return value !== null ? 2 * value : null; // a null metric stays null
+}
+
+function createResultWithDoubleMatureMedian(): AnalysisResult {
+  // Builds a result whose normalized metrics are twice the pinned mature OSS medians.
+  const matureMedian = DEFAULT_REFERENCE_BASELINE.cohorts.matureOss.medians;
+  const normalized: NormalizedMetrics = {
+    scorePerFile: doubleMetric(matureMedian.scorePerFile),
+    scorePerKloc: doubleMetric(matureMedian.scorePerKloc),
+    scorePerFunction: doubleMetric(matureMedian.scorePerFunction),
+    findingsPerFile: doubleMetric(matureMedian.findingsPerFile),
+    findingsPerKloc: doubleMetric(matureMedian.findingsPerKloc),
+    findingsPerFunction: doubleMetric(matureMedian.findingsPerFunction),
+  };
+  return {
+    rootDir: "/tmp/example",
+    config: { ignores: [], rules: {}, thresholds: {} },
+    summary: {
+      fileCount: 10,
+      directoryCount: 1,
+      findingCount: 2,
+      repoScore: 10,
+      physicalLineCount: 100,
+      logicalLineCount: 80,
+      functionCount: 20,
+      normalized,
+    },
+    files: [],
+    directories: [],
+    findings: [],
+    fileScores: [],
+    directoryScores: [],
+    repoScore: 10,
+  };
+}
+
+describe("reference context", () => {
+  test("generated baseline matches the pinned benchmark snapshot medians", () => {
+    const snapshotPath = path.join(
+      process.cwd(),
+      "benchmarks",
+      "results",
+      "known-ai-vs-solid-oss.json",
+    );
+    const snapshot = JSON.parse(readFileSync(snapshotPath, "utf8")) as BenchmarkSnapshot;
+    // Each baseline field must equal the corresponding value in the snapshot file.
+    expect(DEFAULT_REFERENCE_BASELINE.benchmarkSetId).toBe(snapshot.benchmarkSetId);
+    expect(DEFAULT_REFERENCE_BASELINE.benchmarkSetName).toBe(snapshot.benchmarkSetName);
+    expect(DEFAULT_REFERENCE_BASELINE.generatedAt).toBe(snapshot.generatedAt);
+    expect(DEFAULT_REFERENCE_BASELINE.analyzerVersion).toBe(snapshot.analyzerVersion);
+    expect(DEFAULT_REFERENCE_BASELINE.configMode).toBe(snapshot.configMode);
+    expect(DEFAULT_REFERENCE_BASELINE.cohorts.explicitAi.repoCount).toBe(
+      snapshot.cohorts["explicit-ai"].repoCount,
+    );
+    expect(DEFAULT_REFERENCE_BASELINE.cohorts.explicitAi.medians).toEqual(
+      snapshot.cohorts["explicit-ai"].medians,
+    );
+    expect(DEFAULT_REFERENCE_BASELINE.cohorts.explicitAi.blendedScoreMedian).toBe(
+      snapshot.cohorts["explicit-ai"].blendedScoreMedian,
+    );
+    expect(DEFAULT_REFERENCE_BASELINE.cohorts.matureOss.repoCount).toBe(
+      snapshot.cohorts["mature-oss"].repoCount,
+    );
+    expect(DEFAULT_REFERENCE_BASELINE.cohorts.matureOss.medians).toEqual(
+      snapshot.cohorts["mature-oss"].medians,
+    );
+    expect(DEFAULT_REFERENCE_BASELINE.cohorts.matureOss.blendedScoreMedian).toBe(
+      snapshot.cohorts["mature-oss"].blendedScoreMedian,
+    );
+  });
+
+  test("text reporter shows side-by-side benchmark context only when requested", () => {
+    const result = createResultWithDoubleMatureMedian();
+    const defaultOutput = textReporter.render(result) as string;
+    const referenceOutput = textReporter.render(result, { reference: true }) as string;
+    // The table must appear only when the reference option is passed.
+    expect(defaultOutput).not.toContain("Reference context");
+    expect(referenceOutput).toContain("Reference context");
+    expect(referenceOutput).toContain("Metric");
+    expect(referenceOutput).toContain("This repo");
+    expect(referenceOutput).toContain("Mature median");
+    expect(referenceOutput).toContain("AI median");
+    expect(referenceOutput).toContain("score / file");
+    expect(referenceOutput).toContain("2.00x");
+  });
+});
diff --git a/tests/smoke.test.ts b/tests/smoke.test.ts
index 53ed085..0bedb81 100644
--- a/tests/smoke.test.ts
+++ b/tests/smoke.test.ts
@@ -1,7 +1,7 @@
 import { describe, expect, test } from "bun:test";
 import path from "node:path";
 import { DEFAULT_CONFIG, loadConfig } from "../src/config";
-import { formatHelp, run } from "../src/cli";
+import { formatHelp, parseCliArgs, run } from "../src/cli";
 
 describe("project scaffold", () => {
   test("help text stays focused on usage", () => {
@@ -9,11 +9,16 @@ describe("project scaffold", () => {
     expect(formatHelp()).toContain("scan");
     expect(formatHelp()).toContain("--lint");
     expect(formatHelp()).toContain("--ignore");
+    expect(formatHelp()).toContain("--ref");
     expect(formatHelp()).toContain("--help");
     expect(formatHelp()).not.toContain("Development:");
     expect(formatHelp()).not.toContain("Implemented today:");
   });
 
+  test("parses reference context flag", () => {
+    expect(parseCliArgs(["scan", ".", "--ref"]).ref).toBe(true);
+  });
+
   test("loadConfig returns defaults when config file is absent", async () => {
     const fixtureRoot = path.join(process.cwd(), "tests", "fixtures", "repos", "clean");
     const config = await loadConfig(fixtureRoot);
@@ -25,4 +30,11 @@ describe("project scaffold", () => {
     const exitCode = await run(["scan", fixtureRoot]);
     expect(exitCode).toBe(0);
   });
+
+  test("--ref is limited to default text output", async () => {
+    const fixtureRoot = path.join(process.cwd(), "tests", "fixtures", "repos", "clean");
+    const exitCode = await run(["scan", fixtureRoot, "--json", "--ref"]);
+
+    expect(exitCode).toBe(1);
+  });
 });