atomicmemory · ethanj · May 1, 2026 · May 1, 2026 · May 1, 2026
diff --git a/src/commands/review-show.ts b/src/commands/review-show.ts
@@ -30,4 +30,12 @@ export default async function reviewShowCommand(id: string): Promise<void> {
       output.status("!", output.warn(`[${v.severity}] ${v.message}`));
     }
   }
+
+  if (candidate.provenanceViolations && candidate.provenanceViolations.length > 0) {
+    console.log();
+    output.header("Provenance violations");
+    for (const v of candidate.provenanceViolations) {
+      output.status("!", output.warn(`[${v.severity}] ${v.message}`));
+    }
+  }
 }
diff --git a/src/compiler/candidates.ts b/src/compiler/candidates.ts
@@ -49,6 +49,13 @@ interface CandidateDraft {
    * Omit (or pass `undefined`) when the candidate body is clean.
    */
   schemaViolations?: LintResult[];
+  /**
+   * Provenance lint violations for the candidate body — malformed claim
+   * citations, out-of-bounds spans, or missing source files. Surfaced
+   * alongside schema violations so reviewers see citation issues before
+   * approving.
+   */
+  provenanceViolations?: LintResult[];
 }
 
 /** Build a deterministic-but-unique id from a slug and a short random suffix. */
@@ -88,6 +95,7 @@ export async function writeCandidate(
     generatedAt: new Date().toISOString(),
     ...(draft.sourceStates ? { sourceStates: draft.sourceStates } : {}),
     ...(draft.schemaViolations ? { schemaViolations: draft.schemaViolations } : {}),
+    ...(draft.provenanceViolations ? { provenanceViolations: draft.provenanceViolations } : {}),
   };
 
   await atomicWrite(candidatePath(root, candidate.id), JSON.stringify(candidate, null, 2));

diff --git a/src/compiler/index.ts b/src/compiler/index.ts
@@ -48,7 +48,12 @@ import { buildBudgetedCombinedContent, type SourceSlice } from "./prompt-budget.
 import { addObsidianMeta, generateMOC } from "./obsidian.js";
 import { updateEmbeddings } from "../utils/embeddings.js";
 import { writeCandidate } from "./candidates.js";
-import { checkPageCrossLinks } from "../linter/rules.js";
+import {
+  checkPageBrokenCitations,
+  checkPageCrossLinks,
+  checkPageMalformedCitations,
+} from "../linter/rules.js";
+import type { LintResult } from "../linter/types.js";
 import { renderMergedPageContent } from "./page-renderer.js";
 import * as output from "../utils/output.js";
 import {
@@ -554,11 +559,18 @@ async function persistReviewCandidate(
   sourceStates: SourceStateMap,
   schema: SchemaConfig,
 ): Promise<MergedPageOutcome> {
-  // Run schema-aware lint against the candidate body so violations are visible
-  // in `review show` before a reviewer approves the page. The virtual file path
-  // uses the slug so diagnostics are identifiable without a real disk path.
+  // Run schema-aware AND provenance-aware lint against the candidate body so
+  // both classes of violation are visible in `review show` before a reviewer
+  // approves the page. The virtual file path uses the slug so diagnostics
+  // are identifiable without a real disk path. Provenance lint covers the
+  // citation rules that previously only ran on the post-promotion compile.
   const virtualPath = `wiki/concepts/${entry.slug}.md`;
-  const violations = checkPageCrossLinks(fullPage, virtualPath, schema);
+  const schemaViolations = checkPageCrossLinks(fullPage, virtualPath, schema);
+  const provenanceViolations = await collectCandidateProvenanceViolations(
+    root,
+    fullPage,
+    virtualPath,
+  );
 
   const candidate: ReviewCandidate = await writeCandidate(root, {
     title: entry.concept.concept,
@@ -567,12 +579,33 @@ async function persistReviewCandidate(
     sources: entry.sourceFiles,
     body: fullPage,
     sourceStates: pickStatesForSources(sourceStates, entry.sourceFiles),
-    schemaViolations: violations.length > 0 ? violations : undefined,
+    schemaViolations: schemaViolations.length > 0 ? schemaViolations : undefined,
+    provenanceViolations:
+      provenanceViolations.length > 0 ? provenanceViolations : undefined,
   });
   output.status("?", output.info(`Candidate ready: ${candidate.id} (${entry.slug})`));
   return { candidateId: candidate.id };
 }
 
+/**
+ * Lint an in-memory candidate body for provenance problems. Runs the
+ * malformed-citation check first, then the broken-citation check against
+ * the sources directory under `root`.
+ *
+ * @returns Malformed-citation findings followed by broken-citation
+ *   findings, ready for writeCandidate to persist.
+ */
+async function collectCandidateProvenanceViolations(
+  root: string,
+  fullPage: string,
+  virtualPath: string,
+): Promise<LintResult[]> {
+  const malformedFindings = checkPageMalformedCitations(fullPage, virtualPath);
+  const sourcesDir = path.join(root, SOURCES_DIR);
+  const brokenFindings = await checkPageBrokenCitations(fullPage, virtualPath, sourcesDir);
+  return malformedFindings.concat(brokenFindings);
+}
+
 /**
  * Materialise schema-declared seed pages (overview, comparison, entity).
  * Each seed page is written under wiki/concepts/ next to concept pages so

diff --git a/src/linter/rules.ts b/src/linter/rules.ts
@@ -458,18 +458,47 @@ export async function checkBrokenCitations(root: string): Promise<LintResult[]>
   const pages = await collectAllPages(root);
   const sourcesDir = path.join(root, SOURCES_DIR);
   const results: LintResult[] = [];
-  /** Cache of source filename → line count to avoid repeated reads. */
   const lineCountCache = new Map<string, number>();
 
   for (const page of pages) {
-    for (const { captured, line } of findMatchesInContent(page.content, CITATION_PATTERN)) {
-      await collectBrokenForMarker(captured, line, page.filePath, sourcesDir, lineCountCache, results);
-    }
+    const pageFindings = await checkPageBrokenCitations(
+      page.content,
+      page.filePath,
+      sourcesDir,
+      lineCountCache,
+    );
+    results.push(...pageFindings);
   }
 
   return results;
 }
 
+/**
+ * Single-page core of {@link checkBrokenCitations}. Scans one page's
+ * in-memory markdown for citation markers and gathers the diagnostics
+ * `collectBrokenForMarker` reports for each. Shared by the on-disk lint
+ * walker and the candidate-lint path behind `compile --review`.
+ *
+ * @param content - Full page markdown including frontmatter.
+ * @param filePath - Path recorded in diagnostics; may be virtual.
+ * @param sourcesDir - Absolute path to the project's sources/ directory.
+ * @param lineCountCache - Optional cache of source line counts; share one
+ *   map across pages so the same source file isn't read repeatedly.
+ */
+export async function checkPageBrokenCitations(
+  content: string,
+  filePath: string,
+  sourcesDir: string,
+  lineCountCache: Map<string, number> = new Map(),
+): Promise<LintResult[]> {
+  const findings: LintResult[] = [];
+  for (const marker of findMatchesInContent(content, CITATION_PATTERN)) {
+    const { captured, line } = marker;
+    await collectBrokenForMarker(captured, line, filePath, sourcesDir, lineCountCache, findings);
+  }
+  return findings;
+}
+
 /** Append broken-citation diagnostics for every entry inside a single ^[...] marker. */
 async function collectBrokenForMarker(
   captured: string,
@@ -530,21 +559,31 @@ async function resolveLineCount(
 export async function checkMalformedClaimCitations(root: string): Promise<LintResult[]> {
   const pages = await collectAllPages(root);
   const results: LintResult[] = [];
-
   for (const page of pages) {
-    for (const { captured, line } of findMatchesInContent(page.content, CITATION_PATTERN)) {
-      for (const part of captured.split(",")) {
-        if (!isMalformedCitationEntry(part)) continue;
-        results.push({
-          rule: "malformed-claim-citation",
-          severity: "error",
-          file: page.filePath,
-          message: `Malformed claim citation ^[${captured}] — expected file.md, file.md:N-N, or file.md#LN-LN`,
-          line,
-        });
-      }
-    }
+    results.push(...checkPageMalformedCitations(page.content, page.filePath));
   }
+  return results;
+}
 
+/**
+ * Single-page core of {@link checkMalformedClaimCitations}, shared by the
+ * on-disk lint walker and the candidate-lint path behind `compile --review`.
+ *
+ * @param content - Page markdown to scan for citation markers.
+ * @param filePath - Path recorded in each diagnostic's `file` field.
+ * @returns One error-severity diagnostic per comma-separated marker entry
+ *   that `isMalformedCitationEntry` flags; each quotes the whole marker.
+ */
+export function checkPageMalformedCitations(content: string, filePath: string): LintResult[] {
+  const results: LintResult[] = [];
+  for (const { captured, line } of findMatchesInContent(content, CITATION_PATTERN)) {
+    // The message quotes the whole marker, so it is built once per marker.
+    const message = `Malformed claim citation ^[${captured}] — expected file.md, file.md:N-N, or file.md#LN-LN`;
+    for (const entry of captured.split(",")) {
+      if (isMalformedCitationEntry(entry)) {
+        results.push({ rule: "malformed-claim-citation", severity: "error", file: filePath, message, line });
+      }
+    }
+  }
   return results;
 }
diff --git a/src/utils/types.ts b/src/utils/types.ts
@@ -156,6 +156,16 @@ export interface ReviewCandidate {
    * `review show` surfaces these so reviewers see failures before approving.
    */
   schemaViolations?: import("../linter/types.js").LintResult[];
+  /**
+   * Provenance lint violations detected at candidate-generation time.
+   *
+   * Covers malformed claim citations (`^[file.md:abc]`), out-of-bounds
+   * line spans, and citations referencing source files that don't exist.
+   * Surfaced in `review show` next to schema violations so reviewers
+   * catch citation issues before approving — these used to only show up
+   * on the next normal `compile` after the page was already promoted.
+   */
+  provenanceViolations?: import("../linter/types.js").LintResult[];
 }
 
 /** A single chunk citation surfaced as part of a query result. */

diff --git a/test/fixtures/review-show-helpers.ts b/test/fixtures/review-show-helpers.ts
@@ -0,0 +1,21 @@
+/**
+ * Shared helpers for tests that exercise `llmwiki review show`.
+ *
+ * Centralises the console.log spy + invocation pattern so the schema and
+ * provenance violation suites share one implementation. fallow's CI mode
+ * flags the duplicate boilerplate as a clone group otherwise.
+ */
+
+import { vi } from "vitest";
+import reviewShowCommand from "../../src/commands/review-show.js";
+
+/**
+ * Run `reviewShowCommand` for one candidate id with console.log spied and
+ * silenced; resolve to its output (args joined by spaces, calls by newlines).
+ * The spy is restored on success and failure so it cannot leak across tests.
+ */
+export async function captureShowOutput(candidateId: string): Promise<string> {
+  const logSpy = vi.spyOn(console, "log").mockImplementation(() => {});
+  const collect = (): string => logSpy.mock.calls.map((args) => args.join(" ")).join("\n");
+  return reviewShowCommand(candidateId).then(collect).finally(() => logSpy.mockRestore());
+}