Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/commands/review-show.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,12 @@ export default async function reviewShowCommand(id: string): Promise<void> {
output.status("!", output.warn(`[${v.severity}] ${v.message}`));
}
}

if (candidate.provenanceViolations && candidate.provenanceViolations.length > 0) {
console.log();
output.header("Provenance violations");
for (const v of candidate.provenanceViolations) {
output.status("!", output.warn(`[${v.severity}] ${v.message}`));
}
}
}
8 changes: 8 additions & 0 deletions src/compiler/candidates.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ interface CandidateDraft {
* Omit (or pass `undefined`) when the candidate body is clean.
*/
schemaViolations?: LintResult[];
/**
* Provenance lint violations for the candidate body — malformed claim
* citations, out-of-bounds spans, or missing source files. Surfaced
* alongside schema violations so reviewers see citation issues before
* approving.
*/
provenanceViolations?: LintResult[];
}

/** Build a deterministic-but-unique id from a slug and a short random suffix. */
Expand Down Expand Up @@ -88,6 +95,7 @@ export async function writeCandidate(
generatedAt: new Date().toISOString(),
...(draft.sourceStates ? { sourceStates: draft.sourceStates } : {}),
...(draft.schemaViolations ? { schemaViolations: draft.schemaViolations } : {}),
...(draft.provenanceViolations ? { provenanceViolations: draft.provenanceViolations } : {}),
};

await atomicWrite(candidatePath(root, candidate.id), JSON.stringify(candidate, null, 2));
Expand Down
45 changes: 39 additions & 6 deletions src/compiler/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,12 @@ import { buildBudgetedCombinedContent, type SourceSlice } from "./prompt-budget.
import { addObsidianMeta, generateMOC } from "./obsidian.js";
import { updateEmbeddings } from "../utils/embeddings.js";
import { writeCandidate } from "./candidates.js";
import { checkPageCrossLinks } from "../linter/rules.js";
import {
checkPageBrokenCitations,
checkPageCrossLinks,
checkPageMalformedCitations,
} from "../linter/rules.js";
import type { LintResult } from "../linter/types.js";
import { renderMergedPageContent } from "./page-renderer.js";
import * as output from "../utils/output.js";
import {
Expand Down Expand Up @@ -554,11 +559,18 @@ async function persistReviewCandidate(
sourceStates: SourceStateMap,
schema: SchemaConfig,
): Promise<MergedPageOutcome> {
// Run schema-aware lint against the candidate body so violations are visible
// in `review show` before a reviewer approves the page. The virtual file path
// uses the slug so diagnostics are identifiable without a real disk path.
// Run schema-aware AND provenance-aware lint against the candidate body so
// both classes of violation are visible in `review show` before a reviewer
// approves the page. The virtual file path uses the slug so diagnostics
// are identifiable without a real disk path. Provenance lint covers the
// citation rules that previously only ran on the post-promotion compile.
const virtualPath = `wiki/concepts/${entry.slug}.md`;
const violations = checkPageCrossLinks(fullPage, virtualPath, schema);
const schemaViolations = checkPageCrossLinks(fullPage, virtualPath, schema);
const provenanceViolations = await collectCandidateProvenanceViolations(
root,
fullPage,
virtualPath,
);

const candidate: ReviewCandidate = await writeCandidate(root, {
title: entry.concept.concept,
Expand All @@ -567,12 +579,33 @@ async function persistReviewCandidate(
sources: entry.sourceFiles,
body: fullPage,
sourceStates: pickStatesForSources(sourceStates, entry.sourceFiles),
schemaViolations: violations.length > 0 ? violations : undefined,
schemaViolations: schemaViolations.length > 0 ? schemaViolations : undefined,
provenanceViolations:
provenanceViolations.length > 0 ? provenanceViolations : undefined,
});
output.status("?", output.info(`Candidate ready: ${candidate.id} (${entry.slug})`));
return { candidateId: candidate.id };
}

/**
 * Gather the provenance diagnostics for a single in-memory candidate body.
 *
 * Two per-page rules are applied: the malformed-claim-citation check and the
 * broken-citation check (missing source files / out-of-bounds line spans),
 * the latter resolved against the project's sources directory under `root`.
 *
 * @param root - Project root; the sources directory is resolved beneath it.
 * @param fullPage - Complete candidate page body to lint.
 * @param virtualPath - Logical path embedded in the diagnostics.
 * @returns Malformed-citation findings followed by broken-citation findings.
 */
async function collectCandidateProvenanceViolations(
  root: string,
  fullPage: string,
  virtualPath: string,
): Promise<LintResult[]> {
  const sourcesDir = path.join(root, SOURCES_DIR);
  const malformedFindings = checkPageMalformedCitations(fullPage, virtualPath);
  const brokenFindings = await checkPageBrokenCitations(fullPage, virtualPath, sourcesDir);
  return malformedFindings.concat(brokenFindings);
}

/**
* Materialise schema-declared seed pages (overview, comparison, entity).
* Each seed page is written under wiki/concepts/ next to concept pages so
Expand Down
73 changes: 56 additions & 17 deletions src/linter/rules.ts
Original file line number Diff line number Diff line change
Expand Up @@ -458,18 +458,47 @@ export async function checkBrokenCitations(root: string): Promise<LintResult[]>
const pages = await collectAllPages(root);
const sourcesDir = path.join(root, SOURCES_DIR);
const results: LintResult[] = [];
/** Cache of source filename → line count to avoid repeated reads. */
const lineCountCache = new Map<string, number>();

for (const page of pages) {
for (const { captured, line } of findMatchesInContent(page.content, CITATION_PATTERN)) {
await collectBrokenForMarker(captured, line, page.filePath, sourcesDir, lineCountCache, results);
}
const pageFindings = await checkPageBrokenCitations(
page.content,
page.filePath,
sourcesDir,
lineCountCache,
);
results.push(...pageFindings);
}

return results;
}

/**
 * Single-page counterpart of {@link checkBrokenCitations}: scans one page
 * body for citation markers and collects the broken-citation diagnostics
 * for each marker. It is called once per page by the on-disk lint walker
 * and once per candidate by the in-memory candidate-lint path, so
 * `compile --review` can surface broken-source-file and out-of-bounds span
 * findings before a reviewer approves the candidate.
 *
 * @param content - Full page markdown including frontmatter.
 * @param filePath - Logical path embedded in diagnostics (may be virtual).
 * @param sourcesDir - Absolute path to the project's sources/ directory.
 * @param lineCountCache - Optional cache of source filename → line count.
 *   Pass a shared map when linting many pages so line counts aren't
 *   re-read; when omitted, a fresh map is created for this call.
 * @returns Diagnostics for this page, in marker order.
 */
export async function checkPageBrokenCitations(
  content: string,
  filePath: string,
  sourcesDir: string,
  lineCountCache: Map<string, number> = new Map(),
): Promise<LintResult[]> {
  const findings: LintResult[] = [];
  const markers = findMatchesInContent(content, CITATION_PATTERN);
  // Markers are processed one at a time: each call appends to the shared
  // `findings` array and may populate the shared line-count cache.
  for (const marker of markers) {
    await collectBrokenForMarker(
      marker.captured,
      marker.line,
      filePath,
      sourcesDir,
      lineCountCache,
      findings,
    );
  }
  return findings;
}

/** Append broken-citation diagnostics for every entry inside a single ^[...] marker. */
async function collectBrokenForMarker(
captured: string,
Expand Down Expand Up @@ -530,21 +559,31 @@ async function resolveLineCount(
export async function checkMalformedClaimCitations(root: string): Promise<LintResult[]> {
  // Walks every page collected under `root` and returns the concatenated
  // malformed-claim-citation diagnostics, in page order.
  const pages = await collectAllPages(root);
  const results: LintResult[] = [];
  for (const page of pages) {
    // Delegate to the per-page checker only. Running the inline citation
    // scan here as well would report every malformed entry twice, because
    // checkPageMalformedCitations performs that exact scan.
    results.push(...checkPageMalformedCitations(page.content, page.filePath));
  }
  return results;
}

/**
 * Single-page counterpart of {@link checkMalformedClaimCitations}.
 *
 * Finds every citation marker in `content`, splits the marker's captured
 * text on commas, and emits one `malformed-claim-citation` error for each
 * entry that fails the malformed-entry check. Shared by the on-disk lint
 * walker and the in-memory candidate-lint path so `compile --review`
 * surfaces malformed claim citations before a reviewer approves the
 * candidate.
 *
 * @param content - Page body to scan.
 * @param filePath - Path recorded in each diagnostic's `file` field.
 * @returns One diagnostic per malformed entry; the message quotes the whole
 *   marker, so a marker with several bad entries repeats the same message.
 */
export function checkPageMalformedCitations(content: string, filePath: string): LintResult[] {
  const findings: LintResult[] = [];
  for (const marker of findMatchesInContent(content, CITATION_PATTERN)) {
    const { captured, line } = marker;
    // The message depends only on the marker, not on the individual entry.
    const message = `Malformed claim citation ^[${captured}] — expected file.md, file.md:N-N, or file.md#LN-LN`;
    for (const entry of captured.split(",")) {
      if (isMalformedCitationEntry(entry)) {
        findings.push({
          rule: "malformed-claim-citation",
          severity: "error",
          file: filePath,
          message,
          line,
        });
      }
    }
  }
  return findings;
}
10 changes: 10 additions & 0 deletions src/utils/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,16 @@ export interface ReviewCandidate {
* `review show` surfaces these so reviewers see failures before approving.
*/
schemaViolations?: import("../linter/types.js").LintResult[];
/**
* Provenance lint violations detected at candidate-generation time.
*
* Covers malformed claim citations (`^[file.md:abc]`), out-of-bounds
* line spans, and citations referencing source files that don't exist.
* Surfaced in `review show` next to schema violations so reviewers
* catch citation issues before approving — these used to only show up
* on the next normal `compile` after the page was already promoted.
*/
provenanceViolations?: import("../linter/types.js").LintResult[];
}

/** A single chunk citation surfaced as part of a query result. */
Expand Down
21 changes: 21 additions & 0 deletions test/fixtures/review-show-helpers.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/**
* Shared helpers for tests that exercise `llmwiki review show`.
*
* Centralises the console.log spy + invocation pattern so the schema and
* provenance violation suites share one implementation. fallow's CI mode
* flags the duplicate boilerplate as a clone group otherwise.
*/

import { vi } from "vitest";
import reviewShowCommand from "../../src/commands/review-show.js";

/**
 * Run `reviewShowCommand` for a single candidate id and return all the
 * console.log output it produced as a single newline-joined string.
 * Tests assert against substrings of the returned text.
 *
 * `console.log` is silenced with a spy for the duration of the call and is
 * always restored afterwards, including when the command rejects, so the
 * mock does not leak into later tests.
 *
 * @param candidateId - Id of the review candidate to show.
 * @returns Each console.log call's arguments joined by a space, with the
 *   calls joined by newlines.
 */
export async function captureShowOutput(candidateId: string): Promise<string> {
  const logSpy = vi.spyOn(console, "log").mockImplementation(() => {});
  try {
    await reviewShowCommand(candidateId);
    // Build the snapshot before the `finally` block runs: restoring the spy
    // also resets its recorded calls.
    return logSpy.mock.calls.map((args) => args.join(" ")).join("\n");
  } finally {
    logSpy.mockRestore();
  }
}
Loading
Loading