From e8fa12325847aa5faf0eb0061d8661eb34fe662e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Thu=E1=BA=ADn=20Ph=C3=A1t?= Date: Wed, 29 Apr 2026 00:03:43 +0700 Subject: [PATCH 01/13] feat: [ENG-2518] batch abstract generation across queued files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Collapse the abstract queue's per-file LLM calls (2N today: one L0 .abstract.md + one L1 .overview.md per file) into batched calls of up to BATCH_SIZE_CAP=5 files per cycle, with two parallel LLM calls per cycle (1 batched L0 + 1 batched L1). - generateFileAbstractsBatch(items, generator) in abstract-generator.ts: XML-tagged input/output, tolerant regex parser, missing paths get empty strings. - AbstractGenerationQueue: buffer-until-cap-or-drain — enqueue only triggers scheduleNext when pending >= cap or drainRequested. drain() sets the flag to flush partial batches at curate-end. - Per-batch failure → re-enqueue each item individually with its own attempts counter; existing 3x exponential backoff preserved. - Per-file content cap (20 KB) matches the non-batched path. 
Real-LLM verification on ByteRover paid-tier: 5-pack (5 files): 1 batch, 2 LLM calls (vs 10 non-batched, -80%) 6-distinct (6 files): 2 batches (5+1), 4 LLM calls (vs 12, -67%) 10-pack: per-batch failure + retry path exercised end-to-end --- src/agent/infra/map/abstract-generator.ts | 162 ++++++++++++++++++ src/agent/infra/map/abstract-queue.ts | 112 +++++++++--- .../map/abstract-generator-batch.test.ts | 132 ++++++++++++++ test/unit/agent/map/abstract-queue.test.ts | 109 ++++++++++-- 4 files changed, 479 insertions(+), 36 deletions(-) create mode 100644 test/unit/agent/map/abstract-generator-batch.test.ts diff --git a/src/agent/infra/map/abstract-generator.ts b/src/agent/infra/map/abstract-generator.ts index 1576d3723..4f272c19f 100644 --- a/src/agent/infra/map/abstract-generator.ts +++ b/src/agent/infra/map/abstract-generator.ts @@ -50,6 +50,115 @@ ${content} /** Truncate content before embedding in LLM prompts to avoid exceeding model context windows during bulk ingest. */ const MAX_ABSTRACT_CONTENT_CHARS = 20_000 +/** + * Per-file truncation when N files share a single batched call. Matches the + * non-batched cap (20 KB) so each file gets the same view of its content + * regardless of batched vs per-file mode — total batched user content scales + * linearly with N. Avoids quality regression on long-file curates that batched + * mode would otherwise see. + */ +const MAX_BATCHED_CONTENT_CHARS_PER_FILE = MAX_ABSTRACT_CONTENT_CHARS + +/** L0 batch output budget: 5 files × ~80 tokens + framing tags ≈ 600 tokens. */ +const BATCH_L0_MAX_OUTPUT_TOKENS = 800 + +/** L1 batch output budget: 5 files × ~1500 tokens + framing tags ≈ 8000 tokens. */ +const BATCH_L1_MAX_OUTPUT_TOKENS = 8500 + +/** + * Result from a batched abstract generation. One entry per input item, in + * input order. Empty string fields signal the model failed to produce content + * for that path — the caller's existing fail-open semantics still apply. 
+ */ +export interface BatchedAbstractItem { + abstractContent: string + contextPath: string + overviewContent: string +} + +const BATCHED_ABSTRACT_SYSTEM_PROMPT = `You are a technical documentation assistant. +You produce precise one-line summaries of knowledge documents in a strict XML format. +Output ONLY the XML — no preamble, no commentary, no markdown fences.` + +const BATCHED_OVERVIEW_SYSTEM_PROMPT = `You are a technical documentation assistant. +You produce structured overviews of knowledge documents in a strict XML format. +Output ONLY the XML — no preamble, no commentary, no markdown fences.` + +function escapeXmlAttr(value: string): string { + return value.replaceAll('&', '&').replaceAll('"', '"').replaceAll('<', '<').replaceAll('>', '>') +} + +function buildBatchedAbstractPrompt(items: ReadonlyArray<{content: string; contextPath: string;}>): string { + const filesXml = items.map((it) => ` + +${it.content} + +`).join('\n') + + return `For each of the following knowledge documents, produce a ONE-LINE summary (max 80 tokens) that is a complete sentence capturing the core topic and key insight. + +Output format — emit exactly one element per input file, with the same path attribute: +One-line summary. + +Output only these XML elements, in any order. No preamble, no markdown fences. + + +${filesXml} +` +} + +function buildBatchedOverviewPrompt(items: ReadonlyArray<{content: string; contextPath: string;}>): string { + const filesXml = items.map((it) => ` + +${it.content} + +`).join('\n') + + return `For each of the following knowledge documents, produce a structured overview (markdown, under 1500 tokens) that includes: +- Key points (3-7 bullet points) +- Structure / sections summary +- Any notable entities, patterns, or decisions mentioned + +Output format — emit exactly one element per input file, with the same path attribute: + +- bullet 1 +- bullet 2 +... + + +Output only these XML elements, in any order. No preamble, no markdown fences. 
+ + +${filesXml} +` +} + +/** + * Extract ... per from the model output. + * Tolerant: ignores extra whitespace, supports nested newlines inside the inner + * tag. Returns a Map keyed by path. Paths that don't appear are absent. + */ +function parseBatchedTags(response: string, innerTag: 'abstract' | 'overview'): Map { + const result = new Map() + // Match ...BODY...; lazy on body so + // multiple blocks don't bleed into one. The `.` flag-less regex relies + // on the s-flag (dotAll) to span newlines. + const fileRe = /]*>([\s\S]*?)<\/file>/g + const innerRe = new RegExp(`<${innerTag}>([\\s\\S]*?)<\\/${innerTag}>`) + + let m: null | RegExpExecArray + while ((m = fileRe.exec(response)) !== null) { + const [, rawPath, innerXml] = m + const path = rawPath.replaceAll('&', '&').replaceAll('"', '"').replaceAll('<', '<').replaceAll('>', '>') + const inner = innerRe.exec(innerXml) + if (inner) { + result.set(path, inner[1].trim()) + } + } + + return result +} + /** * Generate L0 abstract and L1 overview for a knowledge file. * @@ -88,3 +197,56 @@ export async function generateFileAbstracts( overviewContent: overviewText.trim(), } } + +/** + * Generate L0 abstracts and L1 overviews for N knowledge files in two batched + * LLM calls (one batch for all L0s, one for all L1s) instead of 2N per-file + * calls. + * + * Two parallel calls; each call carries all input files in an XML envelope + * and the model is instructed to return one element per file. Output is + * parsed by path tag and matched back to the input order. Files the model + * fails to produce content for receive empty strings (caller's existing + * fail-open semantics still apply). + * + * Caller is responsible for capping batch size; this function does not split + * its input. Recommended cap is 5 files per call to keep the L1 batch's + * output budget under ~8K tokens. 
+ */ +export async function generateFileAbstractsBatch( + items: ReadonlyArray<{contextPath: string; fullContent: string}>, + generator: IContentGenerator, +): Promise { + if (items.length === 0) return [] + + const truncated = items.map((it) => ({ + content: it.fullContent.slice(0, MAX_BATCHED_CONTENT_CHARS_PER_FILE), + contextPath: it.contextPath, + })) + + const [abstractText, overviewText] = await Promise.all([ + streamToText(generator, { + config: {maxTokens: BATCH_L0_MAX_OUTPUT_TOKENS, temperature: 0}, + contents: [{content: buildBatchedAbstractPrompt(truncated), role: 'user'}], + model: 'default', + systemPrompt: BATCHED_ABSTRACT_SYSTEM_PROMPT, + taskId: randomUUID(), + }), + streamToText(generator, { + config: {maxTokens: BATCH_L1_MAX_OUTPUT_TOKENS, temperature: 0}, + contents: [{content: buildBatchedOverviewPrompt(truncated), role: 'user'}], + model: 'default', + systemPrompt: BATCHED_OVERVIEW_SYSTEM_PROMPT, + taskId: randomUUID(), + }), + ]) + + const abstracts = parseBatchedTags(abstractText, 'abstract') + const overviews = parseBatchedTags(overviewText, 'overview') + + return items.map((it) => ({ + abstractContent: (abstracts.get(it.contextPath) ?? '').trim(), + contextPath: it.contextPath, + overviewContent: (overviews.get(it.contextPath) ?? '').trim(), + })) +} diff --git a/src/agent/infra/map/abstract-queue.ts b/src/agent/infra/map/abstract-queue.ts index 05d17aaba..2884544d3 100644 --- a/src/agent/infra/map/abstract-queue.ts +++ b/src/agent/infra/map/abstract-queue.ts @@ -4,7 +4,17 @@ import {join} from 'node:path' import type {IContentGenerator} from '../../core/interfaces/i-content-generator.js' -import {generateFileAbstracts} from './abstract-generator.js' +import {generateFileAbstractsBatch} from './abstract-generator.js' + +/** + * Maximum files combined into a single batched L0/L1 LLM call. + * + * Two parallel calls fire per cycle: one L0 batch (~80 tok output × N files + + * tags), one L1 batch (~1500 tok output × N files + tags). 
At N=5 the L1 + * output budget caps at ~8K tokens; raising N further risks output truncation + * on smaller-context models. Lowering N reduces savings without quality gain. + */ +const BATCH_SIZE_CAP = 5 const QUEUE_TRACE_ENABLED = process.env.BRV_QUEUE_TRACE === '1' const LOG_PATH = process.env.BRV_SESSION_LOG @@ -47,6 +57,13 @@ export interface AbstractQueueStatus { * - drain() waits for all pending/processing items to complete (for graceful shutdown) */ export class AbstractGenerationQueue { + /** + * When true, scheduleNext fires the next batch even if pending is below + * BATCH_SIZE_CAP. Set by drain(); reset once the queue is fully idle. + * Without this, items below the cap would be buffered indefinitely with + * no flush trigger when a curate writes fewer files than the cap. + */ + private drainRequested = false private drainResolvers: Array<() => void> = [] private failed = 0 private generator: IContentGenerator | undefined @@ -71,7 +88,13 @@ export class AbstractGenerationQueue { */ async drain(): Promise { queueLog(`drain:start idle=${this.isIdle()} pending=${this.pending.length} retrying=${this.retrying} processing=${this.processing}`) + // Force any buffered (below-cap) pending items to fire as a final batch. + // scheduleNext respects drainRequested even when pending < BATCH_SIZE_CAP. + this.drainRequested = true + this.scheduleNext() + if (this.isIdle()) { + this.drainRequested = false await this.statusWritePromise.catch(() => {}) queueLog('drain:resolved-immediate') return @@ -105,7 +128,13 @@ export class AbstractGenerationQueue { this.pending.push({attempts: 0, contextPath: item.contextPath, fullContent: item.fullContent}) queueLog(`enqueue path=${item.contextPath} pending=${this.pending.length} retrying=${this.retrying} processing=${this.processing}`) this.queueStatusWrite() - this.scheduleNext() + // Buffer until cap is reached; drain() will trigger the final flush + // for partial batches at curate-end. 
Without this gating, the first + // enqueue starts a 1-item batch before the curate finishes writing + // the rest of its files. + if (this.pending.length >= BATCH_SIZE_CAP || this.drainRequested) { + this.scheduleNext() + } } /** @@ -151,8 +180,10 @@ export class AbstractGenerationQueue { this.processing = true this.queueStatusWrite() - const item = this.pending.shift()! - queueLog(`process:start path=${item.contextPath} remaining=${this.pending.length} retrying=${this.retrying}`) + // Drain up to BATCH_SIZE_CAP items into a single batch. Items beyond the + // cap stay pending for the next cycle. + const batch = this.pending.splice(0, BATCH_SIZE_CAP) + queueLog(`process:start batchSize=${batch.length} remaining=${this.pending.length} retrying=${this.retrying}`) try { // Refresh credentials before each generation (OAuth tokens may expire) @@ -163,28 +194,45 @@ export class AbstractGenerationQueue { console.debug(`[AbstractQueue] token refresh failed, proceeding with existing generator: ${msg}`) } - const {abstractContent, overviewContent} = await generateFileAbstracts( - item.fullContent, + const results = await generateFileAbstractsBatch( + batch.map((it) => ({contextPath: it.contextPath, fullContent: it.fullContent})), this.generator, ) - // Derive sibling paths: replace .md with .abstract.md and .overview.md - const abstractPath = item.contextPath.replace(/\.md$/, '.abstract.md') - const overviewPath = item.contextPath.replace(/\.md$/, '.overview.md') - - await Promise.all([ - writeFile(abstractPath, abstractContent, 'utf8'), - writeFile(overviewPath, overviewContent, 'utf8'), - ]) - - this.processed++ - queueLog(`process:success path=${item.contextPath} processed=${this.processed}`) + // Write all batched outputs in parallel. Empty strings are valid (model + // produced no content for that path) — preserves existing fail-open. 
+ await Promise.all(results.flatMap((r) => { + const abstractPath = r.contextPath.replace(/\.md$/, '.abstract.md') + const overviewPath = r.contextPath.replace(/\.md$/, '.overview.md') + return [ + writeFile(abstractPath, r.abstractContent, 'utf8'), + writeFile(overviewPath, r.overviewContent, 'utf8'), + ] + })) + + this.processed += batch.length + queueLog(`process:success batchSize=${batch.length} processed=${this.processed}`) } catch (error) { + // Batch-level failure → re-enqueue each item individually with its own + // attempts counter, mirroring per-item retry semantics. Items past + // maxAttempts count as failed. const msg = error instanceof Error ? error.message : String(error) - console.debug(`[AbstractQueue] ${item.contextPath} attempt ${item.attempts + 1}/${this.maxAttempts}: ${msg}`) - item.attempts++ - if (item.attempts < this.maxAttempts) { - // Exponential backoff: 500ms, 1000ms, 2000ms, ... + const failedThisCycle: QueueItem[] = [] + const retryThisCycle: QueueItem[] = [] + for (const item of batch) { + item.attempts++ + if (item.attempts < this.maxAttempts) { + retryThisCycle.push(item) + } else { + this.failed++ + failedThisCycle.push(item) + queueLog(`process:failed path=${item.contextPath} failed=${this.failed}`) + } + } + + console.debug(`[AbstractQueue] batch attempt failed (${msg}); retrying=${retryThisCycle.length}, exhausted=${failedThisCycle.length}`) + + for (const item of retryThisCycle) { const delay = 500 * 2 ** (item.attempts - 1) this.retrying++ this.queueStatusWrite() @@ -195,13 +243,10 @@ export class AbstractGenerationQueue { this.queueStatusWrite() this.scheduleNext() }, delay) - } else { - this.failed++ - queueLog(`process:failed path=${item.contextPath} failed=${this.failed}`) } } finally { this.processing = false - queueLog(`process:finally path=${item.contextPath} pending=${this.pending.length} retrying=${this.retrying} processed=${this.processed} failed=${this.failed}`) + queueLog(`process:finally batchSize=${batch.length} 
pending=${this.pending.length} retrying=${this.retrying} processed=${this.processed} failed=${this.failed}`) this.queueStatusWrite() } @@ -220,6 +265,10 @@ export class AbstractGenerationQueue { return } + // Reset drain state once the queue settles — next curate's enqueue burst + // should buffer normally up to BATCH_SIZE_CAP again. + this.drainRequested = false + queueLog(`drain:idle pending=${this.pending.length} retrying=${this.retrying} processed=${this.processed} failed=${this.failed}`) const resolvers = this.drainResolvers.splice(0) const settledStatusWrite = this.statusWritePromise.catch(() => {}) @@ -229,11 +278,22 @@ export class AbstractGenerationQueue { } private scheduleNext(): void { - if (!this.generator || this.processing || this.pending.length === 0) { + if (!this.generator || this.processing) { + return + } + + if (this.pending.length === 0) { this.resolveDrainersIfIdle() return } + // Buffer items below the cap unless drain has been requested (curate-end + // signal). This keeps the queue from firing partial 1-item batches in the + // middle of a multi-file curate. + if (this.pending.length < BATCH_SIZE_CAP && !this.drainRequested) { + return + } + // eslint-disable-next-line no-void setImmediate(() => { void this.processNext() }) } diff --git a/test/unit/agent/map/abstract-generator-batch.test.ts b/test/unit/agent/map/abstract-generator-batch.test.ts new file mode 100644 index 000000000..ceb535464 --- /dev/null +++ b/test/unit/agent/map/abstract-generator-batch.test.ts @@ -0,0 +1,132 @@ +import {expect} from 'chai' +import {createSandbox, type SinonSandbox} from 'sinon' + +import type {IContentGenerator} from '../../../../src/agent/core/interfaces/i-content-generator.js' + +import {generateFileAbstractsBatch} from '../../../../src/agent/infra/map/abstract-generator.js' + +/** + * Build a generator whose generateContentStream yields a fixed text response + * the n-th time it's called. 
Useful for stubbing the parallel L0/L1 batch + * calls with two distinct texts. + */ +function makeScriptedGenerator(sandbox: SinonSandbox, responsesByCall: string[]): IContentGenerator { + let callIndex = 0 + return { + estimateTokensSync: () => 10, + generateContent: sandbox.stub().rejects(new Error('n/a')), + generateContentStream: sandbox.stub().callsFake(async function *() { + const text = responsesByCall[callIndex++] ?? '' + yield {content: text, isComplete: false} + yield {isComplete: true} + }), + } as unknown as IContentGenerator +} + +describe('generateFileAbstractsBatch', () => { + const sandbox = createSandbox() + + afterEach(() => sandbox.restore()) + + it('returns one result per input file when the model responds with all paths', async () => { + const l0Response = [ + 'One-line summary of A.', + 'One-line summary of B.', + ].join('\n') + const l1Response = [ + '- bullet 1\n- bullet 2\n- bullet 3', + '- bullet 1\n- bullet 2', + ].join('\n') + + const generator = makeScriptedGenerator(sandbox, [l0Response, l1Response]) + const result = await generateFileAbstractsBatch( + [ + {contextPath: 'a.md', fullContent: 'content of A'}, + {contextPath: 'b.md', fullContent: 'content of B'}, + ], + generator, + ) + + expect(result).to.have.lengthOf(2) + expect(result[0].contextPath).to.equal('a.md') + expect(result[0].abstractContent).to.equal('One-line summary of A.') + expect(result[0].overviewContent).to.contain('bullet 1') + expect(result[1].contextPath).to.equal('b.md') + expect(result[1].abstractContent).to.equal('One-line summary of B.') + }) + + it('keeps input order when the model returns paths out of order', async () => { + const l0Response = [ + 'B summary.', + 'A summary.', + ].join('\n') + const l1Response = [ + 'B over.', + 'A over.', + ].join('\n') + + const generator = makeScriptedGenerator(sandbox, [l0Response, l1Response]) + const result = await generateFileAbstractsBatch( + [ + {contextPath: 'a.md', fullContent: 'A'}, + {contextPath: 'b.md', 
fullContent: 'B'}, + ], + generator, + ) + + expect(result.map((r: {contextPath: string}) => r.contextPath)).to.deep.equal(['a.md', 'b.md']) + expect(result[0].abstractContent).to.equal('A summary.') + expect(result[1].abstractContent).to.equal('B summary.') + }) + + it('returns empty strings for files the model omits', async () => { + const l0Response = 'Only A.' + const l1Response = 'Only A over.' + + const generator = makeScriptedGenerator(sandbox, [l0Response, l1Response]) + const result = await generateFileAbstractsBatch( + [ + {contextPath: 'a.md', fullContent: 'A'}, + {contextPath: 'b.md', fullContent: 'B'}, + ], + generator, + ) + + expect(result).to.have.lengthOf(2) + expect(result[0].abstractContent).to.equal('Only A.') + expect(result[1].abstractContent).to.equal('') + expect(result[1].overviewContent).to.equal('') + }) + + it('returns empty strings when the model output is malformed (no matching tags)', async () => { + const generator = makeScriptedGenerator(sandbox, ['random unparseable text', 'also unparseable']) + const result = await generateFileAbstractsBatch( + [ + {contextPath: 'a.md', fullContent: 'A'}, + ], + generator, + ) + + expect(result).to.have.lengthOf(1) + expect(result[0].abstractContent).to.equal('') + expect(result[0].overviewContent).to.equal('') + }) + + it('issues exactly two LLM calls regardless of batch size (one L0 batch, one L1 batch)', async () => { + const l0Response = Array.from({length: 5}, (_, i) => + `S${i}.`, + ).join('\n') + const l1Response = Array.from({length: 5}, (_, i) => + `O${i}.`, + ).join('\n') + + const generator = makeScriptedGenerator(sandbox, [l0Response, l1Response]) + await generateFileAbstractsBatch( + Array.from({length: 5}, (_, i) => ({contextPath: `${i}.md`, fullContent: `c${i}`})), + generator, + ) + + const stubbed = generator.generateContentStream as ReturnType + expect(stubbed.callCount).to.equal(2) + }) +}) diff --git a/test/unit/agent/map/abstract-queue.test.ts 
b/test/unit/agent/map/abstract-queue.test.ts index 9ea3a4b9d..2339837a1 100644 --- a/test/unit/agent/map/abstract-queue.test.ts +++ b/test/unit/agent/map/abstract-queue.test.ts @@ -20,12 +20,24 @@ function makeFailingGenerator(sandbox: SinonSandbox): IContentGenerator { } as unknown as IContentGenerator } +/** + * Stream stub that responds in the XML format expected by H3's batched generator. + * Sniffs the request's user content for `generated text` per detected + * path. The L0 vs L1 branch is detected from the system prompt. + */ function makeSuccessfulGenerator(sandbox: SinonSandbox): IContentGenerator { return { estimateTokensSync: () => 10, generateContent: sandbox.stub().rejects(new Error('n/a')), - generateContentStream: sandbox.stub().callsFake(async function *() { - yield {content: 'generated text', isComplete: false} + generateContentStream: sandbox.stub().callsFake(async function *(request: {contents?: Array<{content?: string}>; systemPrompt?: string}) { + const userContent = request.contents?.[0]?.content ?? '' + const isAbstract = (request.systemPrompt ?? '').includes('one-line') + const innerTag = isAbstract ? 'abstract' : 'overview' + const pathMatches = [...userContent.matchAll(/ 0 ? pathMatches.map((m) => m[1]) : ['unknown'] + const xml = paths.map((p) => `<${innerTag}>generated text`).join('\n') + yield {content: xml, isComplete: false} yield {isComplete: true} }), } as unknown as IContentGenerator @@ -99,24 +111,28 @@ describe('AbstractGenerationQueue', () => { const q = new AbstractGenerationQueue(tmpDir, 2) // maxAttempts=2 → one retry q.setGenerator(generator) - q.enqueue({contextPath: join(tmpDir, 'file.md'), fullContent: 'content'}) + // Enqueue BATCH_SIZE_CAP=5 items so the batch fires immediately. 
+ for (let i = 0; i < 5; i++) { + q.enqueue({contextPath: join(tmpDir, `file-${i}.md`), fullContent: 'content'}) + } - // scheduleNext fires via setImmediate; processNext is now awaiting generateFileAbstracts + // scheduleNext fires via setImmediate; processNext is now awaiting generateFileAbstractsBatch await new Promise((r) => { setImmediate(r) }) expect(q.getStatus().processing).to.equal(true) - // Trigger failure — processNext catch fires: retrying++, setTimeout(500ms backoff) + // Trigger failure — Promise.all over the two parallel streams rejects, + // processNext catch fires: each item retrying++, setTimeout(500ms backoff) rejectNextCall(new Error('deliberate failure')) - // Two setImmediate ticks: one for the catch block, one for the finally block + // Ticks for catch + finally + microtask queue + await new Promise((r) => { setImmediate(r) }) await new Promise((r) => { setImmediate(r) }) await new Promise((r) => { setImmediate(r) }) - // Item is now in retry backoff: retrying=1, pending=[] - // Before fix: getStatus().pending was 0 (retrying items invisible) - // After fix: getStatus().pending is 1 (retrying items folded into pending) + // All 5 items now in retry backoff: retrying=5, pending=[] + // getStatus().pending folds retrying into pending so callers don't see false-idle. const status = q.getStatus() expect(status.processing).to.equal(false) - expect(status.pending).to.equal(1) + expect(status.pending).to.equal(5) expect(status.failed).to.equal(0) }) }) @@ -215,6 +231,79 @@ describe('AbstractGenerationQueue', () => { expect(written.pending).to.equal(1) }) + it('buffers items below BATCH_SIZE_CAP without firing LLM calls', async () => { + const successfulGenerator = makeSuccessfulGenerator(sandbox) + const q = new AbstractGenerationQueue(tmpDir) + q.setGenerator(successfulGenerator) + + // Enqueue 3 items — below BATCH_SIZE_CAP=5, so no batch should fire. 
+ for (let i = 0; i < 3; i++) { + q.enqueue({contextPath: join(tmpDir, `f${i}.md`), fullContent: `content ${i}`}) + } + + // Give scheduleNext time to (incorrectly) fire if the buffer guard is broken. + await new Promise((r) => { setImmediate(r) }) + await new Promise((r) => { setImmediate(r) }) + + const stub = successfulGenerator.generateContentStream as ReturnType + expect(stub.callCount).to.equal(0, 'Expected 0 LLM calls while pending is below BATCH_SIZE_CAP') + expect(q.getStatus()).to.deep.equal({failed: 0, pending: 3, processed: 0, processing: false}) + + // drain() forces the partial batch to flush → exactly 2 stream calls. + await q.drain() + expect(stub.callCount).to.equal(2, 'Expected drain() to flush the partial batch as 1×L0 + 1×L1') + expect(q.getStatus().processed).to.equal(3) + }) + + it('processes up to BATCH_SIZE_CAP items in a single LLM cycle', async () => { + const successfulGenerator = makeSuccessfulGenerator(sandbox) + const q = new AbstractGenerationQueue(tmpDir) + q.setGenerator(successfulGenerator) + + const N = 5 + for (let i = 0; i < N; i++) { + q.enqueue({contextPath: join(tmpDir, `f${i}.md`), fullContent: `content ${i}`}) + } + + await q.drain() + + // 1 batch * 2 LLM calls (L0 + L1) = exactly 2 stream calls for N=5 + const stub = successfulGenerator.generateContentStream as ReturnType + expect(stub.callCount).to.equal(2, 'Expected exactly 2 LLM stream calls for a 5-item batch (1×L0 + 1×L1)') + expect(q.getStatus()).to.deep.equal({failed: 0, pending: 0, processed: N, processing: false}) + + // Every file gets its abstract.md and overview.md written + const fileChecks = Array.from({length: N}, async (_, i) => { + const abstractPath = join(tmpDir, `f${i}.abstract.md`) + const overviewPath = join(tmpDir, `f${i}.overview.md`) + const [abstractText, overviewText] = await Promise.all([ + fs.readFile(abstractPath, 'utf8'), + fs.readFile(overviewPath, 'utf8'), + ]) + expect(abstractText).to.equal('generated text') + 
expect(overviewText).to.equal('generated text') + }) + await Promise.all(fileChecks) + }) + + it('splits oversized backlogs into multiple batches', async () => { + const successfulGenerator = makeSuccessfulGenerator(sandbox) + const q = new AbstractGenerationQueue(tmpDir) + q.setGenerator(successfulGenerator) + + const N = 7 // > BATCH_SIZE_CAP=5 → expect 2 batches (5 + 2) + for (let i = 0; i < N; i++) { + q.enqueue({contextPath: join(tmpDir, `f${i}.md`), fullContent: `content ${i}`}) + } + + await q.drain() + + const stub = successfulGenerator.generateContentStream as ReturnType + // 2 batches × 2 LLM calls each = 4 stream calls + expect(stub.callCount).to.equal(4, 'Expected 4 stream calls for 7 items split into batches of 5+2') + expect(q.getStatus().processed).to.equal(N) + }) + it('status file reflects retrying items in pending count', async () => { const {generator, rejectNextCall} = makeControlledGenerator(sandbox) const q = new AbstractGenerationQueue(tmpDir, 2) From 0cf9ae52b69408fcd4f4a76903a701224defccaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Thu=E1=BA=ADn=20Ph=C3=A1t?= Date: Wed, 29 Apr 2026 00:46:47 +0700 Subject: [PATCH 02/13] feat: [ENG-2485] defer summary cascade to dream MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the LLM-bound parent-summary cascade off curate's hot path. Curate now enqueues changed paths to a stale-summary queue persisted in .brv/dream-state.json; the next dream cycle drains the queue, merges with its own snapshot diff (A ∪ B), and runs propagateStaleness once per dirty directory. - DreamState schema gains staleSummaryPaths[]; new service methods enqueueStaleSummaryPaths (RMW with path-level dedup, oldest enqueuedAt preserved) and drainStaleSummaryPaths (atomic snapshot-and-clear). - curate-executor swaps the inline propagateStaleness call for an enqueueStaleSummaryPaths call. Manifest rebuild stays inline (pure filesystem, no LLM). 
- dream-executor's step 5 drains the queue, unions with the diff-set, and runs propagateStaleness once per unique path. On propagation failure the catch re-enqueues the drained snapshot so atomic drain doesn't lose work. - runStaleSummaryPropagation extracted as a protected seam; preserves the ENG-2100 parentTaskId threading so summary regenerations still share one billing session with the parent dream task. Validation: - validate.mjs: 15/15 BM25 invariants hold under stale _index.md - Real-LLM E2E: 8 curates / 2 dreams on byterover paid-tier - dedup, multi-curate accumulation, drain, re-enqueue on failure - Full unit suite: 6981 passing --- src/server/infra/dream/dream-state-schema.ts | 14 ++ src/server/infra/dream/dream-state-service.ts | 50 +++++- src/server/infra/executor/curate-executor.ts | 27 +-- src/server/infra/executor/dream-executor.ts | 69 ++++++-- .../infra/dream/dream-state-schema.test.ts | 54 +++++- .../infra/dream/dream-state-service.test.ts | 159 ++++++++++++++++++ test/unit/infra/dream/dream-trigger.test.ts | 1 + .../infra/executor/curate-executor.test.ts | 21 ++- .../infra/executor/dream-executor.test.ts | 126 +++++++++++++- 9 files changed, 486 insertions(+), 35 deletions(-) diff --git a/src/server/infra/dream/dream-state-schema.ts b/src/server/infra/dream/dream-state-schema.ts index 1ab7a9fd9..7fa973f30 100644 --- a/src/server/infra/dream/dream-state-schema.ts +++ b/src/server/infra/dream/dream-state-schema.ts @@ -7,23 +7,37 @@ export const PendingMergeSchema = z.object({ suggestedByDreamId: z.string(), }) +/** + * One entry in the stale-summary queue drained at the next dream cycle. + * `enqueuedAt` is preserved across dedup'd re-enqueues so future telemetry + * (e.g., "oldest waiting path") can read meaningful wait times even though + * no consumer reads it today. 
+ */ +export const StaleSummaryEntrySchema = z.object({ + enqueuedAt: z.number().int().nonnegative(), + path: z.string(), +}) + export const DreamStateSchema = z.object({ curationsSinceDream: z.number().int().min(0), lastDreamAt: z.string().datetime().nullable(), lastDreamLogId: z.string().nullable(), pendingMerges: z.array(PendingMergeSchema).optional().default([]), + staleSummaryPaths: z.array(StaleSummaryEntrySchema).optional().default([]), totalDreams: z.number().int().min(0), version: z.literal(1), }) export type DreamState = z.infer export type PendingMerge = z.infer +export type StaleSummaryEntry = z.infer export const EMPTY_DREAM_STATE: DreamState = { curationsSinceDream: 0, lastDreamAt: null, lastDreamLogId: null, pendingMerges: [], + staleSummaryPaths: [], totalDreams: 0, version: 1, } diff --git a/src/server/infra/dream/dream-state-service.ts b/src/server/infra/dream/dream-state-service.ts index 44c2b8e53..1f941fdf8 100644 --- a/src/server/infra/dream/dream-state-service.ts +++ b/src/server/infra/dream/dream-state-service.ts @@ -49,6 +49,52 @@ export class DreamStateService { this.stateFilePath = join(opts.baseDir, STATE_FILENAME) } + /** + * Atomic drain — reads the current queue and clears it in a single RMW, + * returning the deduped path list. The caller is responsible for retrying + * (re-enqueueing the returned snapshot) if the downstream work fails. + * + * Atomicity is the load-bearing property: any enqueue that runs after the + * drain returns sees an empty queue, so it always appends a fresh entry + * that survives independently of whether the downstream propagation succeeds + * or fails. Earlier "snapshot + clear-later" approaches lost same-path + * enqueues: the dedup check on enqueue saw the still-present snapshot entry + * and skipped, then `clear()` removed it. 
+ */ + async drainStaleSummaryPaths(): Promise { + let snapshot: string[] = [] + await this.update((state) => { + snapshot = state.staleSummaryPaths.map((e) => e.path) + if (snapshot.length === 0) return state + return {...state, staleSummaryPaths: []} + }) + return snapshot + } + + /** + * Append the given file paths to the stale-summary queue, deduping by path. + * A path already in the queue keeps its original `enqueuedAt` timestamp so + * "how long has this been waiting?" telemetry stays meaningful. + * + * Serialized through {@link update} so concurrent enqueues from parallel + * curate tasks do not lose entries. Empty input is a no-op (no write). + */ + async enqueueStaleSummaryPaths(paths: string[]): Promise { + if (paths.length === 0) return + const enqueuedAt = Date.now() + await this.update((state) => { + const existing = new Set(state.staleSummaryPaths.map((e) => e.path)) + const additions = paths + .filter((p) => !existing.has(p)) + .map((p) => ({enqueuedAt, path: p})) + if (additions.length === 0) return state + return { + ...state, + staleSummaryPaths: [...state.staleSummaryPaths, ...additions], + } + }) + } + /** * Read-modify-write under a per-file mutex. Serializes concurrent increments * from parallel curate tasks within the same agent process so no updates are lost. 
@@ -61,10 +107,10 @@ export class DreamStateService { try { const raw = await readFile(this.stateFilePath, 'utf8') const parsed = DreamStateSchema.safeParse(JSON.parse(raw)) - if (!parsed.success) return {...EMPTY_DREAM_STATE, pendingMerges: []} + if (!parsed.success) return {...EMPTY_DREAM_STATE} return parsed.data } catch { - return {...EMPTY_DREAM_STATE, pendingMerges: []} + return {...EMPTY_DREAM_STATE} } } diff --git a/src/server/infra/executor/curate-executor.ts b/src/server/infra/executor/curate-executor.ts index c0ba60674..9d03c38cf 100644 --- a/src/server/infra/executor/curate-executor.ts +++ b/src/server/infra/executor/curate-executor.ts @@ -14,7 +14,6 @@ import { import {validateFileForCurate} from '../../utils/file-validator.js' import {FileContextTreeManifestService} from '../context-tree/file-context-tree-manifest-service.js' import {FileContextTreeSnapshotService} from '../context-tree/file-context-tree-snapshot-service.js' -import {FileContextTreeSummaryService} from '../context-tree/file-context-tree-summary-service.js' import {diffStates} from '../context-tree/snapshot-diff.js' import {DreamStateService} from '../dream/dream-state-service.js' import {PreCompactionService} from './pre-compaction/pre-compaction-service.js' @@ -132,29 +131,33 @@ export class CurateExecutor implements ICurateExecutor { // Parse curation status from agent response for status tracking this.lastStatus = this.parseCurationStatus(taskId, response) - // --- Phase 4: Post-curation summary propagation (fail-open) --- + // Summary cascade regeneration (the LLM-driven `propagateStaleness` walk) + // is deferred to the next dream cycle to keep curate's hot path free of + // LLM calls. The manifest is rebuilt inline because it is a pure file + // scan (no LLM) and keeps newly-curated leaf files immediately + // discoverable via manifest-driven retrieval. + // Hoisted: both blocks below construct a DreamStateService against the + // same project. 
They share the module-level mutex via `getStateMutex`, + // so a single instance is sufficient and avoids duplicate construction. + const dreamStateService = new DreamStateService({baseDir: path.join(baseDir, BRV_DIR)}) + if (preState) { try { const postState = await snapshotService.getCurrentState(baseDir) const changedPaths = diffStates(preState, postState) if (changedPaths.length > 0) { - const summaryService = new FileContextTreeSummaryService() - const results = await summaryService.propagateStaleness(changedPaths, agent, baseDir, taskId) - - // Opportunistic manifest rebuild (pre-warm for next query) - if (results.some((r) => r.actionTaken)) { - const manifestService = new FileContextTreeManifestService({baseDirectory: baseDir}) - await manifestService.buildManifest(baseDir) - } + await dreamStateService.enqueueStaleSummaryPaths(changedPaths) + + const manifestService = new FileContextTreeManifestService({baseDirectory: baseDir}) + await manifestService.buildManifest(baseDir) } } catch { - // Fail-open: summary/manifest errors never block curation + // Fail-open: queue/manifest errors never block curation } } // Increment dream curation counter (fail-open: non-critical for curation) try { - const dreamStateService = new DreamStateService({baseDir: path.join(baseDir, BRV_DIR)}) await dreamStateService.incrementCurationCount() } catch { // Dream state tracking is non-critical — don't block curation diff --git a/src/server/infra/executor/dream-executor.ts b/src/server/infra/executor/dream-executor.ts index 1b10acfe9..2869183c0 100644 --- a/src/server/infra/executor/dream-executor.ts +++ b/src/server/infra/executor/dream-executor.ts @@ -55,6 +55,8 @@ export type DreamExecutorDeps = { save(entry: DreamLogEntry): Promise } dreamStateService: { + drainStaleSummaryPaths(): Promise + enqueueStaleSummaryPaths(paths: string[]): Promise read(): Promise update(updater: (state: import('../dream/dream-state-schema.js').DreamState) => 
import('../dream/dream-state-schema.js').DreamState): Promise write(state: import('../dream/dream-state-schema.js').DreamState): Promise @@ -127,7 +129,11 @@ export class DreamExecutor { try { preState = await snapshotService.getCurrentState(projectRoot) } catch { - // Fail-open: if snapshot fails, skip propagation + // Fail-open: if snapshot fails, the entire step 5 block (propagation + + // queue drain) is skipped — the stale-summary queue is not consumed in + // this dream cycle and accumulates until the next successful run. + // Drain-skip is preferable to drain-and-lose because atomic drain + // already removed entries before any propagation could re-enqueue them. } // Step 2: Load dream state @@ -150,18 +156,38 @@ export class DreamExecutor { }) // Step 5: Post-dream propagation (fail-open) + // Two sources of stale summary paths: + // A. The stale-summary queue, drained from dream state — paths from + // curate operations that ran since the last dream cycle (the LLM + // cascade work was deferred from curate's hot path to here). + // B. Dream's own snapshot diff — paths changed by this dream's + // consolidate/synthesize/prune operations. + // Merging A ∪ B before calling propagateStaleness lets a path touched + // by both sources regenerate exactly once. The queue is drained + // atomically (cleared in the same RMW that captures the snapshot) so + // any concurrent curate enqueueing during propagation appends a fresh + // entry to the now-empty queue and the next dream picks it up. 
if (preState) { + let drainedSnapshot: string[] = [] try { + drainedSnapshot = await this.deps.dreamStateService.drainStaleSummaryPaths() + const postState = await snapshotService.getCurrentState(projectRoot) const changedPaths = diffStates(preState, postState) - if (changedPaths.length > 0) { - const summaryService = new FileContextTreeSummaryService() - await summaryService.propagateStaleness(changedPaths, agent, projectRoot, options.taskId) - const manifestService = new FileContextTreeManifestService({baseDirectory: projectRoot}) - await manifestService.buildManifest(projectRoot) + + const merged = [...new Set([...changedPaths, ...drainedSnapshot])] + if (merged.length > 0) { + await this.runStaleSummaryPropagation({agent, parentTaskId: options.taskId, paths: merged, projectRoot}) } } catch { - // Fail-open: propagation errors never block dream + // Fail-open: propagation errors never block dream. Re-enqueue the + // drained snapshot so the next dream cycle retries — atomic drain + // already removed them, so without this they would be lost. + if (drainedSnapshot.length > 0) { + await this.deps.dreamStateService.enqueueStaleSummaryPaths(drainedSnapshot).catch(() => { + // If the re-enqueue itself fails, there is nothing more to do here. + }) + } } } @@ -253,11 +279,6 @@ export class DreamExecutor { } } - /** - * Runs the three dream operations sequentially, pushing results into `out` after - * each step. Extracted so the executor can preserve partial work when a later step - * throws — and so tests can inject controlled ops without a full LLM round-trip. - */ protected async runOperations(args: { agent: ICipherAgent changedFiles: Set @@ -312,6 +333,30 @@ export class DreamExecutor { ) } + /** + * Runs the three dream operations sequentially, pushing results into `out` after + * each step. Extracted so the executor can preserve partial work when a later step + * throws — and so tests can inject controlled ops without a full LLM round-trip. 
+ */ + /** + * Regenerate parent `_index.md` files for the given paths and rebuild the + * manifest. Extracted as a seam so tests can override and assert which + * paths were passed (the A ∪ B merge in step 5 is the central correctness + * invariant of the deferral). Production constructs the services here so + * the dependency surface of {@link DreamExecutorDeps} stays narrow. + */ + protected async runStaleSummaryPropagation(args: { + agent: ICipherAgent + parentTaskId?: string + paths: string[] + projectRoot: string + }): Promise { + const summaryService = new FileContextTreeSummaryService() + await summaryService.propagateStaleness(args.paths, args.agent, args.projectRoot, args.parentTaskId) + const manifestService = new FileContextTreeManifestService({baseDirectory: args.projectRoot}) + await manifestService.buildManifest(args.projectRoot) + } + /** Errors are tracked at the log level (status='error'), not per-operation — always 0 here. */ private computeSummary(operations: DreamOperation[]): DreamLogSummary { const summary: DreamLogSummary = {consolidated: 0, errors: 0, flaggedForReview: 0, pruned: 0, synthesized: 0} diff --git a/test/unit/infra/dream/dream-state-schema.test.ts b/test/unit/infra/dream/dream-state-schema.test.ts index 90951a0d6..d7f4d155e 100644 --- a/test/unit/infra/dream/dream-state-schema.test.ts +++ b/test/unit/infra/dream/dream-state-schema.test.ts @@ -1,6 +1,6 @@ import {expect} from 'chai' -import {DreamStateSchema, EMPTY_DREAM_STATE, PendingMergeSchema} from '../../../../src/server/infra/dream/dream-state-schema.js' +import {DreamStateSchema, EMPTY_DREAM_STATE, PendingMergeSchema, StaleSummaryEntrySchema} from '../../../../src/server/infra/dream/dream-state-schema.js' describe('dream-state-schema', () => { describe('DreamStateSchema', () => { @@ -17,6 +17,7 @@ describe('DreamStateSchema', () => { suggestedByDreamId: 'drm-1712736000000', }, ], + staleSummaryPaths: [], totalDreams: 3, version: 1, } @@ -120,8 +121,59 @@ 
describe('EMPTY_DREAM_STATE', () => { expect(EMPTY_DREAM_STATE.lastDreamAt).to.be.null expect(EMPTY_DREAM_STATE.lastDreamLogId).to.be.null expect(EMPTY_DREAM_STATE.pendingMerges).to.deep.equal([]) + expect(EMPTY_DREAM_STATE.staleSummaryPaths).to.deep.equal([]) expect(EMPTY_DREAM_STATE.totalDreams).to.equal(0) expect(EMPTY_DREAM_STATE.version).to.equal(1) }) }) + +describe('StaleSummaryEntrySchema', () => { + it('should parse a valid entry', () => { + const input = {enqueuedAt: 1_745_539_200_000, path: 'auth/jwt/token.md'} + const result = StaleSummaryEntrySchema.parse(input) + expect(result).to.deep.equal(input) + }) + + it('should reject a non-integer enqueuedAt', () => { + expect(() => StaleSummaryEntrySchema.parse({enqueuedAt: 1.5, path: 'a.md'})).to.throw() + }) + + it('should reject a negative enqueuedAt', () => { + expect(() => StaleSummaryEntrySchema.parse({enqueuedAt: -1, path: 'a.md'})).to.throw() + }) + + it('should reject a missing path', () => { + expect(() => StaleSummaryEntrySchema.parse({enqueuedAt: 0})).to.throw() + }) +}) + +describe('DreamStateSchema staleSummaryPaths', () => { + it('should default staleSummaryPaths to [] when missing', () => { + const input = { + curationsSinceDream: 0, + lastDreamAt: null, + lastDreamLogId: null, + totalDreams: 0, + version: 1, + } + const result = DreamStateSchema.parse(input) + expect(result.staleSummaryPaths).to.deep.equal([]) + }) + + it('should accept a populated staleSummaryPaths array', () => { + const input = { + curationsSinceDream: 0, + lastDreamAt: null, + lastDreamLogId: null, + staleSummaryPaths: [ + {enqueuedAt: 1_745_539_200_000, path: 'auth/jwt/token.md'}, + {enqueuedAt: 1_745_546_400_000, path: 'billing/webhooks/stripe.md'}, + ], + totalDreams: 0, + version: 1, + } + const result = DreamStateSchema.parse(input) + expect(result.staleSummaryPaths).to.have.lengthOf(2) + }) +}) }) diff --git a/test/unit/infra/dream/dream-state-service.test.ts b/test/unit/infra/dream/dream-state-service.test.ts index 
52970ce55..936181480 100644 --- a/test/unit/infra/dream/dream-state-service.test.ts +++ b/test/unit/infra/dream/dream-state-service.test.ts @@ -13,6 +13,7 @@ function makeState(overrides: Partial = {}): DreamState { lastDreamAt: null, lastDreamLogId: null, pendingMerges: [], + staleSummaryPaths: [], totalDreams: 0, version: 1, ...overrides, @@ -217,4 +218,162 @@ describe('DreamStateService', () => { expect(final.totalDreams).to.equal(5) }) }) + + // ========================================================================== + // enqueueStaleSummaryPaths — defer summary cascade + // ========================================================================== + + describe('enqueueStaleSummaryPaths', () => { + it('appends new paths to an empty queue', async () => { + await service.enqueueStaleSummaryPaths(['auth/jwt/token.md', 'billing/webhooks/stripe.md']) + const state = await service.read() + expect(state.staleSummaryPaths.map((e) => e.path)).to.deep.equal([ + 'auth/jwt/token.md', + 'billing/webhooks/stripe.md', + ]) + }) + + it('stamps each entry with enqueuedAt at the moment of the call', async () => { + const before = Date.now() + await service.enqueueStaleSummaryPaths(['auth/jwt/token.md']) + const after = Date.now() + + const state = await service.read() + expect(state.staleSummaryPaths).to.have.lengthOf(1) + const [entry] = state.staleSummaryPaths + expect(entry.enqueuedAt).to.be.at.least(before) + expect(entry.enqueuedAt).to.be.at.most(after) + }) + + it('dedups entries by path (keeps oldest enqueuedAt)', async () => { + await service.enqueueStaleSummaryPaths(['auth/jwt/token.md']) + const firstState = await service.read() + const firstStamp = firstState.staleSummaryPaths[0].enqueuedAt + + // ensure the second call's Date.now() is strictly later + await new Promise((resolve) => { + setTimeout(resolve, 5) + }) + + await service.enqueueStaleSummaryPaths(['auth/jwt/token.md', 'billing/webhooks/stripe.md']) + const secondState = await service.read() + + 
expect(secondState.staleSummaryPaths).to.have.lengthOf(2) + const tokenEntry = secondState.staleSummaryPaths.find((e) => e.path === 'auth/jwt/token.md') + expect(tokenEntry?.enqueuedAt, 'oldest enqueuedAt preserved on dedup').to.equal(firstStamp) + }) + + it('preserves other state fields when enqueuing', async () => { + await service.write(makeState({ + curationsSinceDream: 7, + totalDreams: 2, + })) + + await service.enqueueStaleSummaryPaths(['auth/jwt/token.md']) + + const state = await service.read() + expect(state.curationsSinceDream).to.equal(7) + expect(state.totalDreams).to.equal(2) + expect(state.staleSummaryPaths).to.have.lengthOf(1) + }) + + it('is a no-op for an empty input array', async () => { + await service.enqueueStaleSummaryPaths([]) + const state = await service.read() + expect(state.staleSummaryPaths).to.deep.equal([]) + }) + + it('does not lose entries when 10 enqueues run concurrently', async () => { + const paths = Array.from({length: 10}, (_, i) => `domain/topic-${i}.md`) + await Promise.all(paths.map((p) => service.enqueueStaleSummaryPaths([p]))) + const state = await service.read() + const stored = state.staleSummaryPaths.map((e) => e.path).sort() + expect(stored).to.deep.equal([...paths].sort()) + }) + }) + + // ========================================================================== + // drainStaleSummaryPaths — snapshot-and-clear pattern + // ========================================================================== + + describe('drainStaleSummaryPaths', () => { + it('returns the current snapshot of paths AND clears the queue atomically', async () => { + await service.enqueueStaleSummaryPaths(['auth/jwt/token.md', 'billing/webhooks/stripe.md']) + + const snapshot = await service.drainStaleSummaryPaths() + expect(snapshot.sort()).to.deep.equal([ + 'auth/jwt/token.md', + 'billing/webhooks/stripe.md', + ]) + + // queue is empty after drain — the same RMW that read it cleared it + const state = await service.read() + 
expect(state.staleSummaryPaths).to.deep.equal([]) + }) + + it('returns an empty snapshot when the queue is empty', async () => { + const snapshot = await service.drainStaleSummaryPaths() + expect(snapshot).to.deep.equal([]) + }) + + it('different-path enqueue during processing survives', async () => { + await service.enqueueStaleSummaryPaths(['auth/jwt/token.md']) + const snapshot = await service.drainStaleSummaryPaths() + expect(snapshot).to.deep.equal(['auth/jwt/token.md']) + + // simulate a curate enqueue happening WHILE the dream is processing + await service.enqueueStaleSummaryPaths(['billing/webhooks/stripe.md']) + + const state = await service.read() + expect(state.staleSummaryPaths.map((e) => e.path)).to.deep.equal(['billing/webhooks/stripe.md']) + }) + + it('drain on an empty queue returns an empty snapshot and leaves enqueues untouched', async () => { + const snapshot = await service.drainStaleSummaryPaths() + expect(snapshot).to.deep.equal([]) + + await service.enqueueStaleSummaryPaths(['auth/jwt/token.md']) + + const state = await service.read() + expect(state.staleSummaryPaths.map((e) => e.path)).to.deep.equal(['auth/jwt/token.md']) + }) + + it('preserves other state fields when draining', async () => { + await service.write(makeState({ + curationsSinceDream: 3, + totalDreams: 1, + })) + await service.enqueueStaleSummaryPaths(['auth/jwt/token.md']) + await service.drainStaleSummaryPaths() + + const state = await service.read() + expect(state.curationsSinceDream).to.equal(3) + expect(state.totalDreams).to.equal(1) + expect(state.staleSummaryPaths).to.deep.equal([]) + }) + + it('preserves a same-path enqueue made after the drain (no race loss)', async () => { + // Repro of the race the reviewer flagged on PR #551: + // 1. Dream drains queue containing X. + // 2. Concurrent curate touches X again — enqueue should record it. + // 3. Dream finishes propagation. + // 4. The post-drain enqueue MUST survive so the next dream picks it up. 
+ // Atomic drain (queue cleared upfront) makes the post-drain enqueue see + // an empty queue, so it always appends fresh. + await service.enqueueStaleSummaryPaths(['auth/jwt/token.md']) + + // (1) Dream drains — entries removed atomically. + const snapshot = await service.drainStaleSummaryPaths() + expect(snapshot).to.deep.equal(['auth/jwt/token.md']) + + // (2) A curate touches the same path during dream propagation. + await service.enqueueStaleSummaryPaths(['auth/jwt/token.md']) + + // (3) Dream finishes — no clear() to call; entries already removed at (1). + + // (4) The path enqueued at (2) survives. + const state = await service.read() + expect(state.staleSummaryPaths.map((e) => e.path)).to.deep.equal(['auth/jwt/token.md']) + }) + }) }) diff --git a/test/unit/infra/dream/dream-trigger.test.ts b/test/unit/infra/dream/dream-trigger.test.ts index 7606dfb55..9ac7a9ba2 100644 --- a/test/unit/infra/dream/dream-trigger.test.ts +++ b/test/unit/infra/dream/dream-trigger.test.ts @@ -11,6 +11,7 @@ function makeState(overrides: Partial = {}): DreamState { lastDreamAt: new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString(), lastDreamLogId: null, pendingMerges: [], + staleSummaryPaths: [], totalDreams: 0, version: 1, ...overrides, diff --git a/test/unit/infra/executor/curate-executor.test.ts b/test/unit/infra/executor/curate-executor.test.ts index 2148a3cba..885ea854c 100644 --- a/test/unit/infra/executor/curate-executor.test.ts +++ b/test/unit/infra/executor/curate-executor.test.ts @@ -18,6 +18,7 @@ import {FileValidationError} from '../../../../src/server/core/domain/errors/tas import {FileContextTreeManifestService} from '../../../../src/server/infra/context-tree/file-context-tree-manifest-service.js' import {FileContextTreeSnapshotService} from '../../../../src/server/infra/context-tree/file-context-tree-snapshot-service.js' import {FileContextTreeSummaryService} from '../../../../src/server/infra/context-tree/file-context-tree-summary-service.js' +import 
{DreamStateService} from '../../../../src/server/infra/dream/dream-state-service.js' import {CurateExecutor} from '../../../../src/server/infra/executor/curate-executor.js' describe('CurateExecutor (regression)', () => { @@ -250,8 +251,8 @@ describe('CurateExecutor (regression)', () => { }) }) - describe('summary propagation taskId threading (ENG-2100)', () => { - it('passes the curate operation taskId to propagateStaleness so summary LLM calls share one billing session', async () => { + describe('summary cascade deferral to dream (ENG-2485)', () => { + it('enqueues stale-summary paths to the dream queue and does NOT call propagateStaleness inline', async () => { const agent = { cancel: stub().resolves(false), createTaskSession: stub().resolves('session-id'), @@ -283,6 +284,10 @@ describe('CurateExecutor (regression)', () => { 'propagateStaleness', ).resolves([]) stub(FileContextTreeManifestService.prototype, 'buildManifest').resolves() + const enqueueStub = stub(DreamStateService.prototype, 'enqueueStaleSummaryPaths').resolves() + // incrementCurationCount is unrelated dream-state work that runs after the post-curation + // step; stub it so the test doesn't hit disk for the dream state file. + stub(DreamStateService.prototype, 'incrementCurationCount').resolves() const taskId = 'curate-op-uuid-1' const projectRoot = '/projects/myapp' @@ -294,10 +299,14 @@ describe('CurateExecutor (regression)', () => { taskId, }) - expect(propagateStalenessStub.calledOnce).to.be.true - // 4th arg must be the curate's taskId so the billing service groups - // summary regenerations into the same session as the parent operation. - expect(propagateStalenessStub.firstCall.args[3]).to.equal(taskId) + // ENG-2485 invariant: the LLM-bound propagateStaleness walk MUST NOT run + // on the curate hot path. It is deferred to the next dream cycle. 
+ expect(propagateStalenessStub.called).to.equal(false) + + // The deferred work is captured in the dream queue: the changed paths from + // diffStates are enqueued for the next dream cycle to drain. + expect(enqueueStub.calledOnce).to.equal(true) + expect(enqueueStub.firstCall.args[0]).to.deep.equal(['auth/jwt.md']) }) }) }) diff --git a/test/unit/infra/executor/dream-executor.test.ts b/test/unit/infra/executor/dream-executor.test.ts index 41130722a..170c647b7 100644 --- a/test/unit/infra/executor/dream-executor.test.ts +++ b/test/unit/infra/executor/dream-executor.test.ts @@ -48,7 +48,7 @@ function makePartialRunExecutor(args: { } describe('DreamExecutor', () => { - let dreamStateService: {read: SinonStub; update: SinonStub; write: SinonStub} + let dreamStateService: {drainStaleSummaryPaths: SinonStub; enqueueStaleSummaryPaths: SinonStub; read: SinonStub; update: SinonStub; write: SinonStub} let dreamLogStore: {getNextId: SinonStub; save: SinonStub} let dreamLockService: {release: SinonStub; rollback: SinonStub} let curateLogStore: {getNextId: SinonStub; list: SinonStub; save: SinonStub} @@ -63,7 +63,12 @@ describe('DreamExecutor', () => { beforeEach(() => { dreamStateService = { - read: stub().resolves({...EMPTY_DREAM_STATE, pendingMerges: []}), + // Default drain: empty queue. Tests that exercise the queue override. + drainStaleSummaryPaths: stub().resolves([]), + // Default enqueue: no-op stub. Used by the executor's catch block to + // re-enqueue a drained snapshot if propagation fails. + enqueueStaleSummaryPaths: stub().resolves(), + read: stub().resolves({...EMPTY_DREAM_STATE, pendingMerges: [], staleSummaryPaths: []}), // Default update implementation: read → updater → write, mirroring the real // service so tests that count write.callCount stay valid without changes. 
update: stub().callsFake(async (updater: (state: import('../../../../src/server/infra/dream/dream-state-schema.js').DreamState) => import('../../../../src/server/infra/dream/dream-state-schema.js').DreamState) => { @@ -497,6 +502,123 @@ describe('DreamExecutor', () => { } }) + // ========================================================================== + // Stale-summary queue: drain + re-enqueue on propagation failure + // ========================================================================== + + it('propagates over A ∪ B union of drained queue and snapshot diff (happy path)', async () => { + // The merge at dream-executor.ts is the central correctness invariant of this + // PR — anything in EITHER the queue (A) OR dream's own diff (B) must be + // propagated, exactly once per path. This test pins that invariant. + dreamStateService.drainStaleSummaryPaths.resolves(['queue/path.md']) + + // Real temp project so snapshotService.getCurrentState succeeds. We override + // runOperations to write a new file between pre and post snapshots, so the + // snapshot diff produces a non-empty list — that becomes the B half of A ∪ B. + const projectRoot = mkdtempSync(join(tmpdir(), 'brv-dream-merge-')) + const contextTreeDir = join(projectRoot, '.brv', 'context-tree') + mkdirSync(contextTreeDir, {recursive: true}) + const captured: string[][] = [] + + class MergeTestExecutor extends DreamExecutor { + protected override async runOperations(): Promise { + // Mutate the tree so postState differs from preState by 'diff/added.md'. 
+ mkdirSync(join(contextTreeDir, 'diff'), {recursive: true}) + writeFileSync(join(contextTreeDir, 'diff', 'added.md'), '# new from dream') + } + + protected override async runStaleSummaryPropagation(opts: { + agent: ICipherAgent + paths: string[] + projectRoot: string + }): Promise<void> { + captured.push([...opts.paths].sort()) + } + } + + try { + const executor = new MergeTestExecutor(deps) + await executor.executeWithAgent(agent, {...defaultOptions, projectRoot}) + } finally { + rmSync(projectRoot, {force: true, recursive: true}) + } + + expect(captured).to.have.lengthOf(1) + expect(captured[0]).to.deep.equal(['diff/added.md', 'queue/path.md']) + expect(dreamStateService.enqueueStaleSummaryPaths.callCount).to.equal(0) + }) + + it('dedups paths that appear in both the queue and the snapshot diff (single regeneration)', async () => { + dreamStateService.drainStaleSummaryPaths.resolves(['shared/path.md']) + + const projectRoot = mkdtempSync(join(tmpdir(), 'brv-dream-merge-dedup-')) + const contextTreeDir = join(projectRoot, '.brv', 'context-tree') + mkdirSync(contextTreeDir, {recursive: true}) + const captured: string[][] = [] + + class MergeTestExecutor extends DreamExecutor { + protected override async runOperations(): Promise<void> { + // Write the SAME path the queue contains — the merge must dedup. 
+ mkdirSync(join(contextTreeDir, 'shared'), {recursive: true}) + writeFileSync(join(contextTreeDir, 'shared', 'path.md'), '# also touched by dream') + } + + protected override async runStaleSummaryPropagation(opts: { + agent: ICipherAgent + paths: string[] + projectRoot: string + }): Promise { + captured.push([...opts.paths].sort()) + } + } + + try { + const executor = new MergeTestExecutor(deps) + await executor.executeWithAgent(agent, {...defaultOptions, projectRoot}) + } finally { + rmSync(projectRoot, {force: true, recursive: true}) + } + + expect(captured).to.have.lengthOf(1) + expect(captured[0]).to.deep.equal(['shared/path.md']) + }) + + it('re-enqueues drained snapshot when post-dream propagation throws', async () => { + // Atomic drain removes entries upfront. If propagation fails, the catch + // block must re-enqueue so the snapshot is not lost. + dreamStateService.drainStaleSummaryPaths.resolves([ + 'auth/jwt/token.md', + 'billing/webhooks/stripe.md', + ]) + + // Force the propagation block to throw by making the snapshot service fail. + // The dream-executor wraps Step 5 in try/catch so the dream itself completes. + const projectRoot = mkdtempSync(join(tmpdir(), 'brv-dream-reenqueue-')) + try { + const executor = new DreamExecutor(deps) + // executeWithAgent uses a real FileContextTreeSnapshotService bound to projectRoot. + // The directory exists but has no .brv/context-tree, so getCurrentState throws — + // exercising the catch block that should re-enqueue the drained snapshot. 
+ await executor.executeWithAgent(agent, {...defaultOptions, projectRoot}) + } finally { + rmSync(projectRoot, {force: true, recursive: true}) + } + + expect(dreamStateService.enqueueStaleSummaryPaths.calledOnce).to.equal(true) + expect(dreamStateService.enqueueStaleSummaryPaths.firstCall.args[0]).to.deep.equal([ + 'auth/jwt/token.md', + 'billing/webhooks/stripe.md', + ]) + }) + + it('does not call enqueue when drain returns an empty snapshot (no work to retry)', async () => { + // Default drain stub returns [] — no snapshot to preserve on failure. + const executor = new DreamExecutor(deps) + await executor.executeWithAgent(agent, defaultOptions) + + expect(dreamStateService.enqueueStaleSummaryPaths.callCount).to.equal(0) + }) + // ========================================================================== // Partial / error log preservation (ENG-2126 fix #2) // ========================================================================== From 0e5e1b3e0cc80820929f88bc05f49806499242ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Thu=E1=BA=ADn=20Ph=C3=A1t?= Date: Wed, 29 Apr 2026 10:29:56 +0700 Subject: [PATCH 03/13] refactor: [ENG-2485] address review-agent feedback on PR #579 - Bypass dream activity gate when the stale-summary queue has work (codex P1): low-activity projects with 1-2 curates would otherwise strand queued paths indefinitely. dream-trigger.ts now treats a populated queue as enough reason to drain. - Dedup within-batch input in enqueueStaleSummaryPaths: a caller passing a non-unique array no longer produces duplicate queue entries. - Split the curate-executor post-processing try block: snapshot diff, enqueue, and manifest rebuild now have independent fail-open scopes so a transient queue write error cannot skip the (no-LLM) manifest scan that keeps newly-curated leaves immediately discoverable. - Restore the runOperations JSDoc that the cherry-pick orphaned in front of runStaleSummaryPropagation. 
- Tighten StaleSummaryEntrySchema to z.string().min(1) so empty paths cannot persist into dream-state.json. Tests: - dream-trigger: bypass-when-queue-populated + still-block-when-empty - dream-state-service: within-batch duplicate dedup --- src/server/infra/dream/dream-state-schema.ts | 5 ++- src/server/infra/dream/dream-state-service.ts | 6 +++- src/server/infra/dream/dream-trigger.ts | 8 +++-- src/server/infra/executor/curate-executor.ts | 27 +++++++++++++--- src/server/infra/executor/dream-executor.ts | 10 +++--- .../infra/dream/dream-state-service.test.ts | 11 +++++++ test/unit/infra/dream/dream-trigger.test.ts | 31 +++++++++++++++++++ 7 files changed, 85 insertions(+), 13 deletions(-) diff --git a/src/server/infra/dream/dream-state-schema.ts b/src/server/infra/dream/dream-state-schema.ts index 7fa973f30..48525f725 100644 --- a/src/server/infra/dream/dream-state-schema.ts +++ b/src/server/infra/dream/dream-state-schema.ts @@ -15,7 +15,10 @@ export const PendingMergeSchema = z.object({ */ export const StaleSummaryEntrySchema = z.object({ enqueuedAt: z.number().int().nonnegative(), - path: z.string(), + // Empty paths indicate a bug at the call site (a malformed diff entry would + // resolve to an empty parent dir); reject them at the schema boundary so + // garbage cannot persist into dream-state.json. + path: z.string().min(1), }) export const DreamStateSchema = z.object({ diff --git a/src/server/infra/dream/dream-state-service.ts b/src/server/infra/dream/dream-state-service.ts index 1f941fdf8..ad3597955 100644 --- a/src/server/infra/dream/dream-state-service.ts +++ b/src/server/infra/dream/dream-state-service.ts @@ -81,10 +81,14 @@ export class DreamStateService { */ async enqueueStaleSummaryPaths(paths: string[]): Promise { if (paths.length === 0) return + // Dedup the input itself before checking against the queue — callers may + // pass non-unique arrays (e.g. multiple changed paths within a single + // curate that round-trip through the same parent dir). 
+ const incoming = [...new Set(paths)] const enqueuedAt = Date.now() await this.update((state) => { const existing = new Set(state.staleSummaryPaths.map((e) => e.path)) - const additions = paths + const additions = incoming .filter((p) => !existing.has(p)) .map((p) => ({enqueuedAt, path: p})) if (additions.length === 0) return state diff --git a/src/server/infra/dream/dream-trigger.ts b/src/server/infra/dream/dream-trigger.ts index df20661a0..06722510d 100644 --- a/src/server/infra/dream/dream-trigger.ts +++ b/src/server/infra/dream/dream-trigger.ts @@ -79,8 +79,12 @@ export class DreamTrigger { } } - // Gate 2: Activity - if (state.curationsSinceDream < minCurations) { + // Gate 2: Activity. Bypassed when the stale-summary queue has deferred + // work — leaving entries indefinitely strands `_index.md` regeneration + // in low-activity projects (the very projects ENG-2485 most affects, + // since 1–2 curates over a 12h window otherwise sit under minCurations + // forever). Dream is the canonical drain point; if it has work, run. + if (state.curationsSinceDream < minCurations && state.staleSummaryPaths.length === 0) { return { eligible: false, reason: `Not enough activity (${state.curationsSinceDream} < ${minCurations} curations)`, diff --git a/src/server/infra/executor/curate-executor.ts b/src/server/infra/executor/curate-executor.ts index 9d03c38cf..e7253ba90 100644 --- a/src/server/infra/executor/curate-executor.ts +++ b/src/server/infra/executor/curate-executor.ts @@ -141,18 +141,37 @@ export class CurateExecutor implements ICurateExecutor { // so a single instance is sufficient and avoids duplicate construction. const dreamStateService = new DreamStateService({baseDir: path.join(baseDir, BRV_DIR)}) + // Two independent fail-open concerns: (a) enqueue the deferred + // summary-cascade work to dream's queue; (b) rebuild the search + // manifest. 
They share `changedPaths` but otherwise are unrelated — + // a transient disk error on the dream-state write must not skip the + // pure-filesystem manifest scan that keeps newly-curated leaf files + // immediately discoverable. Each runs in its own try block so one + // failure cannot mask the other's work. + let changedPaths: string[] = [] if (preState) { try { const postState = await snapshotService.getCurrentState(baseDir) - const changedPaths = diffStates(preState, postState) - if (changedPaths.length > 0) { + changedPaths = diffStates(preState, postState) + } catch { + // Fail-open: snapshot errors leave changedPaths empty → no enqueue, + // no manifest rebuild. Next curate's snapshot will pick up the diff. + } + + if (changedPaths.length > 0) { + try { await dreamStateService.enqueueStaleSummaryPaths(changedPaths) + } catch { + // Fail-open: queue write errors never block curation. The next + // curate's enqueue will still capture the same paths via diffStates. + } + try { const manifestService = new FileContextTreeManifestService({baseDirectory: baseDir}) await manifestService.buildManifest(baseDir) + } catch { + // Fail-open: manifest rebuild is best-effort pre-warming. } - } catch { - // Fail-open: queue/manifest errors never block curation } } diff --git a/src/server/infra/executor/dream-executor.ts b/src/server/infra/executor/dream-executor.ts index 2869183c0..5719a0504 100644 --- a/src/server/infra/executor/dream-executor.ts +++ b/src/server/infra/executor/dream-executor.ts @@ -279,6 +279,11 @@ export class DreamExecutor { } } + /** + * Runs the three dream operations sequentially, pushing results into `out` after + * each step. Extracted so the executor can preserve partial work when a later step + * throws — and so tests can inject controlled ops without a full LLM round-trip. 
+ */ protected async runOperations(args: { agent: ICipherAgent changedFiles: Set @@ -333,11 +338,6 @@ export class DreamExecutor { ) } - /** - * Runs the three dream operations sequentially, pushing results into `out` after - * each step. Extracted so the executor can preserve partial work when a later step - * throws — and so tests can inject controlled ops without a full LLM round-trip. - */ /** * Regenerate parent `_index.md` files for the given paths and rebuild the * manifest. Extracted as a seam so tests can override and assert which diff --git a/test/unit/infra/dream/dream-state-service.test.ts b/test/unit/infra/dream/dream-state-service.test.ts index 936181480..c4907b5b2 100644 --- a/test/unit/infra/dream/dream-state-service.test.ts +++ b/test/unit/infra/dream/dream-state-service.test.ts @@ -283,6 +283,17 @@ describe('DreamStateService', () => { expect(state.staleSummaryPaths).to.deep.equal([]) }) + it('dedups within-batch duplicates so a single call cannot insert the same path twice', async () => { + // The contract is "dedup by path". A caller passing a non-unique array + // (e.g. multiple changedPaths within a single curate that round-trip + // through the same parent dir) must NOT produce duplicate queue entries. 
+ await service.enqueueStaleSummaryPaths(['auth/jwt.md', 'auth/jwt.md', 'auth/jwt.md']) + + const state = await service.read() + expect(state.staleSummaryPaths).to.have.lengthOf(1) + expect(state.staleSummaryPaths[0].path).to.equal('auth/jwt.md') + }) + it('does not lose entries when 10 enqueues run concurrently', async () => { const paths = Array.from({length: 10}, (_, i) => `domain/topic-${i}.md`) await Promise.all(paths.map((p) => service.enqueueStaleSummaryPaths([p]))) diff --git a/test/unit/infra/dream/dream-trigger.test.ts b/test/unit/infra/dream/dream-trigger.test.ts index 9ac7a9ba2..03ffaa107 100644 --- a/test/unit/infra/dream/dream-trigger.test.ts +++ b/test/unit/infra/dream/dream-trigger.test.ts @@ -79,6 +79,37 @@ describe('DreamTrigger', () => { } }) + it('should bypass activity gate when stale-summary queue has work', async () => { + // ENG-2485: deferred summary cascade lives in staleSummaryPaths. If a + // low-activity project (1-2 curates) accumulates queued paths and the + // activity gate kept blocking, _index.md regeneration would never run. + const deps = makeDeps({ + state: makeState({ + curationsSinceDream: 1, + staleSummaryPaths: [{enqueuedAt: Date.now(), path: 'auth/jwt.md'}], + }), + }) + const trigger = new DreamTrigger(deps) + + const result = await trigger.tryStartDream('/project') + expect(result.eligible).to.be.true + }) + + it('should still fail activity gate when both curations AND queue are empty', async () => { + // Negative case for the bypass: empty queue + low activity means the + // activity gate should still block (nothing to drain, no work to do). 
+ const deps = makeDeps({ + state: makeState({curationsSinceDream: 1, staleSummaryPaths: []}), + }) + const trigger = new DreamTrigger(deps) + + const result = await trigger.tryStartDream('/project') + expect(result.eligible).to.be.false + if (!result.eligible) { + expect(result.reason).to.include('activity') + } + }) + it('should fail when queue is not empty', async () => { const deps = makeDeps({queueLength: 3}) const trigger = new DreamTrigger(deps) From c17a0fa350b9448e94e39a17a3aadff13673d07f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Thu=E1=BA=ADn=20Ph=C3=A1t?= Date: Wed, 29 Apr 2026 10:34:57 +0700 Subject: [PATCH 04/13] refactor: [ENG-2518] address review-agent feedback on PR #580 - Dedup duplicate contextPath inputs in generateFileAbstractsBatch (codex P1): when a curate touches the same file twice before drain, both queue items had the same path; the prompt emitted two `` blocks the model could answer in either order, and the tag parser's Map collapsed them non-deterministically. Now collapses by path keeping the LATEST content (the FIFO queue's last-seen item) so abstracts reflect the on-disk state, not an intermediate one. - CDATA-wrap file content in batched prompts (Claude review): docs that describe XML/HTML/JSX (perfectly normal for technical knowledge) can contain `` or `` markers that would terminate the envelope and conflate files. Wrap in CDATA with the standard `]]]]>` in-section escape for any nested terminators. - Capture this.generator in a local const at the top of processNext so type narrowing survives the await boundary, and document the batched-retry semantics inline (batch failure consumes one attempt per item, not one per call). 
Tests: - abstract-generator-batch: duplicate-path dedup keeps latest content - abstract-generator-batch: CDATA wrap survives `` markers - abstract-generator-batch: nested `]]>` is escaped to keep CDATA valid --- src/agent/infra/map/abstract-generator.ts | 39 ++++-- src/agent/infra/map/abstract-queue.ts | 15 ++- .../map/abstract-generator-batch.test.ts | 113 ++++++++++++++++++ 3 files changed, 154 insertions(+), 13 deletions(-) diff --git a/src/agent/infra/map/abstract-generator.ts b/src/agent/infra/map/abstract-generator.ts index 4f272c19f..9bc1f4412 100644 --- a/src/agent/infra/map/abstract-generator.ts +++ b/src/agent/infra/map/abstract-generator.ts @@ -88,11 +88,20 @@ function escapeXmlAttr(value: string): string { return value.replaceAll('&', '&').replaceAll('"', '"').replaceAll('<', '<').replaceAll('>', '>') } +/** + * Wrap raw file content in a CDATA section so XML/HTML/JSX/markdown that + * mentions `` or `` (perfectly normal for docs that describe + * those formats) cannot terminate the envelope and conflate files. The inner + * `]]>` escape is the standard CDATA-in-CDATA trick: split the sequence so it + * never appears verbatim inside the active section. + */ +function wrapCdata(content: string): string { + return `', ']]]]>')}]]>` +} + function buildBatchedAbstractPrompt(items: ReadonlyArray<{content: string; contextPath: string;}>): string { const filesXml = items.map((it) => ` - -${it.content} - +${wrapCdata(it.content)} `).join('\n') return `For each of the following knowledge documents, produce a ONE-LINE summary (max 80 tokens) that is a complete sentence capturing the core topic and key insight. 
@@ -109,9 +118,7 @@ ${filesXml} function buildBatchedOverviewPrompt(items: ReadonlyArray<{content: string; contextPath: string;}>): string { const filesXml = items.map((it) => ` - -${it.content} - +${wrapCdata(it.content)} `).join('\n') return `For each of the following knowledge documents, produce a structured overview (markdown, under 1500 tokens) that includes: @@ -219,10 +226,22 @@ export async function generateFileAbstractsBatch( ): Promise { if (items.length === 0) return [] - const truncated = items.map((it) => ({ - content: it.fullContent.slice(0, MAX_BATCHED_CONTENT_CHARS_PER_FILE), - contextPath: it.contextPath, - })) + // Dedup by contextPath, keeping the LAST occurrence's content. The queue is + // FIFO so later items carry the most recent fullContent — and the disk file + // already reflects that write, so the abstract must summarize the latest + // state rather than an intermediate one. Without this dedup, duplicate paths + // emit two `` blocks the model may answer in either order; the + // tag parser keys on path and Map-collapses, leaving non-deterministic + // results for the duplicates. 
+ const byPath = new Map() + for (const it of items) { + byPath.set(it.contextPath, { + content: it.fullContent.slice(0, MAX_BATCHED_CONTENT_CHARS_PER_FILE), + contextPath: it.contextPath, + }) + } + + const truncated = [...byPath.values()] const [abstractText, overviewText] = await Promise.all([ streamToText(generator, { diff --git a/src/agent/infra/map/abstract-queue.ts b/src/agent/infra/map/abstract-queue.ts index 2884544d3..2df2fa97b 100644 --- a/src/agent/infra/map/abstract-queue.ts +++ b/src/agent/infra/map/abstract-queue.ts @@ -172,7 +172,11 @@ export class AbstractGenerationQueue { } private async processNext(): Promise { - if (!this.generator || this.processing || this.pending.length === 0) { + // Capture the generator in a local const so type narrowing survives the + // `await` boundary below — TS won't keep `this.generator` narrow across + // suspensions because another async path could reassign the property. + const {generator} = this + if (!generator || this.processing || this.pending.length === 0) { this.resolveDrainersIfIdle() return } @@ -181,7 +185,12 @@ export class AbstractGenerationQueue { this.queueStatusWrite() // Drain up to BATCH_SIZE_CAP items into a single batch. Items beyond the - // cap stay pending for the next cycle. + // cap stay pending for the next cycle. Note: `maxAttempts` counts BATCH + // attempts for this item, not individual-call attempts — a transient + // failure on attempt 1 consumes one retry token for every item in the + // batch, including ones whose content was unrelated to the failure. + // Acceptable: batches are small (cap=5) and the per-item re-enqueue on + // batch failure preserves attempts independently across cycles. 
const batch = this.pending.splice(0, BATCH_SIZE_CAP) queueLog(`process:start batchSize=${batch.length} remaining=${this.pending.length} retrying=${this.retrying}`) @@ -196,7 +205,7 @@ export class AbstractGenerationQueue { const results = await generateFileAbstractsBatch( batch.map((it) => ({contextPath: it.contextPath, fullContent: it.fullContent})), - this.generator, + generator, ) // Write all batched outputs in parallel. Empty strings are valid (model diff --git a/test/unit/agent/map/abstract-generator-batch.test.ts b/test/unit/agent/map/abstract-generator-batch.test.ts index ceb535464..5405735e4 100644 --- a/test/unit/agent/map/abstract-generator-batch.test.ts +++ b/test/unit/agent/map/abstract-generator-batch.test.ts @@ -112,6 +112,119 @@ describe('generateFileAbstractsBatch', () => { expect(result[0].overviewContent).to.equal('') }) + it('dedups duplicate contextPath inputs, keeping the last item content (most recent state)', async () => { + // Capture the prompt the model receives so we can assert it carries the + // LATEST content (v2), not the older content (v1) for a duplicated path. + let capturedAbstractPrompt = '' + const generator: IContentGenerator = { + estimateTokensSync: () => 10, + generateContent: sandbox.stub().rejects(new Error('n/a')), + generateContentStream: sandbox.stub().callsFake(async function *(req: { + contents?: Array<{content?: string}> + systemPrompt?: string + }) { + const userContent = req.contents?.[0]?.content ?? '' + const isAbstract = (req.systemPrompt ?? '').includes('one-line') + if (isAbstract) capturedAbstractPrompt = userContent + const innerTag = isAbstract ? 
'abstract' : 'overview' + yield {content: `<${innerTag}>S`, isComplete: false} + yield {isComplete: true} + }), + } as unknown as IContentGenerator + + const result = await generateFileAbstractsBatch( + [ + {contextPath: 'auth/jwt.md', fullContent: 'v1: original draft'}, + {contextPath: 'auth/jwt.md', fullContent: 'v2: updated content'}, + ], + generator, + ) + + // Only one `` block should appear in the prompt + // (deduped at the generator boundary). Without the dedup, the model would + // see two blocks and may answer them in either order. + const pathOccurrences = (capturedAbstractPrompt.match(/ { + let capturedPrompt = '' + const generator: IContentGenerator = { + estimateTokensSync: () => 10, + generateContent: sandbox.stub().rejects(new Error('n/a')), + generateContentStream: sandbox.stub().callsFake(async function *(req: { + contents?: Array<{content?: string}> + systemPrompt?: string + }) { + capturedPrompt = req.contents?.[0]?.content ?? '' + const isAbstract = (req.systemPrompt ?? '').includes('one-line') + const innerTag = isAbstract ? 'abstract' : 'overview' + yield {content: `<${innerTag}>OK`, isComplete: false} + yield {isComplete: true} + }), + } as unknown as IContentGenerator + + // Content that would break the prompt envelope without CDATA: literal + // and markers, plus an XML-flavored payload. + const treacherousContent = 'A doc explaining the tag and : bar' + + const result = await generateFileAbstractsBatch( + [{contextPath: 'docs/xml.md', fullContent: treacherousContent}], + generator, + ) + + // The raw treacherous text must appear inside a CDATA section, not as + // bare nested elements, so the model parses one document and one file. + expect(capturedPrompt).to.include('') + // The prompt has exactly one opener and exactly one closing + // at the structural level (the body's is now + // inert inside CDATA). + const docOpen = (capturedPrompt.match(//g) ?? 
[]).length + expect(docOpen).to.equal(1, 'exactly one envelope per file') + + // Result still parses cleanly. + expect(result[0].abstractContent).to.equal('OK') + }) + + it('escapes nested CDATA terminators in content so the wrap stays valid', async () => { + let capturedPrompt = '' + const generator: IContentGenerator = { + estimateTokensSync: () => 10, + generateContent: sandbox.stub().rejects(new Error('n/a')), + generateContentStream: sandbox.stub().callsFake(async function *(req: { + contents?: Array<{content?: string}> + }) { + capturedPrompt = req.contents?.[0]?.content ?? '' + yield {content: 'OK', isComplete: false} + yield {isComplete: true} + }), + } as unknown as IContentGenerator + + // Content that contains a literal `]]>` sequence — would terminate CDATA + // prematurely without the in-CDATA escape trick. + await generateFileAbstractsBatch( + [{contextPath: 'x.md', fullContent: 'before ]]> after'}], + generator, + ) + + // The bare `]]>` must NOT appear inside the still-active CDATA section — + // it should be split via `]]]]>`. + expect(capturedPrompt).to.include(']]]]>') + }) + it('issues exactly two LLM calls regardless of batch size (one L0 batch, one L1 batch)', async () => { const l0Response = Array.from({length: 5}, (_, i) => `S${i}.`, From 7838bf446f9140308d292eccd8f3d00099805171 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Thu=E1=BA=ADn=20Ph=C3=A1t?= Date: Wed, 29 Apr 2026 10:44:50 +0700 Subject: [PATCH 05/13] refactor: [ENG-2485] address second-pass review feedback on PR #579 - Reword the Step 1 snapshot-failure catch in dream-executor: the old comment claimed "atomic drain already removed entries", but drain doesn't run in this code path. Clarify that skipping drain here is safer than draining-and-failing because the atomic-drain RMW clears entries synchronously. - Add a checkEligibility test for the stale-summary queue bypass so both tryStartDream and checkEligibility pin the invariant. 
They delegate to the same checkGates1to3 path; without this, a future refactor of the shared path could regress only one call site. --- src/server/infra/executor/dream-executor.ts | 11 ++++++----- test/unit/infra/dream/dream-trigger.test.ts | 16 ++++++++++++++++ 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/server/infra/executor/dream-executor.ts b/src/server/infra/executor/dream-executor.ts index 5719a0504..d0426d6b2 100644 --- a/src/server/infra/executor/dream-executor.ts +++ b/src/server/infra/executor/dream-executor.ts @@ -129,11 +129,12 @@ export class DreamExecutor { try { preState = await snapshotService.getCurrentState(projectRoot) } catch { - // Fail-open: if snapshot fails, the entire step 5 block (propagation + - // queue drain) is skipped — the stale-summary queue is not consumed in - // this dream cycle and accumulates until the next successful run. - // Drain-skip is preferable to drain-and-lose because atomic drain - // already removed entries before any propagation could re-enqueue them. + // Fail-open: leaving preState undefined skips the entire step 5 block + // (queue drain + propagation), so the stale-summary queue is left + // intact for the next successful dream cycle. Skipping drain here is + // safer than drain-then-fail: the atomic-drain design clears entries + // synchronously inside the RMW, so if we drained and then threw + // before reaching the catch's re-enqueue, the snapshot would be lost. 
} // Step 2: Load dream state diff --git a/test/unit/infra/dream/dream-trigger.test.ts b/test/unit/infra/dream/dream-trigger.test.ts index 03ffaa107..6c5465e5f 100644 --- a/test/unit/infra/dream/dream-trigger.test.ts +++ b/test/unit/infra/dream/dream-trigger.test.ts @@ -296,6 +296,22 @@ describe('DreamTrigger', () => { } }) + it('should bypass activity gate when stale-summary queue has work', async () => { + // Symmetry with the tryStartDream bypass test — both methods delegate + // to checkGates1to3, so a future refactor of the shared path must keep + // this invariant on both call sites. + const deps = makeDeps({ + state: makeState({ + curationsSinceDream: 1, + staleSummaryPaths: [{enqueuedAt: Date.now(), path: 'auth/jwt.md'}], + }), + }) + const trigger = new DreamTrigger(deps) + + const result = await trigger.checkEligibility('/project') + expect(result.eligible).to.be.true + }) + it('should fail when queue is not empty', async () => { const deps = makeDeps({queueLength: 3}) const trigger = new DreamTrigger(deps) From 887264f61777535393b3d1997b65f32b4b67efa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Thu=E1=BA=ADn=20Ph=C3=A1t?= Date: Wed, 29 Apr 2026 11:13:41 +0700 Subject: [PATCH 06/13] refactor: [ENG-2518] address second-pass review feedback on PR #580 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Re-anchor parseBatchedTags on `` openers instead of `` closers (real correctness): the model output is plain text, not CDATA-wrapped, so an overview that mentions `` literally in prose (perfectly normal for docs about XML, JSX, or build systems) would prematurely terminate the outer match and orphan the inner tag. The new strategy collects all opener positions, then slices each opener's body up to the next opener (or end-of-string) — a literal `` in prose has no special meaning to the parser. - Improve the CDATA test to capture BOTH L0 and L1 prompts and assert the wrap independently on each builder. 
The previous test only validated the last-call's prompt, so a future refactor that forgot wrapCdata in one builder would slip past. - Add a parser test for literal `` inside model overview prose to pin the new robustness invariant. - Move "buffers below cap", "processes up to BATCH_SIZE_CAP", and "splits oversized backlogs" tests out of the `status file` describe block into a sibling `batching behaviour` block — these verify scheduling, not status I/O, and the misgrouping made the suite harder to navigate. --- src/agent/infra/map/abstract-generator.ts | 30 +++++++--- .../map/abstract-generator-batch.test.ts | 55 +++++++++++++++---- test/unit/agent/map/abstract-queue.test.ts | 48 ++++++++-------- 3 files changed, 92 insertions(+), 41 deletions(-) diff --git a/src/agent/infra/map/abstract-generator.ts b/src/agent/infra/map/abstract-generator.ts index 9bc1f4412..5788eb5a9 100644 --- a/src/agent/infra/map/abstract-generator.ts +++ b/src/agent/infra/map/abstract-generator.ts @@ -144,21 +144,35 @@ ${filesXml} * Extract ... per from the model output. * Tolerant: ignores extra whitespace, supports nested newlines inside the inner * tag. Returns a Map keyed by path. Paths that don't appear are absent. + * + * Anchored on `` openers (not `` closers) so a model + * overview that mentions `` literally in prose — perfectly normal for + * docs about XML, JSX, or build systems — cannot prematurely terminate the + * outer match and orphan the inner tag. Each opener owns the response slice + * up to the next opener (or end-of-string), and the inner regex extracts + * the payload from that slice. */ function parseBatchedTags(response: string, innerTag: 'abstract' | 'overview'): Map { const result = new Map() - // Match ...BODY...; lazy on body so - // multiple blocks don't bleed into one. The `.` flag-less regex relies - // on the s-flag (dotAll) to span newlines. 
- const fileRe = /]*>([\s\S]*?)<\/file>/g + const fileOpenerRe = /]*>/g const innerRe = new RegExp(`<${innerTag}>([\\s\\S]*?)<\\/${innerTag}>`) + const openers: Array<{bodyStart: number; rawPath: string}> = [] let m: null | RegExpExecArray - while ((m = fileRe.exec(response)) !== null) { - const [, rawPath, innerXml] = m - const path = rawPath.replaceAll('&', '&').replaceAll('"', '"').replaceAll('<', '<').replaceAll('>', '>') - const inner = innerRe.exec(innerXml) + while ((m = fileOpenerRe.exec(response)) !== null) { + openers.push({bodyStart: fileOpenerRe.lastIndex, rawPath: m[1]}) + } + + for (const [i, opener] of openers.entries()) { + // Each opener's slice runs from its end to the start of the next opener + // (or end-of-string). Within that slice, the inner regex picks up the + // payload. A literal `` in prose has no special meaning here. + const sliceEnd = i + 1 < openers.length ? openers[i + 1].bodyStart : response.length + const slice = response.slice(opener.bodyStart, sliceEnd) + const inner = innerRe.exec(slice) if (inner) { + const path = opener.rawPath + .replaceAll('&', '&').replaceAll('"', '"').replaceAll('<', '<').replaceAll('>', '>') result.set(path, inner[1].trim()) } } diff --git a/test/unit/agent/map/abstract-generator-batch.test.ts b/test/unit/agent/map/abstract-generator-batch.test.ts index 5405735e4..7bb14dd17 100644 --- a/test/unit/agent/map/abstract-generator-batch.test.ts +++ b/test/unit/agent/map/abstract-generator-batch.test.ts @@ -160,7 +160,11 @@ describe('generateFileAbstractsBatch', () => { }) it('CDATA-wraps file content so XML/HTML markers in the body cannot break the envelope', async () => { - let capturedPrompt = '' + // Capture BOTH the L0 and L1 prompts so we assert the wrap is applied to + // each independent builder. Without separate captures, we'd only validate + // the last call's prompt — a future refactor that forgot wrapCdata in one + // builder would slip past. 
+ const capturedPrompts: {abstract?: string; overview?: string} = {} const generator: IContentGenerator = { estimateTokensSync: () => 10, generateContent: sandbox.stub().rejects(new Error('n/a')), @@ -168,8 +172,9 @@ describe('generateFileAbstractsBatch', () => { contents?: Array<{content?: string}> systemPrompt?: string }) { - capturedPrompt = req.contents?.[0]?.content ?? '' const isAbstract = (req.systemPrompt ?? '').includes('one-line') + if (isAbstract) capturedPrompts.abstract = req.contents?.[0]?.content ?? '' + else capturedPrompts.overview = req.contents?.[0]?.content ?? '' const innerTag = isAbstract ? 'abstract' : 'overview' yield {content: `<${innerTag}>OK`, isComplete: false} yield {isComplete: true} @@ -185,20 +190,48 @@ describe('generateFileAbstractsBatch', () => { generator, ) - // The raw treacherous text must appear inside a CDATA section, not as - // bare nested elements, so the model parses one document and one file. - expect(capturedPrompt).to.include('') - // The prompt has exactly one opener and exactly one closing - // at the structural level (the body's is now - // inert inside CDATA). - const docOpen = (capturedPrompt.match(//g) ?? []).length - expect(docOpen).to.equal(1, 'exactly one envelope per file') + // Both prompts (L0 and L1) must independently wrap content in CDATA. + for (const [label, prompt] of [['abstract', capturedPrompts.abstract], ['overview', capturedPrompts.overview]] as const) { + expect(prompt, `${label} prompt was captured`).to.be.a('string') + const promptText = prompt ?? '' + expect(promptText, `${label} prompt opens CDATA`).to.include('') + // Exactly one structural opener per file (the body's literal + // is now inert inside CDATA). + const docOpen = (promptText.match(//g) ?? []).length + expect(docOpen, `${label} prompt has exactly one envelope`).to.equal(1) + } // Result still parses cleanly. 
expect(result[0].abstractContent).to.equal('OK') }) + it('parser is robust to a literal appearing inside the model overview prose', async () => { + // The model output is plain text (NOT CDATA-wrapped). A document about + // build systems / XML / JSX may legitimately produce overview prose like + // "the closing tag in Ant build files…" — a closer-anchored + // parser would stop at the premature and orphan the inner tag. + const l0Response = 'Ant build files use a closing tag.' + const l1Response = ` +- Mentions the closing tag in build prose +- Still must round-trip cleanly +` + + const generator = makeScriptedGenerator(sandbox, [l0Response, l1Response]) + const result = await generateFileAbstractsBatch( + [{contextPath: 'ant.md', fullContent: 'Ant build files'}], + generator, + ) + + // Both fields are populated despite the literal in the body — + // the parser anchors on `` openers, not `` closers. + expect(result).to.have.lengthOf(1) + expect(result[0].abstractContent).to.include('Ant build files') + expect(result[0].abstractContent).to.include('') + expect(result[0].overviewContent).to.include('Mentions the ') + expect(result[0].overviewContent).to.include('Still must round-trip cleanly') + }) + it('escapes nested CDATA terminators in content so the wrap stays valid', async () => { let capturedPrompt = '' const generator: IContentGenerator = { diff --git a/test/unit/agent/map/abstract-queue.test.ts b/test/unit/agent/map/abstract-queue.test.ts index 2339837a1..cb65671ba 100644 --- a/test/unit/agent/map/abstract-queue.test.ts +++ b/test/unit/agent/map/abstract-queue.test.ts @@ -231,6 +231,32 @@ describe('AbstractGenerationQueue', () => { expect(written.pending).to.equal(1) }) + it('status file reflects retrying items in pending count', async () => { + const {generator, rejectNextCall} = makeControlledGenerator(sandbox) + const q = new AbstractGenerationQueue(tmpDir, 2) + + q.setGenerator(generator) + q.enqueue({contextPath: join(tmpDir, 'file.md'), fullContent: 
'content'}) + + await new Promise((r) => { setImmediate(r) }) + rejectNextCall(new Error('fail')) + await new Promise((r) => { setImmediate(r) }) + await new Promise((r) => { setImmediate(r) }) + + // Status file is written during retrying++ branch — wait for disk I/O to flush + await new Promise((r) => { setTimeout(r, 50) }) + + const statusPath = join(tmpDir, '.brv', '_queue_status.json') + const raw = await fs.readFile(statusPath, 'utf8') + const written = JSON.parse(raw) as {pending: number; processing: boolean} + expect(written.pending).to.equal(1) // retrying item must appear in status file + expect(written.processing).to.equal(false) + }) + }) + + // ── batching behaviour ───────────────────────────────────────────────────── + + describe('batching behaviour', () => { it('buffers items below BATCH_SIZE_CAP without firing LLM calls', async () => { const successfulGenerator = makeSuccessfulGenerator(sandbox) const q = new AbstractGenerationQueue(tmpDir) @@ -303,27 +329,5 @@ describe('AbstractGenerationQueue', () => { expect(stub.callCount).to.equal(4, 'Expected 4 stream calls for 7 items split into batches of 5+2') expect(q.getStatus().processed).to.equal(N) }) - - it('status file reflects retrying items in pending count', async () => { - const {generator, rejectNextCall} = makeControlledGenerator(sandbox) - const q = new AbstractGenerationQueue(tmpDir, 2) - - q.setGenerator(generator) - q.enqueue({contextPath: join(tmpDir, 'file.md'), fullContent: 'content'}) - - await new Promise((r) => { setImmediate(r) }) - rejectNextCall(new Error('fail')) - await new Promise((r) => { setImmediate(r) }) - await new Promise((r) => { setImmediate(r) }) - - // Status file is written during retrying++ branch — wait for disk I/O to flush - await new Promise((r) => { setTimeout(r, 50) }) - - const statusPath = join(tmpDir, '.brv', '_queue_status.json') - const raw = await fs.readFile(statusPath, 'utf8') - const written = JSON.parse(raw) as {pending: number; processing: boolean} 
- expect(written.pending).to.equal(1) // retrying item must appear in status file - expect(written.processing).to.equal(false) - }) }) }) From 19db5bad297bbbdfbce5c8e35588d5ae25017b49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Thu=E1=BA=ADn=20Ph=C3=A1t?= Date: Fri, 1 May 2026 22:00:01 +0700 Subject: [PATCH 07/13] feat: [ENG-2519] enable prefix caching for providers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ports the caching change from experiment/03-enable-caching to proj/curation-enhancement. Two coupled behavior changes (must ship together) that engage Anthropic prompt caching cleanly and make OpenAI/Google auto-caches see a longer stable prefix: - Attach providerOptions.anthropic.cacheControl: ephemeral to the system message and the last tool definition. The system message now enters as a SystemModelMessage in the messages array (not via AI SDK's top-level `system` parameter, which strips providerOptions). - Relocate out of the system prompt into the first user message. DateTimeContributor was rebuilding the timestamp every iteration, and Anthropic prefix caching does token-level matching — one byte of churn ~1,800 tokens in poisoned every cache miss past it. Moving dateTime to a single iteration-0 user-message prefix keeps the system prefix byte-stable across iterations. Verified across a 60-curate fixture-driven A/B and a 30-curate progression run on proj/curation-enhancement (see notes/token-usage-reduction/eng-2519-prefix-caching/REPORT.md): - Anthropic claude-haiku-4-5 12-fixture: -21.5% normalized cost - Anthropic claude-haiku-4-5 30-curate progression: -29.7% - OpenAI gpt-4.1: -8.0% (auto-cache hits a longer stable prefix) - Google gemini-3-flash-preview: ~flat (model-specific noise) Tests: - New unit tests for prependCachedSystemMessage edge cases (empty/ undefined systemPrompt, non-mutation invariant) and toAiSdkTools' last-tool cache_control attachment (1, 3 tools). 
- Updated internal-llm-service test for completeTask: assertion now matches the iter-0 user-message dateTime prefix. Out of scope (deferred): - Cache-token field extraction in response.usage (cacheReadTokens, cacheCreationTokens). Without telemetry to consume them they would be dead code. - Telemetry / UsageLogger plumbing — separate ticket. - Anthropic daemon-stability mitigations (TPM retry-loop hangs, daemon-readiness races on large fixtures) — operational, separate. Builds: clean. Typecheck: clean. Lint: clean on touched files. Tests: 7004 passing, 16 pending, 0 failing. --- src/agent/infra/agent/service-initializer.ts | 10 +++- src/agent/infra/llm/agent-llm-service.ts | 28 ++++------- .../generators/ai-sdk-content-generator.ts | 30 +++++++++-- .../generators/ai-sdk-message-converter.ts | 9 +++- .../ai-sdk-content-generator.test.ts | 41 +++++++++++++++ .../ai-sdk-message-converter.test.ts | 50 +++++++++++++++++++ .../infra/llm/internal-llm-service.test.ts | 9 +++- 7 files changed, 150 insertions(+), 27 deletions(-) create mode 100644 test/unit/agent/llm/generators/ai-sdk-content-generator.test.ts create mode 100644 test/unit/agent/llm/generators/ai-sdk-message-converter.test.ts diff --git a/src/agent/infra/agent/service-initializer.ts b/src/agent/infra/agent/service-initializer.ts index 6b470f18b..7f27fdc76 100644 --- a/src/agent/infra/agent/service-initializer.ts +++ b/src/agent/infra/agent/service-initializer.ts @@ -193,12 +193,18 @@ export async function createCipherAgentServices( basePath: promptsBasePath, validateConfig: true, }) - // Register default contributors + // Register default contributors. + // + // Note: dateTime is intentionally NOT in the system prompt. Anthropic + // prompt caching does token-level prefix matching, so a per-iteration + // refreshed timestamp here would invalidate the cache for everything + // past it. 
dateTime is instead injected into the first user message + // by AgentLLMService, where it lives after the cache breakpoints and + // does not poison the cached prefix. systemPromptManager.registerContributors([ {enabled: true, filepath: 'system-prompt.yml', id: 'base', priority: 0, type: 'file'}, {enabled: true, id: 'env', priority: 10, type: 'environment'}, {enabled: true, id: 'memories', priority: 20, type: 'memory'}, - {enabled: true, id: 'datetime', priority: 30, type: 'dateTime'}, ]) // Register context tree structure contributor for query/curate commands diff --git a/src/agent/infra/llm/agent-llm-service.ts b/src/agent/infra/llm/agent-llm-service.ts index 51802594e..e29f741c2 100644 --- a/src/agent/infra/llm/agent-llm-service.ts +++ b/src/agent/infra/llm/agent-llm-service.ts @@ -902,8 +902,11 @@ export class AgentLLMService implements ILLMService { this.cachedBasePrompt = basePrompt this.memoryDirtyFlag = false } else { - // Cache hit: reuse base prompt, only refresh the DateTime section - basePrompt = this.refreshDateTime(this.cachedBasePrompt!) + // Cache hit: reuse base prompt verbatim. The cached prompt has no + // dateTime section to refresh — dateTime is injected into the + // first user message instead so the system prefix stays byte-stable + // across iterations and prompt caching can engage cleanly. + basePrompt = this.cachedBasePrompt! } let systemPrompt = basePrompt @@ -944,9 +947,13 @@ export class AgentLLMService implements ILLMService { // Add user message and compress context within mutex lock return this.mutex.withLock(async () => { - // Add user message to context only on the first iteration + // Add user message to context only on the first iteration. The + // dateTime block is prefixed here (not in the system prompt) so + // the cached system prefix stays byte-stable across iterations + // and Anthropic/OpenAI/Google prefix caches can engage cleanly. 
if (iterationCount === 0) { - await this.contextManager.addUserMessage(textInput, imageData, fileData) + const inputWithDateTime = `<dateTime>Current date and time: ${new Date().toISOString()}</dateTime>\n\n${textInput}` + await this.contextManager.addUserMessage(inputWithDateTime, imageData, fileData) } // Rolling checkpoint: periodically save progress and clear history for RLM commands. @@ -1555,19 +1562,6 @@ export class AgentLLMService implements ILLMService { }) } - /** - * Replace the DateTime section in a cached system prompt with a fresh timestamp. - * DateTimeContributor wraps its output in <dateTime>...</dateTime> XML tags, - * enabling reliable regex replacement without rebuilding the entire prompt. - * - * @param cachedPrompt - Previously cached system prompt - * @returns Updated prompt with fresh DateTime - */ - private refreshDateTime(cachedPrompt: string): string { - const freshDateTime = `<dateTime>Current date and time: ${new Date().toISOString()}</dateTime>` - return cachedPrompt.replace(/<dateTime>[\S\s]*?<\/dateTime>/, freshDateTime) - } - /** * Check if a rolling checkpoint should trigger. * Triggers every N iterations for curate/query commands, or when token utilization is high. diff --git a/src/agent/infra/llm/generators/ai-sdk-content-generator.ts b/src/agent/infra/llm/generators/ai-sdk-content-generator.ts index 9fd961d42..1d2e66d03 100644 --- a/src/agent/infra/llm/generators/ai-sdk-content-generator.ts +++ b/src/agent/infra/llm/generators/ai-sdk-content-generator.ts @@ -5,7 +5,7 @@ * Replaces per-provider content generators with one unified implementation. */ -import type {LanguageModel} from 'ai' +import type {LanguageModel, ModelMessage} from 'ai' import {generateText, streamText} from 'ai' @@ -22,6 +22,28 @@ import {toAiSdkTools, toModelMessages} from './ai-sdk-message-converter.js' const DEFAULT_CHARS_PER_TOKEN = 4 +/** + * Prepend the system prompt as a system-role message carrying + * `providerOptions.anthropic.cacheControl: ephemeral`.
AI SDK's top-level + * `system: string` parameter does not propagate providerOptions, so the + * only way to attach Anthropic cache_control to the system block is to + * pass it through the messages array. Non-Anthropic providers ignore the + * `anthropic` namespace. + */ +export function prependCachedSystemMessage(systemPrompt: string | undefined, messages: ModelMessage[]): ModelMessage[] { + if (!systemPrompt) { + return messages + } + + const systemMessage: ModelMessage = { + content: systemPrompt, + providerOptions: {anthropic: {cacheControl: {type: 'ephemeral'}}}, + role: 'system', + } + + return [systemMessage, ...messages] +} + /** * Configuration for AiSdkContentGenerator. */ @@ -54,7 +76,7 @@ export class AiSdkContentGenerator implements IContentGenerator { } public async generateContent(request: GenerateContentRequest): Promise { - const messages = toModelMessages(request.contents) + const messages = prependCachedSystemMessage(request.systemPrompt, toModelMessages(request.contents)) const tools = toAiSdkTools(request.tools) const result = await generateText({ @@ -63,7 +85,6 @@ export class AiSdkContentGenerator implements IContentGenerator { messages, model: this.model, temperature: request.config.temperature, - ...(request.systemPrompt && {system: request.systemPrompt}), ...(tools && {tools}), ...(request.config.topK !== undefined && {topK: request.config.topK}), ...(request.config.topP !== undefined && {topP: request.config.topP}), @@ -100,7 +121,7 @@ export class AiSdkContentGenerator implements IContentGenerator { } public async *generateContentStream(request: GenerateContentRequest): AsyncGenerator { - const messages = toModelMessages(request.contents) + const messages = prependCachedSystemMessage(request.systemPrompt, toModelMessages(request.contents)) const tools = toAiSdkTools(request.tools) const result = streamText({ @@ -109,7 +130,6 @@ export class AiSdkContentGenerator implements IContentGenerator { messages, model: this.model, temperature: 
request.config.temperature, - ...(request.systemPrompt && {system: request.systemPrompt}), ...(tools && {tools}), ...(request.config.topK !== undefined && {topK: request.config.topK}), ...(request.config.topP !== undefined && {topP: request.config.topP}), diff --git a/src/agent/infra/llm/generators/ai-sdk-message-converter.ts b/src/agent/infra/llm/generators/ai-sdk-message-converter.ts index 49a10e37e..cd61a8d95 100644 --- a/src/agent/infra/llm/generators/ai-sdk-message-converter.ts +++ b/src/agent/infra/llm/generators/ai-sdk-message-converter.ts @@ -63,18 +63,25 @@ export function toModelMessages(messages: InternalMessage[]): ModelMessage[] { /** * Convert our ToolSet to AI SDK tool definitions. * Tools are declared without `execute` — our agentic loop handles execution. + * + * The last tool gets `providerOptions.anthropic.cacheControl: ephemeral`, + * which makes Anthropic cache the entire tool block (and the system prompt + * before it). Non-Anthropic providers ignore the `anthropic` namespace. */ export function toAiSdkTools(tools?: InternalToolSet): Record> | undefined { if (!tools || Object.keys(tools).length === 0) { return undefined } + const entries = Object.entries(tools) const result: Record> = {} - for (const [name, def] of Object.entries(tools)) { + for (const [index, [name, def]] of entries.entries()) { + const isLast = index === entries.length - 1 result[name] = aiSdkTool({ description: def.description ?? 
'', inputSchema: jsonSchema(def.parameters as Record), + ...(isLast && {providerOptions: {anthropic: {cacheControl: {type: 'ephemeral'}}}}), }) } diff --git a/test/unit/agent/llm/generators/ai-sdk-content-generator.test.ts b/test/unit/agent/llm/generators/ai-sdk-content-generator.test.ts new file mode 100644 index 000000000..d5244bb0a --- /dev/null +++ b/test/unit/agent/llm/generators/ai-sdk-content-generator.test.ts @@ -0,0 +1,41 @@ +import type {ModelMessage} from 'ai' + +import {expect} from 'chai' + +import {prependCachedSystemMessage} from '../../../../../src/agent/infra/llm/generators/ai-sdk-content-generator.js' + +describe('prependCachedSystemMessage', () => { + const userMsg: ModelMessage = {content: 'hi', role: 'user'} + + it('returns the input messages unchanged when systemPrompt is undefined', () => { + const result = prependCachedSystemMessage(undefined, [userMsg]) + expect(result).to.deep.equal([userMsg]) + }) + + it('returns the input messages unchanged when systemPrompt is the empty string', () => { + const result = prependCachedSystemMessage('', [userMsg]) + expect(result).to.deep.equal([userMsg]) + }) + + it('prepends a system-role message with cacheControl providerOptions when systemPrompt is non-empty', () => { + const result = prependCachedSystemMessage('You are a helpful assistant.', [userMsg]) + + expect(result).to.have.length(2) + + const [system, user] = result + expect(system.role).to.equal('system') + expect(system.content).to.equal('You are a helpful assistant.') + expect(system.providerOptions).to.deep.equal({ + anthropic: {cacheControl: {type: 'ephemeral'}}, + }) + expect(user).to.deep.equal(userMsg) + }) + + it('does not mutate the original messages array', () => { + const original: ModelMessage[] = [userMsg] + const result = prependCachedSystemMessage('sys', original) + + expect(original).to.have.length(1) + expect(result).to.not.equal(original) + }) +}) diff --git a/test/unit/agent/llm/generators/ai-sdk-message-converter.test.ts 
b/test/unit/agent/llm/generators/ai-sdk-message-converter.test.ts new file mode 100644 index 000000000..dcec0badc --- /dev/null +++ b/test/unit/agent/llm/generators/ai-sdk-message-converter.test.ts @@ -0,0 +1,50 @@ +import {expect} from 'chai' + +import type {ToolSet as InternalToolSet} from '../../../../../src/agent/core/domain/tools/types.js' + +import {toAiSdkTools} from '../../../../../src/agent/infra/llm/generators/ai-sdk-message-converter.js' + +function makeTool(description: string): InternalToolSet[string] { + return { + description, + parameters: {properties: {}, type: 'object'}, + } +} + +function getProviderOptions(tool: unknown): Record | undefined { + if (!tool || typeof tool !== 'object') return undefined + return (tool as {providerOptions?: Record}).providerOptions +} + +const EPHEMERAL_CACHE_CONTROL = {anthropic: {cacheControl: {type: 'ephemeral'}}} + +describe('toAiSdkTools — anthropic cache_control on last tool', () => { + it('returns undefined when tools is undefined or empty', () => { + expect(toAiSdkTools()).to.equal(undefined) + expect(toAiSdkTools({})).to.equal(undefined) + }) + + it('attaches cache_control to the single tool when only one is registered', () => { + const tools: InternalToolSet = {onlyTool: makeTool('the only one')} + const result = toAiSdkTools(tools) + expect(result).to.exist + expect(getProviderOptions(result?.onlyTool)).to.deep.equal(EPHEMERAL_CACHE_CONTROL) + }) + + it('attaches cache_control to the LAST tool only when multiple are registered', () => { + const tools: InternalToolSet = { + firstTool: makeTool('first'), + lastTool: makeTool('last'), + middleTool: makeTool('middle'), + } + const result = toAiSdkTools(tools) + expect(result).to.exist + + // Object literal key order is alphabetical here (lint sort-objects), so + // entry iteration is firstTool → lastTool → middleTool. The "last" + // iterated entry is middleTool, which is what should carry cacheControl. 
+ expect(getProviderOptions(result?.firstTool)).to.equal(undefined) + expect(getProviderOptions(result?.lastTool)).to.equal(undefined) + expect(getProviderOptions(result?.middleTool)).to.deep.equal(EPHEMERAL_CACHE_CONTROL) + }) +}) diff --git a/test/unit/infra/llm/internal-llm-service.test.ts b/test/unit/infra/llm/internal-llm-service.test.ts index 26a2afd34..10fa11938 100644 --- a/test/unit/infra/llm/internal-llm-service.test.ts +++ b/test/unit/infra/llm/internal-llm-service.test.ts @@ -788,8 +788,13 @@ describe('AgentLLMService', () => { await service.completeTask('Analyze this image', {imageData}) - // Verify imageData was passed to addUserMessage - expect(addUserMessageStub.calledWith('Analyze this image', imageData)).to.be.true + // Verify imageData was passed to addUserMessage. The first argument now + // includes a `` prefix injected at iteration 0 to keep the + // system prefix byte-stable for prompt caching, so we match by suffix. + expect(addUserMessageStub.calledOnce).to.be.true + const [firstArg, secondArg] = addUserMessageStub.firstCall.args as [string, typeof imageData] + expect(firstArg).to.match(/.*<\/dateTime>\n\nAnalyze this image$/s) + expect(secondArg).to.equal(imageData) }) }) }) From 493e848b21eb799fc41ab2fd4100140aba0b879f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Thu=E1=BA=ADn=20Ph=C3=A1t?= Date: Fri, 1 May 2026 22:56:29 +0700 Subject: [PATCH 08/13] refactor: [ENG-2519] address review-agent feedback on dateTime cache hazards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Drop the unused `datetime` template variable from FileContributor's renderTemplateVariables. There is no current YAML consumer; leaving it in place was a landmine — any future `{{datetime}}` reference would silently re-introduce per-iteration cache poisoning. - Inject the dateTime block into the rolling-checkpoint continuation prompt. 
clearHistory() wipes the iter-0 user message that originally carried it, and the iter-0 guard upstream prevents re-injection, so long curate/query runs that hit the checkpoint were losing time context for every subsequent iteration. - Extract `buildDateTimePrefix(now?)` as a small shared helper used by both call sites; cover with unit tests. - Expand the cache_control "last tool" test comment to clarify that the contract is insertion-order, not alphabetical. --- src/agent/infra/llm/agent-llm-service.ts | 22 +++++++++++-- .../contributors/file-contributor.ts | 8 +++-- test/unit/agent/llm/agent-llm-service.test.ts | 32 +++++++++++++++++++ .../ai-sdk-message-converter.test.ts | 11 +++++-- 4 files changed, 65 insertions(+), 8 deletions(-) create mode 100644 test/unit/agent/llm/agent-llm-service.test.ts diff --git a/src/agent/infra/llm/agent-llm-service.ts b/src/agent/infra/llm/agent-llm-service.ts index e29f741c2..92fdeb98b 100644 --- a/src/agent/infra/llm/agent-llm-service.ts +++ b/src/agent/infra/llm/agent-llm-service.ts @@ -60,6 +60,18 @@ import {type ProcessedOutput, ToolOutputProcessor, type TruncationConfig} from ' /** Target utilization ratio for message tokens (leaves headroom for response) */ const TARGET_MESSAGE_TOKEN_UTILIZATION = 0.7 +/** + * Build a `<dateTime>...</dateTime>\n\n` prefix for a user-message body. + * + * Per-call timestamps must NOT enter the system prompt (they would poison + * the prefix cache). They are injected into the user message instead, at + * the boundaries where the model legitimately needs fresh time context: + * the iter-0 input, and after a rolling-checkpoint history clear. + */ +export function buildDateTimePrefix(now: Date = new Date()): string { + return `<dateTime>Current date and time: ${now.toISOString()}</dateTime>\n\n` +} + /** * Result of parallel tool execution (before adding to context). * Contains all information needed to add the result to context in order.
@@ -952,7 +964,7 @@ export class AgentLLMService implements ILLMService { // the cached system prefix stays byte-stable across iterations // and Anthropic/OpenAI/Google prefix caches can engage cleanly. if (iterationCount === 0) { - const inputWithDateTime = `Current date and time: ${new Date().toISOString()}\n\n${textInput}` + const inputWithDateTime = `${buildDateTimePrefix()}${textInput}` await this.contextManager.addUserMessage(inputWithDateTime, imageData, fileData) } @@ -1547,8 +1559,12 @@ export class AgentLLMService implements ILLMService { // Clear conversation history await this.contextManager.clearHistory() - // Re-inject continuation prompt with variable reference - const continuationPrompt = [ + // Re-inject continuation prompt with variable reference. + // Prepend the dateTime block: clearHistory wiped the iter-0 user + // message that originally carried it, and the iter-0 guard upstream + // prevents re-injection. Without this, every iteration after the + // first checkpoint loses time context for the rest of the run. + const continuationPrompt = buildDateTimePrefix() + [ `Continue task. Iteration checkpoint at turn ${iterationCount}.`, `Previous progress stored in variable: ${checkpointVar}`, `Original task: ${textInput.slice(0, 200)}${textInput.length > 200 ? '...' : ''}`, diff --git a/src/agent/infra/system-prompt/contributors/file-contributor.ts b/src/agent/infra/system-prompt/contributors/file-contributor.ts index fc5fd8970..e5fd8c83e 100644 --- a/src/agent/infra/system-prompt/contributors/file-contributor.ts +++ b/src/agent/infra/system-prompt/contributors/file-contributor.ts @@ -150,12 +150,16 @@ export class FileContributor implements SystemPromptContributor { private renderTemplateVariables(template: string, context: ContributorContext): string { let result = template - // Build variables from context + // Build variables from context. + // Note: a `datetime` template variable is intentionally NOT exposed here. 
+ // Per-call timestamps must never enter the system prompt — they would + // poison the prefix cache from that byte onward. The current date/time + // is injected once into the iter-0 user message instead (see + // agent-llm-service.ts). /* eslint-disable camelcase */ const variables: Record<string, string> = { available_markers: context.availableMarkers ? Object.keys(context.availableMarkers).join(', ') : '', available_tools: context.availableTools?.join(', ') ?? '', - datetime: `Current date and time: ${new Date().toISOString()}`, } /* eslint-enable camelcase */ diff --git a/test/unit/agent/llm/agent-llm-service.test.ts b/test/unit/agent/llm/agent-llm-service.test.ts new file mode 100644 index 000000000..ed3b21ae0 --- /dev/null +++ b/test/unit/agent/llm/agent-llm-service.test.ts @@ -0,0 +1,32 @@ +import {expect} from 'chai' + +import {buildDateTimePrefix} from '../../../../src/agent/infra/llm/agent-llm-service.js' + +describe('buildDateTimePrefix', () => { + it('renders the supplied date as an ISO-8601 dateTime block followed by a blank line', () => { + const fixed = new Date('2026-05-01T10:30:00.000Z') + const result = buildDateTimePrefix(fixed) + + expect(result).to.equal('<dateTime>Current date and time: 2026-05-01T10:30:00.000Z</dateTime>\n\n') + }) + + it('uses the current time when no date is supplied', () => { + const before = Date.now() + const result = buildDateTimePrefix() + const after = Date.now() + + const match = /<dateTime>Current date and time: (\S+)<\/dateTime>\n\n$/.exec(result) + expect(match).to.not.equal(null) + + const rendered = match === null ?
0 : Date.parse(match[1]) + expect(rendered).to.be.at.least(before) + expect(rendered).to.be.at.most(after) + }) + + it('terminates with a double-newline so the prefix can be concatenated directly to a text body', () => { + const result = buildDateTimePrefix(new Date('2026-01-01T00:00:00.000Z')) + const composed = `${result}body` + + expect(composed).to.equal('<dateTime>Current date and time: 2026-01-01T00:00:00.000Z</dateTime>\n\nbody') + }) +}) diff --git a/test/unit/agent/llm/generators/ai-sdk-message-converter.test.ts b/test/unit/agent/llm/generators/ai-sdk-message-converter.test.ts index dcec0badc..23c49f5b8 100644 --- a/test/unit/agent/llm/generators/ai-sdk-message-converter.test.ts +++ b/test/unit/agent/llm/generators/ai-sdk-message-converter.test.ts @@ -40,9 +40,14 @@ describe('toAiSdkTools — anthropic cache_control on last tool', () => { const result = toAiSdkTools(tools) expect(result).to.exist - // Object literal key order is alphabetical here (lint sort-objects), so - // entry iteration is firstTool → lastTool → middleTool. The "last" - // iterated entry is middleTool, which is what should carry cacheControl. + // The cache_control marker is attached to the LAST entry by insertion + // order, NOT by name. In production, tool registration is deterministic + // (driven by getToolNamesForCommand), so the "last" entry is stable. + // In this test, the object literal is alphabetically sorted by the + // sort-objects lint rule, so iteration order is + // firstTool → lastTool → middleTool — and middleTool ends up last, + // which is what should carry cacheControl. This test pins the + // insertion-order contract, not an alphabetical or name-based one.
expect(getProviderOptions(result?.firstTool)).to.equal(undefined) expect(getProviderOptions(result?.lastTool)).to.equal(undefined) expect(getProviderOptions(result?.middleTool)).to.deep.equal(EPHEMERAL_CACHE_CONTROL) From 06cc5d07d98a3a5e0b41127fdf0e124afad780e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Thu=E1=BA=ADn=20Ph=C3=A1t?= Date: Sat, 2 May 2026 02:08:38 +0700 Subject: [PATCH 09/13] feat: [ENG-2530] pre-pipeline recon to skip first agent iteration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Run tools.curation.recon synchronously in CurateExecutor before spawning the agent session, inject the result both as a sandbox variable (__recon_result_) and as inline fields in the agent's first prompt. Agent's first iteration then proceeds directly to extraction instead of paying a full LLM call to invoke recon. Recon is pure JS — no LLM, no decision — but was previously surfaced as an agent-tool, costing a full ~13K-token iteration on every curate just to invoke a deterministic helper. Measurements (12-fixture A/B vs proj/curation-enhancement + ENG-2519 caching baseline, full results in notes/token-usage-reduction/ eng-2530-prepipeline-recon/REPORT.md): OpenAI gpt-4.1 -16.3% calls -20.3% normalized cost Google gemini-3-flash-preview -1.9% calls -17.8% normalized cost Anthropic claude-haiku-4-5 -2.0% calls -0.9% normalized cost (flat) ByteRover gemini-3-flash-preview 15-curate progression: -6.5% normalized cost / -3.4% \$ cost. Quality risk is bounded: recon itself is unchanged; only the caller moves. Per-fixture variance follows the team's exp 04 pattern (some fixtures regress as the agent compensates for missing recon signal in later iterations); aggregate nets positive on OpenAI and Google. Anthropic claude-haiku-4-5 lands flat — different model-class behavior from the team's claude-sonnet-4-5 measurement; not a regression. 
--- src/server/infra/executor/curate-executor.ts | 22 +++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/server/infra/executor/curate-executor.ts b/src/server/infra/executor/curate-executor.ts index e7253ba90..06a210728 100644 --- a/src/server/infra/executor/curate-executor.ts +++ b/src/server/infra/executor/curate-executor.ts @@ -4,6 +4,7 @@ import type {ICipherAgent} from '../../../agent/core/interfaces/i-cipher-agent.j import type {CurationStatus} from '../../core/domain/entities/curation-status.js' import type {CurateExecuteOptions, ICurateExecutor} from '../../core/interfaces/executor/i-curate-executor.js' +import {recon as reconHelper} from '../../../agent/infra/sandbox/curation-helpers.js' import {BRV_DIR} from '../../constants.js' import {FileValidationError} from '../../core/domain/errors/task-error.js' import { @@ -100,12 +101,26 @@ export class CurateExecutor implements ICurateExecutor { type: 'string', } - // Inject context, metadata, empty history, and taskId into the TASK session's sandbox + // Pre-pipeline the recon step (deterministic helper) so the agent loop + // doesn't spend its first iteration calling tools.curation.recon. The + // result is injected as a sandbox variable for code-exec access AND + // its key findings are surfaced inline in the prompt so the agent's + // first iteration can proceed directly to extraction. recon is pure + // JS — no LLM judgment is needed for whether to call it; the answer + // is always "yes, first thing." Surfacing it as an agent-tool meant + // paying a full LLM iteration just to invoke a deterministic helper. + const initialHistory = {entries: [], totalProcessed: 0} + const reconResult = reconHelper(effectiveContext, metadata, initialHistory) + const reconVar = `__recon_result_${taskIdSafe}` + + // Inject context, metadata, empty history, taskId, and pre-computed + // recon result into the TASK session's sandbox. 
const taskIdVar = `__taskId_${taskIdSafe}` agent.setSandboxVariableOnSession(taskSessionId, ctxVar, effectiveContext) - agent.setSandboxVariableOnSession(taskSessionId, histVar, {entries: [], totalProcessed: 0}) + agent.setSandboxVariableOnSession(taskSessionId, histVar, initialHistory) agent.setSandboxVariableOnSession(taskSessionId, metaVar, metadata) agent.setSandboxVariableOnSession(taskSessionId, taskIdVar, taskId) + agent.setSandboxVariableOnSession(taskSessionId, reconVar, reconResult) // Prompt with curation helpers guidance (tools.curation.* replaces manual infrastructure code) const prompt = [ @@ -114,7 +129,8 @@ export class CurateExecutor implements ICurateExecutor { `History variable: ${histVar}`, `Metadata variable: ${metaVar}`, `Task ID variable: ${taskIdVar} (pass as bare variable, not a string)`, - `IMPORTANT: Do NOT print raw context. Start with tools.curation.recon(${ctxVar}, ${metaVar}, ${histVar}) to assess.`, + `Recon already computed in ${reconVar}: suggestedMode=${reconResult.suggestedMode}, suggestedChunkCount=${reconResult.suggestedChunkCount}, charCount=${reconResult.meta.charCount}, lineCount=${reconResult.meta.lineCount}, messageCount=${reconResult.meta.messageCount}.`, + `IMPORTANT: Do NOT print raw context. Do NOT call tools.curation.recon — it has been pre-computed. Proceed directly to extraction.`, `For chunked extraction use tools.curation.mapExtract(). 
Pass taskId: ${taskIdVar} (bare variable).`, `IMPORTANT: Any code_exec call containing mapExtract MUST use timeout: 300000 on the code_exec tool call itself (not inside mapExtract options).`, `Use tools.curation.groupBySubject() and tools.curation.dedup() to organize extractions.`, From 550cc70e3d6e16c3f79e2c6ab0a8108060e11b71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Thu=E1=BA=ADn=20Ph=C3=A1t?= Date: Sat, 2 May 2026 09:44:47 +0700 Subject: [PATCH 10/13] test: [ENG-2530] cover recon pre-pipeline wiring on curate-executor Pin the central correctness invariant of the change: recon's result is injected as `__recon_result_` on the task session, and the agent prompt instructs the loop to skip the now-deterministic call. Without this test a refactor could silently remove the synchronous `recon()` call or the prompt instruction, defeating the whole change. --- .../infra/executor/curate-executor.test.ts | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/test/unit/infra/executor/curate-executor.test.ts b/test/unit/infra/executor/curate-executor.test.ts index 885ea854c..96c5898aa 100644 --- a/test/unit/infra/executor/curate-executor.test.ts +++ b/test/unit/infra/executor/curate-executor.test.ts @@ -309,4 +309,62 @@ describe('CurateExecutor (regression)', () => { expect(enqueueStub.firstCall.args[0]).to.deep.equal(['auth/jwt.md']) }) }) + + describe('pre-pipelined recon (ENG-2530)', () => { + it('injects __recon_result_ as a sandbox variable and surfaces it in the prompt', async () => { + const taskId = '7a2b9e10-cdef-4321-8765-0abcdef01234' + const taskIdSafe = taskId.replaceAll('-', '_') + const expectedReconVar = `__recon_result_${taskIdSafe}` + + const setSandboxVariableOnSession = stub() + const executeOnSession = stub().resolves('ok') + + const agent = { + cancel: stub().resolves(false), + createTaskSession: stub().resolves('session-id'), + deleteSandboxVariable: stub(), + deleteSandboxVariableOnSession: stub(), + deleteSession: 
stub().resolves(true), + deleteTaskSession: stub().resolves(), + execute: stub().resolves(''), + executeOnSession, + generate: stub().resolves({content: '', toolCalls: [], usage: {inputTokens: 0, outputTokens: 0}}), + getSessionMetadata: stub().resolves(), + getState: stub().returns({currentIteration: 0, executionHistory: [], executionState: 'idle', toolCallsExecuted: 0}), + listPersistedSessions: stub().resolves([]), + reset: stub(), + setSandboxVariable: stub(), + setSandboxVariableOnSession, + start: stub().resolves(), + stream: stub().resolves({[Symbol.asyncIterator]: () => ({next: () => Promise.resolve({done: true, value: undefined})})}), + } as unknown as ICipherAgent + + const executor = new CurateExecutor() + await executor.executeWithAgent(agent, { + clientCwd: '/projects/myapp', + content: 'plain text content for the curate to inspect', + taskId, + }) + + // (a) recon result was set on the task session under __recon_result_ + const reconCall = setSandboxVariableOnSession + .getCalls() + .find((c) => c.args[1] === expectedReconVar) + expect(reconCall, `no setSandboxVariableOnSession call with key ${expectedReconVar}`).to.not.equal(undefined) + + const reconValue = reconCall?.args[2] as Record | undefined + expect(reconValue).to.have.property('suggestedMode') + expect(reconValue).to.have.property('suggestedChunkCount') + expect(reconValue).to.have.property('meta') + expect((reconValue?.meta as Record)).to.have.property('charCount') + expect((reconValue?.meta as Record)).to.have.property('lineCount') + + // (b) prompt instructs the agent that recon is pre-computed and to skip the call + expect(executeOnSession.calledOnce).to.equal(true) + const promptArg = executeOnSession.firstCall.args[1] as string + expect(promptArg).to.include('Recon already computed in') + expect(promptArg).to.include(expectedReconVar) + expect(promptArg).to.include('Do NOT call tools.curation.recon') + }) + }) }) From 8941d797100a147ad6a95371b8bed09b39b60fb0 Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Nguy=E1=BB=85n=20Thu=E1=BA=ADn=20Ph=C3=A1t?= Date: Sat, 2 May 2026 10:56:57 +0700 Subject: [PATCH 11/13] refactor: [ENG-2530] address review-agent feedback on PR #593 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - curate-executor: add comment near reconHelper(...) noting that the `metadata` argument is currently a no-op — `recon` recomputes counts from `effectiveContext` directly. Prevents future readers from assuming changes to `metadata` would alter `reconResult`. Upstream signature fix is out of scope for this PR. - test: add stubs for FileContextTreeSnapshotService.getCurrentState and DreamStateService.incrementCurationCount so the unit test stays in-memory (mirrors the ENG-2485 test pattern); keeps to CLAUDE.md's "unit tests must run completely in memory" rule. - test: assert meta.messageCount alongside charCount and lineCount (the prompt surfaces all three). --- src/server/infra/executor/curate-executor.ts | 5 +++++ test/unit/infra/executor/curate-executor.test.ts | 12 ++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/server/infra/executor/curate-executor.ts b/src/server/infra/executor/curate-executor.ts index 06a210728..99226f556 100644 --- a/src/server/infra/executor/curate-executor.ts +++ b/src/server/infra/executor/curate-executor.ts @@ -110,6 +110,11 @@ export class CurateExecutor implements ICurateExecutor { // is always "yes, first thing." Surfacing it as an agent-tool meant // paying a full LLM iteration just to invoke a deterministic helper. const initialHistory = {entries: [], totalProcessed: 0} + // The `metadata` arg is currently unused by `recon` — the helper + // recomputes char/line/message counts from `effectiveContext` + // directly. Passed through here to match the helper's existing + // signature; do NOT assume changing `metadata` will alter + // `reconResult`. 
const reconResult = reconHelper(effectiveContext, metadata, initialHistory) const reconVar = `__recon_result_${taskIdSafe}` diff --git a/test/unit/infra/executor/curate-executor.test.ts b/test/unit/infra/executor/curate-executor.test.ts index 96c5898aa..126cbbda9 100644 --- a/test/unit/infra/executor/curate-executor.test.ts +++ b/test/unit/infra/executor/curate-executor.test.ts @@ -339,6 +339,12 @@ describe('CurateExecutor (regression)', () => { stream: stub().resolves({[Symbol.asyncIterator]: () => ({next: () => Promise.resolve({done: true, value: undefined})})}), } as unknown as ICipherAgent + // Stub the post-curate filesystem services so the test stays + // fully in-memory (mirrors the ENG-2485 test above). Without + // these, executeWithAgent attempts real I/O against /projects/myapp. + stub(FileContextTreeSnapshotService.prototype, 'getCurrentState').resolves(new Map()) + stub(DreamStateService.prototype, 'incrementCurationCount').resolves() + const executor = new CurateExecutor() await executor.executeWithAgent(agent, { clientCwd: '/projects/myapp', @@ -356,8 +362,10 @@ describe('CurateExecutor (regression)', () => { expect(reconValue).to.have.property('suggestedMode') expect(reconValue).to.have.property('suggestedChunkCount') expect(reconValue).to.have.property('meta') - expect((reconValue?.meta as Record)).to.have.property('charCount') - expect((reconValue?.meta as Record)).to.have.property('lineCount') + const meta = reconValue?.meta as Record + expect(meta).to.have.property('charCount') + expect(meta).to.have.property('lineCount') + expect(meta).to.have.property('messageCount') // (b) prompt instructs the agent that recon is pre-computed and to skip the call expect(executeOnSession.calledOnce).to.equal(true) From 1ae81cdeabac2357c9973f3f700e9f0f8324dd18 Mon Sep 17 00:00:00 2001 From: Danh Doan Date: Mon, 4 May 2026 10:22:01 +0700 Subject: [PATCH 12/13] merge: sync main (3.10.1) into curation-enhancement MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves three conflicts created by the overlap of ENG-2485 (defer summary cascade to dream, on this branch) and ENG-2522 (detach post-curate Phase 4, shipped on main, PR #582). The two changes touch the same lines but address different costs — LLM cost vs user-visible latency — and compose cleanly. curate-executor.ts - Adopt main's {finalize, response} architecture so the daemon fires task:completed before Phase 4 runs. - Inside finalize, keep cascade-defer: snapshot diff → enqueueStaleSummaryPaths + buildManifest. Drop main's propagateSummariesUnderLock — the dream lock existed to coordinate inline _index.md writes, which cascade-defer eliminates from curate. - Preserve all exp 03 (cache_control) and exp 04 (recon pre-pipelining) changes in Phases 1-3. - Extract propagateAndRebuild private helper alongside main's incrementDreamCounter. dream-executor.ts - Keep this branch's runStaleSummaryPropagation extracted seam (main did not change dream-executor; nothing lost). curate-executor.test.ts - Rewrite runAgentBody/finalize split assertions to expect cascade-defer (enqueueStaleSummaryPaths + buildManifest) instead of propagateStaleness. - Drop the dream-lock coordination describe block — the lock is no longer taken in curate's Phase 4 (folder-pack still uses it). folder-pack-executor still calls propagateSummariesUnderLock; the helper file stays. Verified: typecheck, lint (0 errors on resolved files), 7284 tests pass. 
--- CHANGELOG.md | 12 + CLAUDE.md | 8 +- package.json | 7 +- .../curate-tool-task-context.ts | 31 ++ .../tools/implementations/curate-tool.ts | 71 +++- src/oclif/commands/review.ts | 120 ++++++ src/server/core/domain/entities/brv-config.ts | 28 ++ .../core/domain/entities/provider-registry.ts | 1 - src/server/core/domain/transport/schemas.ts | 6 + src/server/core/domain/transport/task-info.ts | 7 + .../interfaces/i-provider-config-store.ts | 6 +- .../infra/context-tree/propagate-summaries.ts | 69 ++++ src/server/infra/daemon/agent-process.ts | 96 ++++- src/server/infra/daemon/brv-server.ts | 24 ++ src/server/infra/daemon/post-work-registry.ts | 98 +++++ src/server/infra/dream/dream-lock-service.ts | 7 +- src/server/infra/executor/curate-executor.ts | 153 +++++--- src/server/infra/executor/dream-executor.ts | 61 +++- .../infra/executor/folder-pack-executor.ts | 49 +-- src/server/infra/git/git-error-messages.ts | 24 ++ .../infra/git/isomorphic-git-service.ts | 84 ++--- src/server/infra/git/status-row-classifier.ts | 108 ++++++ .../infra/process/curate-log-handler.ts | 15 +- src/server/infra/process/feature-handlers.ts | 1 + src/server/infra/process/task-router.ts | 57 ++- .../infra/process/transport-handlers.ts | 7 +- .../storage/file-provider-config-store.ts | 8 +- .../transport/handlers/provider-handler.ts | 58 +++ .../transport/handlers/review-handler.ts | 42 +++ src/shared/transport/events/review-events.ts | 14 + src/shared/transport/types/dto.ts | 2 + .../provider/components/provider-flow.tsx | 37 +- test/commands/review-toggle.test.ts | 195 ++++++++++ .../infra/git/isomorphic-git-service.test.ts | 103 +++++- .../git/status-tuple-reachability.test.ts | 345 ++++++++++++++++++ test/unit/agent/tools/curate-tool.test.ts | 208 ++++++++++- .../core/domain/entities/brv-config.test.ts | 49 +++ .../domain/entities/provider-registry.test.ts | 5 + .../infra/daemon/post-work-registry.test.ts | 282 ++++++++++++++ .../infra/executor/curate-executor.test.ts | 114 ++++++ 
.../infra/executor/dream-executor.test.ts | 169 ++++++++- .../executor/folder-pack-executor.test.ts | 104 +++++- .../unit/infra/git/git-error-messages.test.ts | 58 +++ .../infra/git/status-row-classifier.test.ts | 245 +++++++++++++ .../infra/process/curate-log-handler.test.ts | 129 +++++++ test/unit/infra/process/task-router.test.ts | 81 ++++ .../transport/handlers/model-handler.test.ts | 32 ++ .../handlers/provider-handler.test.ts | 223 +++++++++++ .../transport/handlers/review-handler.test.ts | 165 +++++++++ .../transport/handlers/vc-handler.test.ts | 36 +- 50 files changed, 3644 insertions(+), 210 deletions(-) create mode 100644 src/agent/infra/tools/implementations/curate-tool-task-context.ts create mode 100644 src/oclif/commands/review.ts create mode 100644 src/server/infra/context-tree/propagate-summaries.ts create mode 100644 src/server/infra/daemon/post-work-registry.ts create mode 100644 src/server/infra/git/git-error-messages.ts create mode 100644 src/server/infra/git/status-row-classifier.ts create mode 100644 test/commands/review-toggle.test.ts create mode 100644 test/integration/infra/git/status-tuple-reachability.test.ts create mode 100644 test/unit/infra/daemon/post-work-registry.test.ts create mode 100644 test/unit/infra/git/git-error-messages.test.ts create mode 100644 test/unit/infra/git/status-row-classifier.test.ts create mode 100644 test/unit/infra/transport/handlers/review-handler.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 94af2f45a..757185524 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,18 @@ All notable user-facing changes to ByteRover CLI will be documented in this file. +## [3.10.1] + +### Added +- **`brv review --disable` / `--enable` per project.** Opt a project out of the human-in-the-loop review prompts and backups, or turn them back on. Run `brv review` with no flags to see the current state. Existing `brv review pending / approve / reject` are unchanged. 
+ +### Changed +- **`brv curate` returns to your prompt right away.** The final summary-rebuild step now runs in the background instead of making you wait. Nothing is skipped, just moved off your wait time (about 18 seconds saved on a typical run). + +### Fixed +- **Clearer `brv vc status` and `brv vc pull` errors.** Status now catches a few staged-then-edited file states it used to hide. When a pull, checkout, or merge would overwrite local changes, the error now lists the affected files instead of just saying "local changes would be overwritten." +- **OpenAI-compatible providers fail loudly when the URL is wrong.** First-time setup for Ollama, LM Studio, and similar providers now checks the base URL up front and shows the error inline. Bad URLs no longer pre-select a placeholder `llama3` model or hang the REPL on disconnect / reconnect. + ## [3.10.0] ### Added diff --git a/CLAUDE.md b/CLAUDE.md index 7872635b9..86487ef79 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -61,7 +61,7 @@ npm run dev:ui # Vite dev server for the web UI - `server/` — Daemon infrastructure: `config/`, `core/` (domain/interfaces), `infra/` (31 modules, including vc, git, hub, mcp, cogit, project, provider-oauth, space, dream, webui), `templates/`, `utils/` - `shared/` — Cross-module: constants, types, transport events, utils - `tui/` — React/Ink TUI: app (router/pages), components, features (23 modules, including vc, worktree, source, hub, curate), hooks, lib, providers, stores -- `webui/` — Browser dashboard (React/Vite). Entry `src/webui/index.tsx`; `features/` (15 panels), `pages/` (home, changes, configuration, contexts, tasks, analytics, project-selector), `layouts/`, `stores/`. Connects to the daemon via Socket.IO; no imports from `server/`, `agent/`, or `tui/` (same boundary rule) +- `webui/` — Browser dashboard (React/Vite). 
Entry `src/webui/index.tsx`; `features/` (15 panels), `pages/` (8 pages: home, changes, configuration, contexts, tasks, analytics, project-selector, not-found), `layouts/`, `stores/`. Connects to the daemon via Socket.IO; no imports from `server/`, `agent/`, or `tui/` (same boundary rule) - `oclif/` — Commands grouped by topic (`vc/`, `hub/`, `worktree/`, `source/`, `space/`, `review/`, `connectors/`, `curate/`, `model/`, `providers/`, `swarm/`, `query-log/`) + top-level `.ts` commands (incl. `webui.ts`); hooks, lib (daemon-client, task-client, json-response) **Import boundary** (ESLint-enforced): `tui/` must not import from `server/`, `agent/`, or `oclif/`. Use transport events or `shared/`. @@ -101,7 +101,8 @@ npm run dev:ui # Vite dev server for the web UI - Canonical project resolver: `resolveProject()` in `server/infra/project/` — priority `flag > direct > linked > walked-up > null`. `projectRoot` and `worktreeRoot` are threaded through transport schemas, task routing, and all executors - All commands are daemon-routed: `oclif/` and `tui/` never import from `server/` - Oclif: `src/oclif/commands/{vc,worktree,source}/`; TUI: `src/tui/features/{vc,worktree,source}/`; slash commands (`vc-*`, `worktree`, `source`) in `src/tui/features/commands/definitions/` -- `brv curate` blocks execution by default and rejects overlapping runs for the same project; pass `--detach` to run in background. Behavioral contract lives in `src/server/templates/sections/` (`brv-instructions.md`, `workflow.md`, `skill/SKILL.md`) — the in-daemon agent reads these at runtime +- `brv curate` runs Phases 1–3 in the foreground and detaches Phase 4 (post-curate finalization: summary regeneration, manifest rebuild) to the daemon's `PostWorkRegistry`, which serializes per project and coordinates with `dream-lock-service.ts` to prevent concurrent `_index.md` writes. `--detach` makes the entire run background. Overlapping curate runs for the same project are still rejected. 
Behavioral contract lives in `src/server/templates/sections/` (`brv-instructions.md`, `workflow.md`, `skill/SKILL.md`) — the in-daemon agent reads these at runtime +- `brv review [--disable | --enable]` — toggle the project-scoped HITL review log; `brv review pending` lists items, `brv review approve ` / `brv review reject ` resolve them. When disabled, sync curate skips the "X operations require review" prompt, detached curate stops emitting per-operation review markers, and `brv dream` no longer surfaces `needsReview` operations. The flag is snapshotted at task creation and propagated via `AsyncLocalStorage` (`resolveReviewDisabled`) so mid-task toggles do not race - `brv login` defaults to OAuth (interactive provider picker); pass `--api-key` only for CI. `brv logout` clears credentials ### Agent (`src/agent/`) @@ -142,6 +143,9 @@ npm run dev:ui # Vite dev server for the web UI - `BRV_UI_SOURCE` — `submodule` | `package` — forces Vite's shared-UI resolution mode - `BRV_DATA_DIR` — override the global data dir (default `~/.brv`) - `BRV_GIT_REMOTE_BASE_URL` — override git remote base URL (beta vs prod testing) +- `BRV_QUEUE_TRACE` — set to `1` to log queue/agent map traces (cipher-agent, abstract-queue) +- `BRV_SESSION_LOG` — file path for daemon/agent session logs (auto-set by `brv-server`; can override for debugging) +- `BRV_E2E_MODE` — `true` switches the daemon to e2e-friendly stdio handling ## Stack diff --git a/package.json b/package.json index d3c54f911..2ac114a10 100644 --- a/package.json +++ b/package.json @@ -12,7 +12,7 @@ "coding-assistant", "knowledge-management" ], - "version": "3.10.0", + "version": "3.10.1", "author": "ByteRover", "bin": { "brv": "./bin/run.js" @@ -231,6 +231,9 @@ "ink-spinner", "ink-text-input", "ink-scroll-list", - "ink-scroll-view" + "ink-scroll-view", + "@tanstack/react-query", + "react-router-dom", + "zustand" ] } diff --git a/src/agent/infra/tools/implementations/curate-tool-task-context.ts 
b/src/agent/infra/tools/implementations/curate-tool-task-context.ts new file mode 100644 index 000000000..7911aab08 --- /dev/null +++ b/src/agent/infra/tools/implementations/curate-tool-task-context.ts @@ -0,0 +1,31 @@ +import {AsyncLocalStorage} from 'node:async_hooks' + +/** + * Async-context scope for the daemon-stamped `reviewDisabled` value. + * + * The daemon snapshots the project's reviewDisabled flag once at task-create + * and forwards it via TaskExecute. The agent process opens an + * `AsyncLocalStorage` scope around the task body so any descendant async + * callsite — direct curate-tool invocation, sandbox `tools.curate(...)` via + * CurateService, or anything else awaiting under the same chain — observes the + * single snapshot value instead of re-reading `.brv/config.json` (which can + * race with mid-task user toggles). + * + * Same propagation pattern as `CurateResultCollector` + * (src/agent/infra/sandbox/curate-result-collector.ts): AsyncLocalStorage flows + * through the LLM streaming pipeline and the in-process sandbox, so callers + * without an explicit taskId still see the right value. + * + * Outside any scope, `getCurrentReviewDisabled()` returns `undefined` and the + * caller falls back to its own resolution path (currently a `.brv/config.json` + * read in `executeCurate`). 
+ */ +const reviewDisabledStorage = new AsyncLocalStorage() + +export function runWithReviewDisabled(reviewDisabled: boolean, fn: () => Promise): Promise { + return reviewDisabledStorage.run(reviewDisabled, fn) +} + +export function getCurrentReviewDisabled(): boolean | undefined { + return reviewDisabledStorage.getStore() +} diff --git a/src/agent/infra/tools/implementations/curate-tool.ts b/src/agent/infra/tools/implementations/curate-tool.ts index a43fc73bc..73f17e7c4 100644 --- a/src/agent/infra/tools/implementations/curate-tool.ts +++ b/src/agent/infra/tools/implementations/curate-tool.ts @@ -24,6 +24,7 @@ import {toSnakeCase} from '../../../../server/utils/file-helpers.js' import {deriveImpactFromLoss, detectStructuralLoss} from '../../../core/domain/knowledge/conflict-detector.js' import {resolveStructuralLoss} from '../../../core/domain/knowledge/conflict-resolver.js' import {ToolName} from '../../../core/domain/tools/constants.js' +import {getCurrentReviewDisabled} from './curate-tool-task-context.js' /** * Called after each successful context file write so callers can @@ -469,7 +470,13 @@ export interface CurateOutput { * @param filePath - Absolute path to the context tree file being modified * @param basePath - Context tree base path (e.g., '.brv/context-tree') */ -async function backupBeforeWrite(filePath: string, basePath: string): Promise { +async function backupBeforeWrite(filePath: string, basePath: string, reviewDisabled: boolean): Promise { + // Honor `brv review --disable`: backups exist solely to support review rejection + // (restore from backup). With reviews disabled, they are dead state — skip creation + // so review-backups/ stays empty. Snapshot taken once at executeCurate top so all + // ops in this tool call observe a consistent value even if the user toggles mid-task. 
+ if (reviewDisabled) return + try { const brvDir = dirname(resolve(basePath)) const relativePath = relative(resolve(basePath), resolve(filePath)) @@ -487,6 +494,30 @@ async function backupBeforeWrite(filePath: string, basePath: string): Promise/config.json` and returns the `reviewDisabled` flag. + * Returns false (review enabled) on any error so a missing/corrupt config never + * silently swallows backups that protect the rejection path. + */ +async function isReviewDisabledForBrvDir(brvDir: string): Promise { + try { + const raw = await DirectoryManager.readFile(join(brvDir, 'config.json')) + const parsed: unknown = JSON.parse(raw) + return hasReviewDisabledField(parsed) && parsed.reviewDisabled === true + } catch { + return false + } +} + function generateDomainContextMarkdown(domainName: string, context: DomainContext): string { const sections: string[] = [`# Domain: ${domainName}`, '', '## Purpose', context.purpose, '', '## Scope'] @@ -880,6 +911,7 @@ function maxImpact( async function executeUpdate( basePath: string, operation: Operation, + reviewDisabled: boolean, onAfterWrite?: WriteCallback, runtimeSignalStore?: IRuntimeSignalStore, logger?: ILogger, @@ -990,7 +1022,7 @@ async function executeUpdate( summary, timestamps, }) - await backupBeforeWrite(contextPath, basePath) + await backupBeforeWrite(contextPath, basePath, reviewDisabled) await DirectoryManager.writeFileAtomic(contextPath, contextContent) onAfterWrite?.(contextPath, contextContent) @@ -1031,6 +1063,7 @@ async function executeUpdate( async function executeUpsert( basePath: string, operation: Operation, + reviewDisabled: boolean, onAfterWrite?: WriteCallback, runtimeSignalStore?: IRuntimeSignalStore, logger?: ILogger, @@ -1082,7 +1115,7 @@ async function executeUpsert( if (exists) { // File exists - delegate to UPDATE logic - const result = await executeUpdate(basePath, {...operation, type: 'UPDATE'}, onAfterWrite, runtimeSignalStore, logger) + const result = await executeUpdate(basePath, 
{...operation, type: 'UPDATE'}, reviewDisabled, onAfterWrite, runtimeSignalStore, logger) // Return with UPSERT type but indicate it was an update return { ...result, @@ -1117,6 +1150,7 @@ async function executeUpsert( async function executeMerge( basePath: string, operation: Operation, + reviewDisabled: boolean, onAfterWrite?: WriteCallback, runtimeSignalStore?: IRuntimeSignalStore, logger?: ILogger, @@ -1229,8 +1263,8 @@ async function executeMerge( const previousSummary = targetParsedContent.summary // Backup both files before merge modifies target and deletes source - await backupBeforeWrite(targetContextPath, basePath) - await backupBeforeWrite(sourceContextPath, basePath) + await backupBeforeWrite(targetContextPath, basePath, reviewDisabled) + await backupBeforeWrite(sourceContextPath, basePath, reviewDisabled) // Capture source sidecar signals BEFORE any destructive operation so a // mid-flow crash cannot leave the target unmerged with an orphaned @@ -1321,6 +1355,7 @@ async function executeMerge( async function executeDelete( basePath: string, operation: Operation, + reviewDisabled: boolean, runtimeSignalStore?: IRuntimeSignalStore, logger?: ILogger, ): Promise { @@ -1358,7 +1393,7 @@ async function executeDelete( // Best-effort: summary extraction failure must never block delete } - await backupBeforeWrite(filePath, basePath) + await backupBeforeWrite(filePath, basePath, reviewDisabled) await DirectoryManager.deleteFile(filePath) await deleteDerivedSiblings(filePath) @@ -1417,7 +1452,7 @@ async function executeDelete( // Best-effort: summary extraction failure must never block delete } - await Promise.all(mdFiles.map((f) => backupBeforeWrite(f, basePath))) + await Promise.all(mdFiles.map((f) => backupBeforeWrite(f, basePath, reviewDisabled))) await DirectoryManager.deleteTopicRecursive(fullPath) // Dual-write: drop sidecar entries for every markdown file that was @@ -1490,8 +1525,20 @@ export async function executeCurate( const {basePath, operations} = 
parseResult.data + // Prefer the daemon-stamped value (snapshotted at task-create on the daemon side, + // forwarded via TaskExecute, opened as an AsyncLocalStorage scope in agent-process so + // it propagates through any async chain — direct tool call, sandbox `tools.curate(...)` + // via CurateService, or ingest-resource-tool). The `.brv/config.json` read is the + // fall-back for callers outside any scope. Sharing one value across daemon + // (CurateLogHandler) and agent (this tool) keeps reviewStatus and backups consistent: + // without it, a user toggle mid-task could mark ops as pending review (daemon snapshot + // held) while skipping backups (agent re-read picks up new value), making rejection + // un-restorable. + const scopedReviewDisabled = getCurrentReviewDisabled() + const reviewDisabled = scopedReviewDisabled ?? (await isReviewDisabledForBrvDir(dirname(resolve(basePath)))) + const onAfterWrite: undefined | WriteCallback = abstractQueue - ? (contextPath, content) => { abstractQueue.enqueue({contextPath, fullContent: content}) } + ? 
(contextPath, content) => abstractQueue.enqueue({contextPath, fullContent: content}) : undefined const applied: OperationResult[] = [] @@ -1516,7 +1563,7 @@ export async function executeCurate( } case 'DELETE': { - result = await executeDelete(basePath, operation, runtimeSignalStore, logger) + result = await executeDelete(basePath, operation, reviewDisabled, runtimeSignalStore, logger) if (result.status === 'success') summary.deleted++ @@ -1524,7 +1571,7 @@ export async function executeCurate( } case 'MERGE': { - result = await executeMerge(basePath, operation, onAfterWrite, runtimeSignalStore, logger) + result = await executeMerge(basePath, operation, reviewDisabled, onAfterWrite, runtimeSignalStore, logger) if (result.status === 'success') summary.merged++ @@ -1532,7 +1579,7 @@ export async function executeCurate( } case 'UPDATE': { - result = await executeUpdate(basePath, operation, onAfterWrite, runtimeSignalStore, logger) + result = await executeUpdate(basePath, operation, reviewDisabled, onAfterWrite, runtimeSignalStore, logger) if (result.status === 'success') summary.updated++ @@ -1540,7 +1587,7 @@ export async function executeCurate( } case 'UPSERT': { - result = await executeUpsert(basePath, operation, onAfterWrite, runtimeSignalStore, logger) + result = await executeUpsert(basePath, operation, reviewDisabled, onAfterWrite, runtimeSignalStore, logger) // UPSERT counts as either added or updated based on what happened if (result.status === 'success') { diff --git a/src/oclif/commands/review.ts b/src/oclif/commands/review.ts new file mode 100644 index 000000000..900f59282 --- /dev/null +++ b/src/oclif/commands/review.ts @@ -0,0 +1,120 @@ +import {Command, Flags} from '@oclif/core' + +import { + ReviewEvents, + type ReviewGetDisabledResponse, + type ReviewSetDisabledResponse, +} from '../../shared/transport/events/review-events.js' +import {type DaemonClientOptions, formatConnectionError, withDaemonRetry} from '../lib/daemon-client.js' +import 
{writeJsonResponse} from '../lib/json-response.js' + +export default class Review extends Command { + public static description = `Toggle the HITL review log for the current project + +When disabled: +- 'brv curate' (sync mode) no longer prints the "X operations require review" prompt +- Curate-log entries written in '--detach' mode no longer carry the per-operation review marker +- 'brv dream' no longer surfaces its own needsReview operations as pending reviews +- 'brv review pending' will not list any new entries until re-enabled` + public static examples = [ + '# Show current state', + '<%= config.bin %> <%= command.id %>', + '', + '# Disable the review log for this project', + '<%= config.bin %> <%= command.id %> --disable', + '', + '# Re-enable the review log', + '<%= config.bin %> <%= command.id %> --enable', + ] + public static flags = { + disable: Flags.boolean({ + default: false, + description: 'Disable the HITL review log for this project', + exclusive: ['enable'], + }), + enable: Flags.boolean({ + default: false, + description: 'Re-enable the HITL review log for this project', + exclusive: ['disable'], + }), + format: Flags.string({ + default: 'text', + description: 'Output format (text or json)', + options: ['text', 'json'], + }), + } + + protected getDaemonClientOptions(): DaemonClientOptions { + return {} + } + + public async run(): Promise { + const {flags} = await this.parse(Review) + const format: 'json' | 'text' = flags.format === 'json' ? 
'json' : 'text' + + try { + if (flags.disable || flags.enable) { + const target = flags.disable + const response = await withDaemonRetry( + (client) => + client.requestWithAck(ReviewEvents.SET_DISABLED, {reviewDisabled: target}), + this.getDaemonClientOptions(), + ) + this.reportToggle(response.reviewDisabled, format) + return + } + + const response = await withDaemonRetry( + (client) => client.requestWithAck(ReviewEvents.GET_DISABLED, {}), + this.getDaemonClientOptions(), + ) + this.reportStatus(response.reviewDisabled, format) + } catch (error) { + this.reportError(error, format) + } + } + + private reportError(error: unknown, format: 'json' | 'text'): void { + const message = error instanceof Error ? error.message : 'Review failed' + if (format === 'json') { + writeJsonResponse({ + command: 'review', + data: {error: message, status: 'error'}, + success: false, + }) + return + } + + this.log(formatConnectionError(error)) + } + + private reportStatus(disabled: boolean, format: 'json' | 'text'): void { + if (format === 'json') { + writeJsonResponse({ + command: 'review', + data: {reviewDisabled: disabled, status: 'success'}, + success: true, + }) + return + } + + this.log(disabled ? 'Review log is disabled.' : 'Review log is enabled.') + } + + private reportToggle(disabled: boolean, format: 'json' | 'text'): void { + if (format === 'json') { + writeJsonResponse({ + command: 'review', + data: {reviewDisabled: disabled, status: 'success'}, + success: true, + }) + return + } + + this.log( + disabled + ? `Review log disabled. To re-enable: ${this.config.bin} review --enable` + : `Review log enabled. 
To disable: ${this.config.bin} review --disable`, + ) + } +} diff --git a/src/server/core/domain/entities/brv-config.ts b/src/server/core/domain/entities/brv-config.ts index 69b087316..866780f55 100644 --- a/src/server/core/domain/entities/brv-config.ts +++ b/src/server/core/domain/entities/brv-config.ts @@ -15,6 +15,7 @@ export type BrvConfigParams = { createdAt: string cwd?: string ide?: Agent + reviewDisabled?: boolean spaceId?: string spaceName?: string teamId?: string @@ -88,6 +89,7 @@ const isBrvConfigJson = (json: unknown): json is BrvConfigFromJson => { if (obj.cipherAgentSystemPrompt !== undefined && typeof obj.cipherAgentSystemPrompt !== 'string') return false if (obj.cipherAgentModes !== undefined && !Array.isArray(obj.cipherAgentModes)) return false if (obj.version !== undefined && typeof obj.version !== 'string') return false + if (obj.reviewDisabled !== undefined && typeof obj.reviewDisabled !== 'boolean') return false return true } @@ -104,6 +106,7 @@ export class BrvConfig { public readonly createdAt: string public readonly cwd?: string public readonly ide?: Agent + public readonly reviewDisabled?: boolean public readonly spaceId?: string public readonly spaceName?: string public readonly teamId?: string @@ -122,6 +125,7 @@ export class BrvConfig { this.createdAt = params.createdAt this.cwd = params.cwd this.ide = params.ide + this.reviewDisabled = params.reviewDisabled this.spaceId = params.spaceId this.spaceName = params.spaceName this.teamId = params.teamId @@ -214,6 +218,7 @@ export class BrvConfig { createdAt: this.createdAt, cwd: this.cwd, ide: this.ide, + reviewDisabled: this.reviewDisabled, spaceId: this.spaceId, spaceName: this.spaceName, teamId: this.teamId, @@ -235,6 +240,27 @@ export class BrvConfig { }) } + /** + * Creates a new BrvConfig with the reviewDisabled flag updated, preserving all other fields. 
+ */ + public withReviewDisabled(reviewDisabled: boolean): BrvConfig { + return new BrvConfig({ + chatLogPath: this.chatLogPath, + cipherAgentContext: this.cipherAgentContext, + cipherAgentModes: this.cipherAgentModes, + cipherAgentSystemPrompt: this.cipherAgentSystemPrompt, + createdAt: this.createdAt, + cwd: this.cwd, + ide: this.ide, + reviewDisabled, + spaceId: this.spaceId, + spaceName: this.spaceName, + teamId: this.teamId, + teamName: this.teamName, + version: this.version, + }) + } + /** * Creates a new BrvConfig with space fields replaced, preserving all other fields. */ @@ -247,6 +273,7 @@ export class BrvConfig { createdAt: new Date().toISOString(), cwd: this.cwd, ide: this.ide, + reviewDisabled: this.reviewDisabled, spaceId: space.id, spaceName: space.name, teamId: space.teamId, @@ -267,6 +294,7 @@ export class BrvConfig { createdAt: this.createdAt, cwd: this.cwd, ide: this.ide, + reviewDisabled: this.reviewDisabled, spaceId: this.spaceId, spaceName: this.spaceName, teamId: this.teamId, diff --git a/src/server/core/domain/entities/provider-registry.ts b/src/server/core/domain/entities/provider-registry.ts index 3d347e8c8..b5c1f7b53 100644 --- a/src/server/core/domain/entities/provider-registry.ts +++ b/src/server/core/domain/entities/provider-registry.ts @@ -258,7 +258,6 @@ export const PROVIDER_REGISTRY: Readonly> = { 'openai-compatible': { baseUrl: '', category: 'other', - defaultModel: 'llama3', description: 'OpenAI-compatible endpoint (Ollama, LM Studio, etc.)', envVars: ['OPENAI_COMPATIBLE_API_KEY'], headers: {}, diff --git a/src/server/core/domain/transport/schemas.ts b/src/server/core/domain/transport/schemas.ts index 8ef1a8c3f..d2c0d1765 100644 --- a/src/server/core/domain/transport/schemas.ts +++ b/src/server/core/domain/transport/schemas.ts @@ -394,6 +394,12 @@ export const TaskExecuteSchema = z.object({ force: z.boolean().optional(), /** Project path this task belongs to (for multi-project routing) */ projectPath: z.string().optional(), + /** 
+ * Snapshot of the project's review-disabled flag captured at task-create time. + * Stamped by the daemon so the agent does not re-read .brv/config.json and + * race with mid-task toggles. + */ + reviewDisabled: z.boolean().optional(), /** Unique task identifier */ taskId: z.string(), /** Dream trigger source — how this dream was initiated */ diff --git a/src/server/core/domain/transport/task-info.ts b/src/server/core/domain/transport/task-info.ts index b78a60549..5590fa977 100644 --- a/src/server/core/domain/transport/task-info.ts +++ b/src/server/core/domain/transport/task-info.ts @@ -23,6 +23,13 @@ export type TaskInfo = { projectPath?: string /** Set on successful completion */ result?: string + /** + * Snapshot of the project's review-disabled flag captured at task-create time. + * Stamped once at the daemon boundary so daemon (CurateLogHandler) and agent + * (curate-tool backups, dream review entries) observe the same value even if + * the user toggles the flag mid-task. + */ + reviewDisabled?: boolean /** Set when agent picks up the task */ startedAt?: number /** Lifecycle status — defaults to 'created' on construction */ diff --git a/src/server/core/interfaces/i-provider-config-store.ts b/src/server/core/interfaces/i-provider-config-store.ts index 6502006d2..1be633da5 100644 --- a/src/server/core/interfaces/i-provider-config-store.ts +++ b/src/server/core/interfaces/i-provider-config-store.ts @@ -9,7 +9,10 @@ export interface IProviderConfigStore { * Marks a provider as connected. * * @param providerId The provider ID to mark as connected - * @param options Optional settings like default model + * @param options Optional settings like default model. Pass `setAsActive: false` + * to skip flipping the active provider (used when the provider has no + * active model yet — activating it would put the app in a half-configured + * state that the welcome view interprets as "needs setup"). 
*/ connectProvider: ( providerId: string, @@ -18,6 +21,7 @@ export interface IProviderConfigStore { authMethod?: 'api-key' | 'oauth' baseUrl?: string oauthAccountId?: string + setAsActive?: boolean }, ) => Promise diff --git a/src/server/infra/context-tree/propagate-summaries.ts b/src/server/infra/context-tree/propagate-summaries.ts new file mode 100644 index 000000000..8507bbed8 --- /dev/null +++ b/src/server/infra/context-tree/propagate-summaries.ts @@ -0,0 +1,69 @@ +import path from 'node:path' + +import type {ICipherAgent} from '../../../agent/core/interfaces/i-cipher-agent.js' +import type {FileState} from '../../core/domain/entities/context-tree-snapshot.js' +import type {FileContextTreeSnapshotService} from './file-context-tree-snapshot-service.js' + +import {BRV_DIR} from '../../constants.js' +import {DreamLockService} from '../dream/dream-lock-service.js' +import {FileContextTreeManifestService} from './file-context-tree-manifest-service.js' +import {FileContextTreeSummaryService} from './file-context-tree-summary-service.js' +import {diffStates} from './snapshot-diff.js' + +export type PropagateSummariesUnderLockOptions = { + agent: ICipherAgent + baseDir: string + preState: Map | undefined + snapshotService: FileContextTreeSnapshotService + taskId: string +} + +/** + * Phase 4 write block shared by curate and folder-pack post-work: snapshot + * diff → `propagateStaleness` → opportunistic manifest rebuild. Holds the + * dream lock around the writes so a concurrent dream cannot interleave on + * `_index.md` / `_manifest.json`; if the lock is held, this skips because + * dream's own propagation covers the same diff. Fail-open. 
+ */ +export async function propagateSummariesUnderLock( + options: PropagateSummariesUnderLockOptions, +): Promise { + const {agent, baseDir, preState, snapshotService, taskId} = options + if (!preState) return + + const dreamLockService = new DreamLockService({baseDir: path.join(baseDir, BRV_DIR)}) + let acquireResult: Awaited> + try { + acquireResult = await dreamLockService.tryAcquire() + } catch { + return + } + + if (!acquireResult.acquired) return + + let succeeded = false + try { + const postState = await snapshotService.getCurrentState(baseDir) + const changedPaths = diffStates(preState, postState) + if (changedPaths.length === 0) { + succeeded = true + return + } + + const summaryService = new FileContextTreeSummaryService() + const results = await summaryService.propagateStaleness(changedPaths, agent, baseDir, taskId) + if (results.some((result) => result.actionTaken)) { + const manifestService = new FileContextTreeManifestService({baseDirectory: baseDir}) + await manifestService.buildManifest(baseDir) + } + + succeeded = true + } catch { + // Fail-open: summary/manifest errors never block the caller. + } finally { + await (succeeded + ? 
dreamLockService.release() + : dreamLockService.rollback(acquireResult.priorMtime) + ).catch(() => {}) + } +} diff --git a/src/server/infra/daemon/agent-process.ts b/src/server/infra/daemon/agent-process.ts index e1c6675bc..41cd7e9e6 100644 --- a/src/server/infra/daemon/agent-process.ts +++ b/src/server/infra/daemon/agent-process.ts @@ -35,6 +35,7 @@ import {FileSystemService} from '../../../agent/infra/file-system/file-system-se import {FolderPackService} from '../../../agent/infra/folder-pack/folder-pack-service.js' import {SessionMetadataStore} from '../../../agent/infra/session/session-metadata-store.js' import {FileKeyStorage} from '../../../agent/infra/storage/file-key-storage.js' +import {runWithReviewDisabled} from '../../../agent/infra/tools/implementations/curate-tool-task-context.js' import {createSearchKnowledgeService} from '../../../agent/infra/tools/implementations/search-knowledge-service.js' import {AuthEvents} from '../../../shared/transport/events/auth-events.js' import {decodeSearchContent} from '../../../shared/transport/search-content.js' @@ -63,6 +64,7 @@ import {FileCurateLogStore} from '../storage/file-curate-log-store.js' import {FileReviewBackupStore} from '../storage/file-review-backup-store.js' import {AgentInstanceDiscovery} from '../transport/agent-instance-discovery.js' import {createAgentLogger} from './agent-logger.js' +import {PostWorkRegistry} from './post-work-registry.js' import {resolveSessionId} from './session-resolver.js' import {validateProviderForTask} from './task-validation.js' @@ -96,6 +98,16 @@ const projectPath = projectPathEnv const agentLog = createAgentLogger(process.env.BRV_SESSION_LOG, `[agent-process:${projectPath}]`) +/** + * Holds detached post-curate work so `task:completed` can fire as soon as + * the agent body finishes. Drained on shutdown to avoid truncated writes. 
+ */ +const postWorkRegistry = new PostWorkRegistry({ + onError(_projectPath, error) { + agentLog(`post-work error: ${error instanceof Error ? error.message : String(error)}`) + }, +}) + /** * Persist a brand-new session's metadata and set it as active. * Best-effort — failures are logged but never block the caller. @@ -435,7 +447,7 @@ async function executeTask( storagePath: string, runtimeSignalStore: IRuntimeSignalStore, ): Promise { - const {clientCwd, clientId, content, files, folderPath, force, taskId, trigger, type, worktreeRoot} = task + const {clientCwd, clientId, content, files, folderPath, force, reviewDisabled, taskId, trigger, type, worktreeRoot} = task if (!transport || !agent) return // Search tasks are pure BM25 retrieval — no LLM, no provider needed. @@ -453,7 +465,19 @@ async function executeTask( activeTaskCount++ - try { + // Body of the task — extracted so the daemon-stamped reviewDisabled snapshot can be + // opened as an AsyncLocalStorage scope around it. Tools that run inside this task + // (curate-tool.executeCurate, including the sandbox `tools.curate(...)` path via + // CurateService where _context.taskId is not threaded through) read the snapshot + // from the ALS scope instead of re-reading .brv/config.json — that read can race + // with mid-task user toggles, which is exactly the inconsistency we are eliminating. + // We only open the scope when the daemon stamped a value; otherwise consumers fall + // back to the file read, preserving behavior for legacy clients without a stamp. + const runTaskBody = async (): Promise => { + // Re-narrow inside the closure: TypeScript loses the function-scope narrowing + // from the early-return guard above once we hand control to a callback. + if (!transport || !agent) return + // Only refresh config and hot-swap provider when this is the first concurrent task. // Subsequent concurrent tasks reuse cached config to avoid race conditions // on provider hot-swap (which replaces SessionManager). 
@@ -511,12 +535,24 @@ async function executeTask( // Socket dropped — continue executing so we can still emit task:completed/error when socket reconnects } + // Block new tree-writers until any detached Phase 4 from a prior task + // on this project drains. `query` / `search` are intentionally NOT + // gated — they read the manifest and tolerate a stale snapshot via + // `readManifestIfFresh` + rebuild fallback, so blocking them would + // be a needless latency hit. + if (type === 'curate' || type === 'curate-folder' || type === 'dream') { + await postWorkRegistry.awaitProject(projectPath) + } + try { let result: string let logId: string | undefined + // Captured during curate / curate-folder; submitted to the registry + // after `task:completed` so the user does not wait on Phase 4. + let postWork: (() => Promise) | undefined switch (type) { case 'curate': { - result = await curateExecutor.executeWithAgent(agent, { + const curateResult = await curateExecutor.runAgentBody(agent, { clientCwd, content, files, @@ -524,12 +560,14 @@ async function executeTask( taskId, worktreeRoot, }) + result = curateResult.response + postWork = curateResult.finalize break } case 'curate-folder': { - result = await folderPackExecutor.executeWithAgent(agent, { + const folderResult = await folderPackExecutor.runAgentBody(agent, { clientCwd, content, folderPath: folderPath!, @@ -537,6 +575,8 @@ async function executeTask( taskId, worktreeRoot, }) + result = folderResult.response + postWork = folderResult.finalize break } @@ -575,6 +615,7 @@ async function executeTask( const dreamResult = await dreamExecutor.executeWithAgent(agent, { priorMtime: eligibility.priorMtime, projectRoot: projectPath, + ...(reviewDisabled === undefined ? {} : {reviewDisabled}), taskId, trigger: trigger ?? 'cli', }) @@ -614,7 +655,8 @@ async function executeTask( } } - // Emit task:completed + // Emit task:completed BEFORE the detached Phase 4 so the user sees + // the response as soon as the agent body finishes. 
agentLog(`task:completed taskId=${taskId}`) try { transport.request(TransportTaskEventNames.COMPLETED, {clientId, ...(logId ? {logId} : {}), projectPath, result, taskId}) @@ -623,6 +665,13 @@ async function executeTask( `task:completed send failed taskId=${taskId}: ${error instanceof Error ? error.message : String(error)}`, ) } + + // Submit detached post-curate work to the registry. Mutex'd per project + // and drained on shutdown so SIGTERM mid-work cannot truncate `_index.md`. + if (postWork) { + agentLog(`post-work queued taskId=${taskId}`) + postWorkRegistry.submit(projectPath, postWork) + } } catch (error) { // Emit task:error const errorData = serializeTaskError(error) @@ -637,15 +686,30 @@ async function executeTask( } finally { cleanupForwarding?.() } + } + + try { + await (reviewDisabled === undefined ? runTaskBody() : runWithReviewDisabled(reviewDisabled, runTaskBody)) } finally { activeTaskCount-- - // Deferred hot-swap: if provider changed while tasks were in-flight, - // trigger swap now that all tasks are done + // Deferred hot-swap when provider changed mid-task. Wait on detached + // Phase 4 first — rebuilding SessionManager during an in-flight + // `propagateStaleness` LLM call would silently corrupt Phase 4. + // Reserve the swap slot synchronously by clearing the dirty flag now; + // a task arriving during the awaitAll wait then sees a clean flag and + // skips its inline swap, so only the deferred chain runs hotSwap. if (activeTaskCount === 0 && providerConfigDirty && agent && transport) { - hotSwapProvider(agent, transport).catch((error) => { - agentLog(`deferred hotSwapProvider failed: ${error instanceof Error ? 
error.message : String(error)}`) - }) + providerConfigDirty = false + const swapAgent = agent + const swapTransport = transport + postWorkRegistry + .awaitAll() + .then(() => hotSwapProvider(swapAgent, swapTransport)) + .catch((error: unknown) => { + providerConfigDirty = true + agentLog(`deferred hotSwapProvider failed: ${error instanceof Error ? error.message : String(error)}`) + }) } } } @@ -801,6 +865,18 @@ async function hotSwapProvider( async function shutdown(): Promise { agentLog('Shutting down...') + // Drain detached Phase 4 BEFORE stopping the agent — `propagateStaleness` + // mid-write would otherwise leave `_index.md` truncated on SIGTERM. + try { + const drainStart = Date.now() + const drainResult = await postWorkRegistry.drain(30_000) + agentLog( + `post-work drain (${Date.now() - drainStart}ms): drained=${drainResult.drained} abandoned=${drainResult.abandoned}`, + ) + } catch (error) { + agentLog(`post-work drain failed: ${error instanceof Error ? error.message : String(error)}`) + } + try { if (agent) { await agent.stop() diff --git a/src/server/infra/daemon/brv-server.ts b/src/server/infra/daemon/brv-server.ts index b3c2cd83c..ba8208296 100644 --- a/src/server/infra/daemon/brv-server.ts +++ b/src/server/infra/daemon/brv-server.ts @@ -272,6 +272,16 @@ async function main(): Promise { const getQueueLength = (projectPath: string): number => agentPool?.getQueueState().find((q) => q.projectPath === projectPath)?.queueLength ?? 0 + // Shared project-config resolver — used by the idle-dream dispatch and the + // task-router resolver wired into TransportHandlers below. Both paths must + // stamp the same reviewDisabled value so review semantics are consistent + // regardless of dispatch source (CLI task:create vs idle trigger). 
+ const curateConfigStore = new ProjectConfigStore() + const resolveReviewDisabled = async (projectPath: string): Promise => { + const config = await curateConfigStore.read(projectPath) + return config?.reviewDisabled === true + } + // Shared dream pre-check trigger factory. // The lock service explicitly throws if invoked — gate 4 (lock) is the agent's job; // the daemon must only ever evaluate gates 1-3 via checkEligibility(). @@ -311,11 +321,18 @@ async function main(): Promise { const result = await makeDreamPreCheckTrigger(projectPath).checkEligibility(projectPath) if (result.eligible) { log(`Dream eligible, dispatching dream task: ${projectPath}`) + // Idle dispatch bypasses TaskRouter.handleTaskCreate, so the + // reviewDisabled snapshot that the task-router stamps for the CLI + // path must be reproduced inline here. Without it, idle dreams + // would always default to review-enabled regardless of project + // setting (see resolveReviewDisabled above). + const reviewDisabled = await resolveReviewDisabled(projectPath) agentPool?.submitTask({ clientId: 'daemon', content: 'Memory consolidation (idle trigger)', force: false, projectPath, + reviewDisabled, taskId: randomUUID(), trigger: 'agent-idle', type: 'dream', @@ -387,6 +404,13 @@ async function main(): Promise { const transportHandlers = new TransportHandlers({ agentPool, clientManager, + // Resolves the project's review-disabled flag once at task-create. The result + // is stamped onto TaskInfo + TaskExecute so daemon hooks (CurateLogHandler) and + // the agent process (curate-tool backups, dream review entries) all observe a + // single value across the daemon→agent process boundary. Shared with the + // idle-dream dispatch above so review semantics are identical regardless of + // dispatch source (CLI task:create vs agent-idle trigger). 
+ isReviewDisabled: resolveReviewDisabled, lifecycleHooks: [curateLogHandler, queryLogHandler], // Daemon-side gate for dream task:create — mirrors the idle-trigger pre-check // in this file so the CLI path (brv dream without --force) actually honors diff --git a/src/server/infra/daemon/post-work-registry.ts b/src/server/infra/daemon/post-work-registry.ts new file mode 100644 index 000000000..ea440099b --- /dev/null +++ b/src/server/infra/daemon/post-work-registry.ts @@ -0,0 +1,98 @@ +/** + * Per-project serialized fire-and-forget queue with bounded shutdown drain. + * Different projects run concurrently; same project runs serially so two + * writers cannot race on `_index.md`. Thunk errors are swallowed (via + * `onError`) so one bad thunk cannot block the chain. + */ + +export type PostWorkThunk = () => Promise + +export type PostWorkRegistryOptions = { + onError?: (projectPath: string, error: unknown) => void +} + +export class PostWorkRegistry { + /** Optional logger called when a thunk throws. */ + private readonly onError?: (projectPath: string, error: unknown) => void + /** + * Per-project tail promise. Each `submit(project, thunk)` chains the new + * thunk after the previous tail. The tail is replaced atomically so the + * next submission picks up the latest one without races. + */ + private readonly tails = new Map>() + + public constructor(options: PostWorkRegistryOptions = {}) { + this.onError = options.onError + } + + /** + * Wait on the snapshot of tails captured at call time across all projects. + * Submissions arriving during the await are not awaited. + */ + public async awaitAll(): Promise { + const tails = [...this.tails.values()] + if (tails.length === 0) return + await Promise.all(tails) + } + + /** + * Wait on the project's tail captured at call time. Submissions arriving + * during the await are not awaited — keeps `--wait-finalize` deterministic. 
+ */ + public async awaitProject(projectPath: string): Promise { + const tail = this.tails.get(projectPath) + if (tail) { + await tail + } + } + + /** + * Wait up to `timeoutMs` for in-flight work across all projects. + * Errored thunks count as `drained` (work resolved, surfaced to onError); + * thunks still running at the deadline count as `abandoned`. + */ + public async drain(timeoutMs: number): Promise<{abandoned: number; drained: number}> { + const tails = [...this.tails.values()] + if (tails.length === 0) { + return {abandoned: 0, drained: 0} + } + + let timeoutHandle: NodeJS.Timeout | undefined + const timeoutPromise = new Promise<'timeout'>((resolve) => { + timeoutHandle = setTimeout(() => resolve('timeout'), timeoutMs) + }) + + const results = await Promise.all( + tails.map(async (tail) => { + const outcome = await Promise.race([tail.then(() => 'done' as const), timeoutPromise]) + return outcome + }), + ) + + if (timeoutHandle !== undefined) clearTimeout(timeoutHandle) + + const drained = results.filter((r) => r === 'done').length + const abandoned = results.filter((r) => r === 'timeout').length + return {abandoned, drained} + } + + /** + * Submit a thunk to run after any prior work for `projectPath` finishes. + * Returns synchronously; the thunk runs on the microtask queue. + */ + public submit(projectPath: string, thunk: PostWorkThunk): void { + const previousTail = this.tails.get(projectPath) ?? Promise.resolve() + const newTail = previousTail + .then(thunk) + .catch((error: unknown) => { + this.onError?.(projectPath, error) + }) + .finally(() => { + // Drop the map entry only if no follow-up submission appended. 
+ if (this.tails.get(projectPath) === newTail) { + this.tails.delete(projectPath) + } + }) + this.tails.set(projectPath, newTail) + } +} diff --git a/src/server/infra/dream/dream-lock-service.ts b/src/server/infra/dream/dream-lock-service.ts index 796f87271..855872051 100644 --- a/src/server/infra/dream/dream-lock-service.ts +++ b/src/server/infra/dream/dream-lock-service.ts @@ -10,10 +10,9 @@ type DreamLockServiceOptions = { } /** - * PID-based lock for dream execution. - * - * Lock file contains the owning process PID. Staleness is determined by mtime. - * Uses write-then-verify to handle race conditions between concurrent acquirers. + * PID-based file lock for context-tree writers (dream and curate's detached + * post-work). Lock file holds the owner PID; staleness is mtime-based. Uses + * write-then-verify to settle races between concurrent acquirers. */ export class DreamLockService { private readonly lockFilePath: string diff --git a/src/server/infra/executor/curate-executor.ts b/src/server/infra/executor/curate-executor.ts index 99226f556..fd39394da 100644 --- a/src/server/infra/executor/curate-executor.ts +++ b/src/server/infra/executor/curate-executor.ts @@ -51,7 +51,24 @@ export class CurateExecutor implements ICurateExecutor { this.fileContentReader = fileContentReader ?? createFileContentReader() } + /** Synchronous wrapper — runs Phases 1-3 and awaits Phase 4 inline. */ public async executeWithAgent(agent: ICipherAgent, options: CurateExecuteOptions): Promise { + const {finalize, response} = await this.runAgentBody(agent, options) + await finalize() + return response + } + + /** + * Returns the response after Phases 1-3 plus a `finalize` thunk for Phase 4 + * (snapshot diff, summary regen, manifest rebuild, dream counter, drain, + * task-session cleanup). Caller invokes `finalize` exactly once. The thunk + * is fail-open. If the agent body itself throws, the task session is cleaned + * up before propagating and no `finalize` is returned. 
+ */ + public async runAgentBody( + agent: ICipherAgent, + options: CurateExecuteOptions, + ): Promise<{finalize: () => Promise; response: string}> { const {clientCwd, content, files, projectRoot, taskId} = options // --- Phase 1: Preprocessing (no sessions created yet — safe to throw) --- @@ -76,6 +93,8 @@ export class CurateExecutor implements ICurateExecutor { } const taskSessionId = await agent.createTaskSession(taskId, 'curate', {mapRootEligible: true, userFacing: true}) + + let response: string try { // Task-scoped variable names for RLM pattern. // Replace hyphens with underscores: UUIDs have hyphens which are invalid in JS identifiers, @@ -144,72 +163,31 @@ export class CurateExecutor implements ICurateExecutor { // Execute on the task session (isolated sandbox + history) // Task lifecycle is managed by Transport (task:started, task:completed, task:error) - const response = await agent.executeOnSession(taskSessionId, prompt, { + response = await agent.executeOnSession(taskSessionId, prompt, { executionContext: {clearHistory: true, commandType: 'curate', maxIterations: 50}, taskId, }) // Parse curation status from agent response for status tracking this.lastStatus = this.parseCurationStatus(taskId, response) + } catch (error) { + // Clean up before propagating — error path returns no finalize. + await agent.deleteTaskSession(taskSessionId) + throw error + } - // Summary cascade regeneration (the LLM-driven `propagateStaleness` walk) - // is deferred to the next dream cycle to keep curate's hot path free of - // LLM calls. The manifest is rebuilt inline because it is a pure file - // scan (no LLM) and keeps newly-curated leaf files immediately - // discoverable via manifest-driven retrieval. - // Hoisted: both blocks below construct a DreamStateService against the - // same project. They share the module-level mutex via `getStateMutex`, - // so a single instance is sufficient and avoids duplicate construction. 
- const dreamStateService = new DreamStateService({baseDir: path.join(baseDir, BRV_DIR)}) - - // Two independent fail-open concerns: (a) enqueue the deferred - // summary-cascade work to dream's queue; (b) rebuild the search - // manifest. They share `changedPaths` but otherwise are unrelated — - // a transient disk error on the dream-state write must not skip the - // pure-filesystem manifest scan that keeps newly-curated leaf files - // immediately discoverable. Each runs in its own try block so one - // failure cannot mask the other's work. - let changedPaths: string[] = [] - if (preState) { - try { - const postState = await snapshotService.getCurrentState(baseDir) - changedPaths = diffStates(preState, postState) - } catch { - // Fail-open: snapshot errors leave changedPaths empty → no enqueue, - // no manifest rebuild. Next curate's snapshot will pick up the diff. - } - - if (changedPaths.length > 0) { - try { - await dreamStateService.enqueueStaleSummaryPaths(changedPaths) - } catch { - // Fail-open: queue write errors never block curation. The next - // curate's enqueue will still capture the same paths via diffStates. - } - - try { - const manifestService = new FileContextTreeManifestService({baseDirectory: baseDir}) - await manifestService.buildManifest(baseDir) - } catch { - // Fail-open: manifest rebuild is best-effort pre-warming. - } - } - } - - // Increment dream curation counter (fail-open: non-critical for curation) + const finalize = async (): Promise => { try { - await dreamStateService.incrementCurationCount() - } catch { - // Dream state tracking is non-critical — don't block curation + await this.propagateAndRebuild({baseDir, preState, snapshotService}) + await this.incrementDreamCounter(baseDir) + await (agent as BackgroundDrainAgent).drainBackgroundWork?.() + } finally { + // In `finally` so the session is deleted even if Phase 4 throws. 
+ await agent.deleteTaskSession(taskSessionId) } - - await (agent as BackgroundDrainAgent).drainBackgroundWork?.() - - return response - } finally { - // Clean up entire task session (sandbox + history) in one call - await agent.deleteTaskSession(taskSessionId) } + + return {finalize, response} } /** @@ -286,6 +264,19 @@ export class CurateExecutor implements ICurateExecutor { } } + /** + * Phase 4d: bump the dream-state curation counter. Fail-open — dream state + * tracking is non-critical and must never block curate completion. + */ + private async incrementDreamCounter(baseDir: string): Promise { + try { + const dreamStateService = new DreamStateService({baseDir: path.join(baseDir, BRV_DIR)}) + await dreamStateService.incrementCurationCount() + } catch { + // Dream state tracking is non-critical + } + } + /** * Parse curation status from the agent response. * Extracts JSON status block if present, otherwise infers from response text. @@ -375,4 +366,54 @@ export class CurateExecutor implements ICurateExecutor { // Format with actual content return this.formatFileContentsForPrompt(readResults, skippedFiles, projectRoot) } + + /** + * Phase 4: snapshot diff → enqueue stale paths for dream → rebuild manifest. + * + * Summary cascade regeneration (the LLM-driven `propagateStaleness` walk) is + * deferred to the next dream cycle to keep curate's hot path free of LLM + * calls. The manifest is rebuilt inline because it is a pure file scan (no + * LLM) and keeps newly-curated leaf files immediately discoverable via + * manifest-driven retrieval. + * + * Two independent fail-open concerns: (a) enqueue the deferred summary-cascade + * work to dream's queue; (b) rebuild the search manifest. They share + * `changedPaths` but otherwise are unrelated — a transient disk error on the + * dream-state write must not skip the pure-filesystem manifest scan. Each + * runs in its own try block so one failure cannot mask the other's work. 
+ */ + private async propagateAndRebuild(args: { + baseDir: string + preState: Map | undefined + snapshotService: FileContextTreeSnapshotService + }): Promise { + const {baseDir, preState, snapshotService} = args + if (!preState) return + + let changedPaths: string[] = [] + try { + const postState = await snapshotService.getCurrentState(baseDir) + changedPaths = diffStates(preState, postState) + } catch { + // Fail-open: snapshot errors leave changedPaths empty → no enqueue, + // no manifest rebuild. Next curate's snapshot will pick up the diff. + } + + if (changedPaths.length === 0) return + + try { + const dreamStateService = new DreamStateService({baseDir: path.join(baseDir, BRV_DIR)}) + await dreamStateService.enqueueStaleSummaryPaths(changedPaths) + } catch { + // Fail-open: queue write errors never block curation. The next + // curate's enqueue will still capture the same paths via diffStates. + } + + try { + const manifestService = new FileContextTreeManifestService({baseDirectory: baseDir}) + await manifestService.buildManifest(baseDir) + } catch { + // Fail-open: manifest rebuild is best-effort pre-warming. + } + } } diff --git a/src/server/infra/executor/dream-executor.ts b/src/server/infra/executor/dream-executor.ts index d0426d6b2..f516c9241 100644 --- a/src/server/infra/executor/dream-executor.ts +++ b/src/server/infra/executor/dream-executor.ts @@ -77,6 +77,14 @@ export type DreamExecutorDeps = { type DreamExecuteOptions = { priorMtime: number projectRoot: string + /** + * Snapshot of the project's reviewDisabled flag captured at task-create on the + * daemon side and passed through TaskExecute. When true, the dream-side dual-write + * of needsReview operations into the curate log is skipped — they won't surface in + * `brv review pending` and won't appear as review entries in the curate log folder. + * Undefined → treated as enabled (fail-open). 
+ */ + reviewDisabled?: boolean taskId: string trigger: 'agent-idle' | 'cli' | 'manual' } @@ -122,6 +130,13 @@ export class DreamExecutor { // dreamStateService.update throws) would re-write the same review entries. let reviewEntriesWritten = false + // The disable flag is captured once at task-create on the daemon and forwarded via + // TaskExecute → executeTask → here. Reading it from options (instead of re-resolving + // from .brv/config.json on the agent side) means backup-creation and review-entry + // writing observe the same value as the daemon-side curate log handler, even if the + // user toggles mid-run. + const reviewDisabled = options.reviewDisabled ?? false + try { // Step 1: Capture pre-state const snapshotService = new FileContextTreeSnapshotService({baseDirectory: projectRoot}) @@ -152,6 +167,7 @@ export class DreamExecutor { logId, out: allOperations, projectRoot, + reviewDisabled, signal: controller.signal, taskId: options.taskId, }) @@ -208,7 +224,7 @@ export class DreamExecutor { // Step 6b: Create curate log entries for needsReview operations (dual-write for review system). // Runs after the completed dream log is durably written so review tasks never outlive their dream log. - await this.createReviewEntries(allOperations, contextTreeDir, options.taskId) + await this.createReviewEntries({contextTreeDir, operations: allOperations, reviewDisabled, taskId: options.taskId}) reviewEntriesWritten = true // Step 7: Update dream state — atomic RMW under the per-file mutex so a @@ -223,7 +239,7 @@ export class DreamExecutor { })) succeeded = true - return {logId, result: this.formatResult(logId, summary)} + return {logId, result: this.formatResult(logId, summary, reviewDisabled)} } catch (error) { // Save error/partial log entry (best-effort). Use allOperations so any work // that completed before the failure is captured — keeps the audit trail and @@ -264,7 +280,7 @@ export class DreamExecutor { // already wrote the entries (i.e. 
step 7 threw after step 6b succeeded) // to prevent duplicate review items. if (allOperations.length > 0 && !reviewEntriesWritten) { - await this.createReviewEntries(allOperations, contextTreeDir, options.taskId) + await this.createReviewEntries({contextTreeDir, operations: allOperations, reviewDisabled, taskId: options.taskId}) } throw error @@ -285,6 +301,7 @@ export class DreamExecutor { * each step. Extracted so the executor can preserve partial work when a later step * throws — and so tests can inject controlled ops without a full LLM round-trip. */ + // protected is required for test subclassing (ProbeExecutor, makePartialRunExecutor) protected async runOperations(args: { agent: ICipherAgent changedFiles: Set @@ -292,17 +309,24 @@ export class DreamExecutor { logId: string out: DreamOperation[] projectRoot: string + /** + * When true, the dream-side review system (backups + curate-log dual-write) is suppressed. + * Backups exist only to support review rejection; with reviews disabled they are dead state, + * so consolidate/prune are run without a `reviewBackupStore` so review-backups/ stays empty. + */ + reviewDisabled?: boolean signal: AbortSignal taskId: string }): Promise { - const {agent, changedFiles, contextTreeDir, logId, out, projectRoot, signal, taskId} = args + const {agent, changedFiles, contextTreeDir, logId, out, projectRoot, reviewDisabled, signal, taskId} = args + const reviewBackupStore = reviewDisabled === true ? 
undefined : this.deps.reviewBackupStore out.push( ...(await consolidate([...changedFiles], { agent, contextTreeDir, dreamStateService: this.deps.dreamStateService, - reviewBackupStore: this.deps.reviewBackupStore, + reviewBackupStore, runtimeSignalStore: this.deps.runtimeSignalStore, searchService: this.deps.searchService, signal, @@ -331,7 +355,7 @@ export class DreamExecutor { dreamLogId: logId, dreamStateService: this.deps.dreamStateService, projectRoot, - reviewBackupStore: this.deps.reviewBackupStore, + reviewBackupStore, runtimeSignalStore: this.deps.runtimeSignalStore, signal, taskId, @@ -375,14 +399,21 @@ export class DreamExecutor { * Dual-write: create curate log entries for dream operations that need human review. * This surfaces them in `brv review pending` without modifying the review system. */ - private async createReviewEntries( - operations: DreamOperation[], - contextTreeDir: string, - taskId: string, - ): Promise { + private async createReviewEntries(args: { + contextTreeDir: string + operations: DreamOperation[] + reviewDisabled: boolean + taskId: string + }): Promise { + const {contextTreeDir, operations, reviewDisabled, taskId} = args const reviewOps = operations.filter((op) => op.needsReview) if (reviewOps.length === 0) return + // Honor `brv review --disable`: when disabled, dream's needsReview ops are not surfaced + // through the curate-log dual-write, so they don't appear in `brv review pending`. + // The flag is the daemon-stamped snapshot passed in via DreamExecuteOptions. 
+ if (reviewDisabled) return + const curateOps: CurateLogEntry['operations'] = reviewOps.map((op) => mapDreamOpToCurateOp(op, contextTreeDir), ) @@ -456,7 +487,7 @@ export class DreamExecutor { return new Set(results.filter((f): f is string => f !== null)) } - private formatResult(logId: string, summary: DreamLogSummary): string { + private formatResult(logId: string, summary: DreamLogSummary, reviewDisabled: boolean): string { const parts = [`Dream completed (${logId})`] const counts = [ summary.consolidated > 0 ? `${summary.consolidated} consolidated` : '', @@ -473,12 +504,16 @@ export class DreamExecutor { parts.push(`${summary.errors} operations failed`) } - if (summary.flaggedForReview > 0) { + // Suppress when review is disabled — the count comes from LLM `needsReview` tags + // computed before the dual-write gate, so the ops were intentionally not enqueued + // and would not appear in `brv review pending`. + if (summary.flaggedForReview > 0 && !reviewDisabled) { parts.push(`${summary.flaggedForReview} operations flagged for review`) } return parts.join('\n') } + } /** Map a dream operation to a curate log operation for the review system. 
*/ diff --git a/src/server/infra/executor/folder-pack-executor.ts b/src/server/infra/executor/folder-pack-executor.ts index afe407005..1f0b40c66 100644 --- a/src/server/infra/executor/folder-pack-executor.ts +++ b/src/server/infra/executor/folder-pack-executor.ts @@ -10,10 +10,8 @@ import type { IFolderPackExecutor, } from '../../core/interfaces/executor/i-folder-pack-executor.js' -import {FileContextTreeManifestService} from '../context-tree/file-context-tree-manifest-service.js' import {FileContextTreeSnapshotService} from '../context-tree/file-context-tree-snapshot-service.js' -import {FileContextTreeSummaryService} from '../context-tree/file-context-tree-summary-service.js' -import {diffStates} from '../context-tree/snapshot-diff.js' +import {propagateSummariesUnderLock} from '../context-tree/propagate-summaries.js' const LOG_PATH = process.env.BRV_SESSION_LOG type BackgroundDrainAgent = ICipherAgent & {drainBackgroundWork?: () => Promise} @@ -47,7 +45,22 @@ function folderPackLog(message: string): void { export class FolderPackExecutor implements IFolderPackExecutor { constructor(private readonly folderPackService: IFolderPackService) {} + /** Synchronous wrapper — runs the agent body and awaits Phase 4 inline. */ public async executeWithAgent(agent: ICipherAgent, options: FolderPackExecuteOptions): Promise { + const {finalize, response} = await this.runAgentBody(agent, options) + await finalize() + return response + } + + /** + * Returns the response after the agent body, plus a `finalize` thunk for + * Phase 4 (snapshot diff → propagateStaleness → manifest rebuild → drain). + * Caller invokes `finalize` exactly once. 
+ */ + public async runAgentBody( + agent: ICipherAgent, + options: FolderPackExecuteOptions, + ): Promise<{finalize: () => Promise; response: string}> { const {clientCwd, content, folderPath, projectRoot, taskId, worktreeRoot} = options // Resolve folder path: @@ -83,31 +96,19 @@ export class FolderPackExecutor implements IFolderPackExecutor { }) // Use iterative extraction strategy (inspired by rlm) - // Stores packed folder in sandbox environment and lets agent iteratively query/extract - // This avoids token limits entirely - works for folders of any size const response = await this.executeIterative(agent, packResult, content, absoluteFolderPath, taskId, tempFileDir) - if (preState) { - try { - const postState = await snapshotService.getCurrentState(tempFileDir) - const changedPaths = diffStates(preState, postState) - if (changedPaths.length > 0) { - const summaryService = new FileContextTreeSummaryService() - const results = await summaryService.propagateStaleness(changedPaths, agent, tempFileDir, taskId) - - if (results.some((result) => result.actionTaken)) { - const manifestService = new FileContextTreeManifestService({baseDirectory: tempFileDir}) - await manifestService.buildManifest(tempFileDir) - } - } - } catch { - // Fail-open: summary/manifest errors never block curation - } + // Build the Phase 4 thunk. Note: unlike CurateExecutor, the task session + // was already deleted inside `executeIterative`'s `finally`, so Phase 4's + // `propagateStaleness` runs against the agent's default session. That + // is fine for folder-pack — it has no per-task sandbox state to preserve + // through Phase 4. 
+ const finalize = async (): Promise => { + await propagateSummariesUnderLock({agent, baseDir: tempFileDir, preState, snapshotService, taskId}) + await (agent as BackgroundDrainAgent).drainBackgroundWork?.() } - await (agent as BackgroundDrainAgent).drainBackgroundWork?.() - - return response + return {finalize, response} } /** diff --git a/src/server/infra/git/git-error-messages.ts b/src/server/infra/git/git-error-messages.ts new file mode 100644 index 000000000..d47a0170c --- /dev/null +++ b/src/server/infra/git/git-error-messages.ts @@ -0,0 +1,24 @@ +/** + * Working-tree-overwrite error messages, unified across pull / merge / checkout + * paths. Trailer says "commit or discard" so users know `brv vc reset` is the + * escape hatch (brv has no stash). + * + * The "would be overwritten" anchor is preserved verbatim because vc-handler + * pattern-matches on it to map GitError → VcError(UNCOMMITTED_CHANGES). + */ +export type OverwriteOperation = 'checkout' | 'merge' | 'pull' + +function trailerFor(operation: OverwriteOperation): string { + const action = operation === 'checkout' ? 'switch branches' : operation + return `Please commit or discard your changes before you ${action}.` +} + +export function formatOverwriteMessage(operation: OverwriteOperation, files: string[]): string { + const trailer = trailerFor(operation) + if (files.length === 0) { + return `Your local changes would be overwritten by ${operation}. 
${trailer}` + } + + const list = files.map((f) => `\t${f}`).join('\n') + return `Your local changes to the following files would be overwritten by ${operation}:\n${list}\n${trailer}` +} diff --git a/src/server/infra/git/isomorphic-git-service.ts b/src/server/infra/git/isomorphic-git-service.ts index bbb4bf01b..72d0f4774 100644 --- a/src/server/infra/git/isomorphic-git-service.ts +++ b/src/server/infra/git/isomorphic-git-service.ts @@ -53,7 +53,9 @@ import type {IAuthStateStore} from '../../core/interfaces/state/i-auth-state-sto import {hasConflictMarkers} from '../../../shared/utils/conflict-markers.js' import {GitAuthError, GitError} from '../../core/domain/errors/git-error.js' +import {formatOverwriteMessage} from './git-error-messages.js' import {gitHttpWrapper as http} from './git-http-wrapper.js' +import {classifyTuple} from './status-row-classifier.js' /** Max commit depth for ahead/behind calculation. Counts beyond this are truncated. */ const MAX_AHEAD_BEHIND_DEPTH = 500 @@ -205,10 +207,7 @@ export class IsomorphicGitService implements IGitService { await git.checkout({dir, force: params.force, fs, ref: params.ref}) } catch (error) { if (error instanceof git.Errors.CheckoutConflictError) { - throw new GitError( - 'Your local changes to the following files would be overwritten by checkout. ' + - 'Commit your changes or stash them before you switch branches.', - ) + throw new GitError(formatOverwriteMessage('checkout', [])) } throw error @@ -641,7 +640,7 @@ export class IsomorphicGitService implements IGitService { await git.writeRef({dir, force: true, fs, ref: `refs/heads/${currentBranch}`, value: localSha}) } - throw new GitError('Local changes would be overwritten by merge. 
Commit or discard your changes first.') + throw new GitError(formatOverwriteMessage('merge', [])) } if (error instanceof git.Errors.MergeConflictError) { @@ -723,7 +722,9 @@ export class IsomorphicGitService implements IGitService { // Abort if any dirty local file would be overwritten by the incoming changes if (localSha && remoteSha) { const matrix = await git.statusMatrix({dir, fs}) - const dirtyFiles = matrix.filter((row) => row[2] !== 1 || row[3] !== 1).map((row) => String(row[0])) + const dirtyFiles = matrix + .filter(([, head, workdir, stage]) => classifyTuple(head, workdir, stage).dirty) + .map((row) => String(row[0])) const localRef = localSha const remoteRef = remoteSha @@ -742,8 +743,9 @@ export class IsomorphicGitService implements IGitService { return localFileOid !== remoteFileOid }), ) - if (wouldBeOverwritten.some(Boolean)) { - throw new GitError('Local changes would be overwritten by pull. Commit or discard your changes first.') + const overwrittenFiles = dirtyFiles.filter((_, i) => wouldBeOverwritten[i]) + if (overwrittenFiles.length > 0) { + throw new GitError(formatOverwriteMessage('pull', overwrittenFiles)) } } @@ -802,7 +804,7 @@ export class IsomorphicGitService implements IGitService { await git.writeRef({dir, force: true, fs, ref: `refs/heads/${localBranch}`, value: localSha}) } - throw new GitError('Local changes would be overwritten by pull. 
Commit or discard your changes first.') + throw new GitError(formatOverwriteMessage('pull', [])) } throw error @@ -984,21 +986,15 @@ export class IsomorphicGitService implements IGitService { } private classifyStagedRow(row: StatusRow): ChangedFile | undefined { - const [filepath, head, , stage] = row - const path = String(filepath) - if (head === 0 && (stage === 2 || stage === 3)) return {path, status: 'added'} - if (head === 1 && stage === 0) return {path, status: 'deleted'} - if (head === 1 && (stage === 2 || stage === 3)) return {path, status: 'modified'} - return undefined + const [filepath, head, workdir, stage] = row + const status = classifyTuple(head, workdir, stage).stagedDiff + return status ? {path: String(filepath), status} : undefined } private classifyUnstagedRow(row: StatusRow): ChangedFile | undefined { const [filepath, head, workdir, stage] = row - const path = String(filepath) - if (head === 0 && stage === 0) return undefined // skip untracked - if (workdir === 0 && (stage === 1 || stage === 2 || stage === 3)) return {path, status: 'deleted'} - if (workdir === 2 && (stage === 1 || stage === 3)) return {path, status: 'modified'} - return undefined + const status = classifyTuple(head, workdir, stage).unstagedDiff + return status ? 
{path: String(filepath), status} : undefined } private conflictsFromError(error: Error): GitConflict[] { @@ -1064,9 +1060,9 @@ export class IsomorphicGitService implements IGitService { private async guardStagedConflicts(dir: string, sourceBranch: string, targetRef: string): Promise { const matrix = await git.statusMatrix({dir, fs}) - // Staged files: index (col 3) differs from HEAD (col 1) - // [1,_,2] modified+staged, [1,_,0] deleted+staged, [0,_,2] new+staged - const stagedFiles = matrix.filter(([, head, , stage]) => stage !== head).map(([filepath]) => String(filepath)) + const stagedFiles = matrix + .filter(([, head, workdir, stage]) => classifyTuple(head, workdir, stage).staged) + .map(([filepath]) => String(filepath)) if (stagedFiles.length === 0) return @@ -1085,11 +1081,7 @@ export class IsomorphicGitService implements IGitService { /* eslint-enable no-await-in-loop */ if (conflicting.length > 0) { - throw new GitError( - 'Your local changes to the following files would be overwritten by checkout:\n' + - conflicting.map((f) => `\t${f}`).join('\n') + - '\nPlease commit your changes or stash them before you switch branches.', - ) + throw new GitError(formatOverwriteMessage('checkout', conflicting)) } } @@ -1258,36 +1250,14 @@ export class IsomorphicGitService implements IGitService { return {success: true} } - // eslint-disable-next-line complexity private parseMatrix(matrix: StatusRow[]): GitStatusFile[] { const files: GitStatusFile[] = [] for (const [filepath, head, workdir, stage] of matrix) { const path = String(filepath) - if (head === 1 && workdir === 0 && stage === 0) { - files.push({path, staged: true, status: 'deleted'}) // [1,0,0] staged deletion (git rm) - } else if (head === 1 && workdir === 0 && stage === 1) { - files.push({path, staged: false, status: 'deleted'}) // [1,0,1] unstaged deletion (rm without git rm) - } else if (head === 1 && workdir === 0 && stage === 2) { - files.push({path, staged: false, status: 'deleted'}) // [1,0,2] absent from 
disk, index differs from HEAD (e.g. post-merge-conflict) - } else if (head === 1 && workdir === 0 && stage === 3) { - // [1,0,3] staged modification then deleted from disk → show both staged mod and unstaged deletion - files.push({path, staged: true, status: 'modified'}, {path, staged: false, status: 'deleted'}) - } else if (head === 1 && workdir === 1 && stage === 0) { - files.push({path, staged: true, status: 'deleted'}, {path, staged: false, status: 'untracked'}) // [1,1,0] git rm --cached: staged deletion + file still in workdir → untracked - } else if (head === 1 && workdir === 2 && stage === 1) { - files.push({path, staged: false, status: 'modified'}) // [1,2,1] unstaged modification - } else if (head === 1 && workdir === 2 && stage === 2) { - files.push({path, staged: true, status: 'modified'}) // [1,2,2] staged modification - } else if (head === 1 && workdir === 2 && stage === 3) { - files.push({path, staged: true, status: 'modified'}, {path, staged: false, status: 'modified'}) // [1,2,3] partially staged modification - } else if (head === 0 && workdir === 2 && stage === 0) { - files.push({path, staged: false, status: 'untracked'}) // [0,2,0] untracked new file - } else if (head === 0 && workdir === 2 && stage === 2) { - files.push({path, staged: true, status: 'added'}) // [0,2,2] staged new file - } else if (head === 0 && workdir === 2 && stage === 3) { - files.push({path, staged: true, status: 'added'}, {path, staged: false, status: 'modified'}) // [0,2,3] partially staged new file + const classification = classifyTuple(head, workdir, stage) + for (const entry of classification.files) { + files.push({path, staged: entry.staged, status: entry.status}) } - // [1,1,1] unmodified → skip } return files @@ -1365,13 +1335,7 @@ export class IsomorphicGitService implements IGitService { const matrix = await git.statusMatrix({dir, fs}) // Identify staged rows — any file where the index differs from HEAD - const stagedRows = matrix.filter(([, head, , stage]) => { - 
if (head === 1 && stage === 0) return true // staged deletion or git rm --cached ([1,0,0] and [1,1,0]) - if (head === 0 && stage === 2) return true // staged new file ([0,2,2]) - if (head === 1 && stage === 2) return true // staged modification ([1,2,2]) - if (stage === 3) return true // partially staged ([*,*,3]) - return false - }) + const stagedRows = matrix.filter(([, head, workdir, stage]) => classifyTuple(head, workdir, stage).staged) const toUnstage = filePaths && filePaths.length > 0 diff --git a/src/server/infra/git/status-row-classifier.ts b/src/server/infra/git/status-row-classifier.ts new file mode 100644 index 000000000..d7e04b6a8 --- /dev/null +++ b/src/server/infra/git/status-row-classifier.ts @@ -0,0 +1,108 @@ +import type {ChangedFile, GitStatusFile} from '../../core/interfaces/services/i-git-service.js' + +import {GitError} from '../../core/domain/errors/git-error.js' + +/** Status entry projected to status() output. The caller adds `path`. */ +export type StatusEntry = Pick + +/** + * Classification of a single statusMatrix row's `[head, workdir, stage]` tuple, + * shared across every consumer of `git.statusMatrix`. Six call sites used to + * derive their own dirty/staged/diff projections from raw column comparisons; + * routing every consumer through this single classifier is what guarantees + * they all agree on what each tuple means. + */ +export type RowClassification = { + /** True for any tuple other than `[1,1,1]`. Drives `status.isClean` and `pull`'s overwrite check. */ + dirty: boolean + /** + * Status entries for `status()` output. The caller attaches the path. 0-2 + * entries per row, ordered staged-first then unstaged. + */ + files: StatusEntry[] + /** True when `stage !== head`. Drives `guardStagedConflicts` and `resetUnstage`. */ + staged: boolean + /** HEAD->INDEX diff for `brv vc diff --staged`. Undefined when INDEX matches HEAD. */ + stagedDiff?: ChangedFile['status'] + /** INDEX->WORKDIR diff for `brv vc diff` (default). 
Undefined when WORKDIR matches INDEX. */ + unstagedDiff?: ChangedFile['status'] +} + +type StagedDiff = ChangedFile['status'] | undefined +type UnstagedDiff = ChangedFile['status'] | undefined + +/** + * isomorphic-git statusMatrix encoding: + * HEAD (h): 0 = absent, 1 = present + * WORKDIR (w): 0 = absent, 1 = matches HEAD, 2 = differs from HEAD + * STAGE (s): 0 = absent, 1 = matches HEAD, 2 = matches WORKDIR, 3 = differs from both + * + * The XY columns native git derives from these are: + * staged column = HEAD vs INDEX + * unstaged column = INDEX vs WORKDIR + * The two helpers below project the encoding into those two columns. + */ +function stagedDiffFor(h: number, s: number): StagedDiff { + if (h === 0 && s === 0) return undefined // not tracked anywhere + if (h === 1 && s === 0) return 'deleted' // HEAD has it, INDEX doesn't + if (h === 0) return 'added' // h=0, s>0 + if (s === 1) return undefined // INDEX matches HEAD, no staged change + return 'modified' // h=1, s in {2,3}: INDEX differs from HEAD +} + +function unstagedDiffFor(s: number, w: number): UnstagedDiff { + if (s === 0) return undefined // INDEX absent: file is either gone or untracked, not a diff + if (w === 0) return 'deleted' // s>0, WORKDIR absent: INDEX has a blob, disk does not + // (s=2,w=0) is unreachable by the encoding (s=2 means INDEX==WORKDIR, so WORKDIR + // absent forces INDEX absent => s=0); the w===0 guard above handles it safely either way. 
+ if (s === 2) return undefined // INDEX matches WORKDIR by definition + if (s === 1 && w === 1) return undefined // INDEX=HEAD and WORKDIR=HEAD => INDEX=WORKDIR transitively + return 'modified' // s=1,w=2 or s=3,w>0 +} + +function validateEncoding(h: number, w: number, s: number): void { + if (h !== 0 && h !== 1) { + throw new GitError(`HEAD column out of range: ${h}; isomorphic-git encoding may have changed`) + } + + if (w !== 0 && w !== 1 && w !== 2) { + throw new GitError(`WORKDIR column out of range: ${w}; isomorphic-git encoding may have changed`) + } + + if (s !== 0 && s !== 1 && s !== 2 && s !== 3) { + throw new GitError(`STAGE column out of range: ${s}; isomorphic-git encoding may have changed`) + } +} + +/** + * Classify a `[head, workdir, stage]` tuple from `git.statusMatrix` into the + * shared projection. Total over the encoding's value range; throws only when + * a column is outside that range, which is the cleanest signal that upstream + * changed the encoding shape. + */ +export function classifyTuple(h: number, w: number, s: number): RowClassification { + validateEncoding(h, w, s) + + const stagedDiff = stagedDiffFor(h, s) + const unstagedDiff = unstagedDiffFor(s, w) + const untracked = s === 0 && w > 0 + + const files: StatusEntry[] = [] + if (stagedDiff) files.push({staged: true, status: stagedDiff}) + if (untracked) { + files.push({staged: false, status: 'untracked'}) + } else if (unstagedDiff) { + files.push({staged: false, status: unstagedDiff}) + } + + return { + // Every tuple except clean [1,1,1]. Note: [0,0,0] (file absent everywhere) would + // evaluate true here with files=[], but is unreachable in practice because + // statusMatrix won't emit a row for a file absent from HEAD, INDEX, and WORKDIR. 
+ dirty: !(h === 1 && w === 1 && s === 1), + files, + staged: stagedDiff !== undefined, + stagedDiff, + unstagedDiff, + } +} diff --git a/src/server/infra/process/curate-log-handler.ts b/src/server/infra/process/curate-log-handler.ts index 67ae14a4b..1ad84475c 100644 --- a/src/server/infra/process/curate-log-handler.ts +++ b/src/server/infra/process/curate-log-handler.ts @@ -16,6 +16,13 @@ type TaskState = { entry: CurateLogEntry operations: CurateLogOperation[] projectPath: string + /** + * Snapshot of the project's `reviewDisabled` flag captured at task-create time. + * Held for the task lifetime so onToolResult and onTaskCompleted observe a single value + * even if the user toggles it mid-task. Sourced from `task.reviewDisabled`, which the + * daemon stamps once at the task-create boundary. + */ + reviewDisabled: boolean } const CURATE_TASK_TYPES = ['curate', 'curate-folder'] as const @@ -96,6 +103,9 @@ export class CurateLogHandler implements ITaskLifecycleHook { /** * @param createStore - Optional factory for testing. Default: FileCurateLogStore. * @param onPendingReviews - Optional callback fired when curate completes with pending review ops. + * + * Whether reviews are disabled is read directly from `task.reviewDisabled` (snapshotted by + * the daemon at task-create time). Undefined means review enabled (fail-open). */ constructor( private readonly createStore?: (projectPath: string) => ICurateLogStore, @@ -210,7 +220,8 @@ export class CurateLogHandler implements ITaskLifecycleHook { // Caching `entry` here lets onTaskCompleted/onTaskError rebuild the final entry // without a getById round-trip — so completion is never lost even if this initial // save fails. - this.tasks.set(task.taskId, {entry, operations: [], projectPath: task.projectPath}) + const reviewDisabled = task.reviewDisabled ?? 
false + this.tasks.set(task.taskId, {entry, operations: [], projectPath: task.projectPath, reviewDisabled}) this.activeTaskCount.set(task.projectPath, (this.activeTaskCount.get(task.projectPath) ?? 0) + 1) // Fire-and-forget: logId is already known, save is best-effort. @@ -252,7 +263,7 @@ export class CurateLogHandler implements ITaskLifecycleHook { const ops = extractCurateOperations(payload) for (const op of ops) { - if (op.needsReview && op.status === 'success') { + if (op.needsReview && op.status === 'success' && !state.reviewDisabled) { op.reviewStatus = 'pending' } diff --git a/src/server/infra/process/feature-handlers.ts b/src/server/infra/process/feature-handlers.ts index 2b503cb85..355f33e80 100644 --- a/src/server/infra/process/feature-handlers.ts +++ b/src/server/infra/process/feature-handlers.ts @@ -240,6 +240,7 @@ export async function setupFeatureHandlers({ onResolved({ projectPath, taskId }) { broadcastToProject(projectPath, ReviewEvents.NOTIFY, { pendingCount: 0, reviewUrl: '', taskId }) }, + projectConfigStore, resolveProjectPath, reviewBackupStoreFactory: (projectPath) => new FileReviewBackupStore(join(projectPath, BRV_DIR)), transport, diff --git a/src/server/infra/process/task-router.ts b/src/server/infra/process/task-router.ts index 19056cf0a..4903971e1 100644 --- a/src/server/infra/process/task-router.ts +++ b/src/server/infra/process/task-router.ts @@ -81,10 +81,21 @@ export type PreDispatchCheckResult = {eligible: false; skipResult: string} | {el export type PreDispatchCheck = (task: TaskCreateRequest, projectPath?: string) => Promise +/** + * Resolves whether the review log is disabled for the given project. Called once + * at task-create and the result is stamped onto TaskInfo + TaskExecute, so daemon + * (CurateLogHandler) and agent (curate backups, dream review entries) observe a + * single value across the daemon→agent process boundary. Errors → undefined → + * downstream treats as enabled (fail-open). 
+ */ +export type IsReviewDisabledResolver = (projectPath: string) => Promise + type TaskRouterOptions = { agentPool?: IAgentPool /** Function to resolve agent clientId for a given project */ getAgentForProject: (projectPath?: string) => string | undefined + /** Resolves project's review-disabled flag at task-create. Optional; missing → undefined → enabled. */ + isReviewDisabled?: IsReviewDisabledResolver /** Lifecycle hooks for task events (e.g. CurateLogHandler). */ lifecycleHooks?: ITaskLifecycleHook[] /** @@ -132,6 +143,7 @@ export class TaskRouter { */ private completedTasks: Map = new Map() private readonly getAgentForProject: (projectPath?: string) => string | undefined + private readonly isReviewDisabled: IsReviewDisabledResolver | undefined private readonly lifecycleHooks: ITaskLifecycleHook[] private readonly preDispatchCheck: TaskRouterOptions['preDispatchCheck'] private readonly projectRegistry: IProjectRegistry | undefined @@ -145,6 +157,7 @@ export class TaskRouter { this.transport = options.transport this.agentPool = options.agentPool this.getAgentForProject = options.getAgentForProject + this.isReviewDisabled = options.isReviewDisabled this.lifecycleHooks = options.lifecycleHooks ?? [] this.preDispatchCheck = options.preDispatchCheck this.projectRegistry = options.projectRegistry @@ -526,7 +539,19 @@ export class TaskRouter { clientId, ) - // ── Await lifecycle hooks ───────────────────────────────────────────────── + // ── Snapshot reviewDisabled + await lifecycle hooks ─────────────────────── + + // Snapshot the project's review-disabled flag once at the task-create boundary. + // Placed after the synchronous tasks.set/task:created so callers that don't + // await the create handler still see the task in this.tasks immediately. + // The value is stamped onto TaskInfo (for CurateLogHandler) and TaskExecute + // (forwarded to the agent) so both sides observe a single consistent value + // even if the user toggles mid-task. 
Errors → undefined → fail-open enabled. + const reviewDisabled = await this.snapshotReviewDisabled(projectPath) + const taskAfterSnapshot = this.tasks.get(taskId) + if (taskAfterSnapshot && reviewDisabled !== undefined) { + this.tasks.set(taskId, {...taskAfterSnapshot, reviewDisabled}) + } const logId = await this.runCreateHooks(taskId) const task = this.tasks.get(taskId) @@ -576,6 +601,7 @@ export class TaskRouter { ...(data.folderPath ? {folderPath: data.folderPath} : {}), ...(data.force === undefined ? {} : {force: data.force}), ...(projectPath ? {projectPath} : {}), + ...(reviewDisabled === undefined ? {} : {reviewDisabled}), taskId, type: data.type, ...(worktreeRoot ? {worktreeRoot} : {}), @@ -983,4 +1009,33 @@ export class TaskRouter { return logIds.find((id): id is string => typeof id === 'string') } + + /** + * Reads the project's reviewDisabled flag at task-create. + * + * Returns `undefined` only when no resolver is wired or no projectPath was + * resolved — those are legitimate "not configured" cases where downstream + * consumers fall back to their own resolution path. + * + * On resolver THROW, returns the explicit boolean `false` (review enabled = + * fail-open) so the daemon and the agent observe a single concrete value. + * Returning `undefined` here would re-introduce the exact divergence the + * snapshot is supposed to prevent: daemon stamps no field → CurateLogHandler + * uses `?? false` (enabled) while the agent process opens no ALS scope and + * may read `reviewDisabled: true` from `.brv/config.json` in the + * curate-tool fallback, producing pending review entries without backups + * (or vice versa). Aligns with the agent-side `isReviewDisabledForBrvDir` + * which also fails open. 
+ */ + private async snapshotReviewDisabled(projectPath: string | undefined): Promise { + if (!this.isReviewDisabled || !projectPath) return undefined + try { + return await this.isReviewDisabled(projectPath) + } catch (error_) { + transportLog( + `TaskRouter: isReviewDisabled resolver threw for ${projectPath} — defaulting to enabled: ${error_ instanceof Error ? error_.message : String(error_)}`, + ) + return false + } + } } diff --git a/src/server/infra/process/transport-handlers.ts b/src/server/infra/process/transport-handlers.ts index c134d9be2..2e57b689d 100644 --- a/src/server/infra/process/transport-handlers.ts +++ b/src/server/infra/process/transport-handlers.ts @@ -33,17 +33,19 @@ import type {ITaskLifecycleHook} from '../../core/interfaces/process/i-task-life import type {IProjectRegistry} from '../../core/interfaces/project/i-project-registry.js' import type {IProjectRouter} from '../../core/interfaces/routing/i-project-router.js' import type {ITransportServer} from '../../core/interfaces/transport/i-transport-server.js' -import type {PreDispatchCheck} from './task-router.js' +import type {IsReviewDisabledResolver, PreDispatchCheck} from './task-router.js' import {ConnectionCoordinator} from './connection-coordinator.js' import {TaskRouter} from './task-router.js' -export type {PreDispatchCheck, PreDispatchCheckResult} from './task-router.js' +export type {IsReviewDisabledResolver, PreDispatchCheck, PreDispatchCheckResult} from './task-router.js' export type {TaskInfo} from './types.js' type TransportHandlersOptions = { agentPool?: IAgentPool clientManager?: IClientManager + /** Resolves project's review-disabled flag at task-create. Snapshotted once into TaskInfo + TaskExecute. */ + isReviewDisabled?: IsReviewDisabledResolver /** Lifecycle hooks for task events (e.g. CurateLogHandler). */ lifecycleHooks?: ITaskLifecycleHook[] /** Optional daemon-side gate run before dispatching a task to the agent pool. 
*/ @@ -67,6 +69,7 @@ export class TransportHandlers { this.taskRouter = new TaskRouter({ agentPool: options.agentPool, getAgentForProject: (projectPath) => this.connectionCoordinator.getAgentForProject(projectPath), + isReviewDisabled: options.isReviewDisabled, lifecycleHooks: options.lifecycleHooks, preDispatchCheck: options.preDispatchCheck, projectRegistry: options.projectRegistry, diff --git a/src/server/infra/storage/file-provider-config-store.ts b/src/server/infra/storage/file-provider-config-store.ts index ba1eb5e37..b2819b772 100644 --- a/src/server/infra/storage/file-provider-config-store.ts +++ b/src/server/infra/storage/file-provider-config-store.ts @@ -53,7 +53,9 @@ export class FileProviderConfigStore implements IProviderConfigStore { } /** - * Marks a provider as connected. + * Marks a provider as connected. Defaults to also marking it as active — + * pass `setAsActive: false` to skip the activate step (used when the + * provider has no usable model yet). */ public async connectProvider( providerId: string, @@ -62,10 +64,12 @@ export class FileProviderConfigStore implements IProviderConfigStore { authMethod?: 'api-key' | 'oauth' baseUrl?: string oauthAccountId?: string + setAsActive?: boolean }, ): Promise { const config = await this.read() - const newConfig = config.withProviderConnected(providerId, options).withActiveProvider(providerId) + const connected = config.withProviderConnected(providerId, options) + const newConfig = options?.setAsActive === false ? connected : connected.withActiveProvider(providerId) await this.write(newConfig) } diff --git a/src/server/infra/transport/handlers/provider-handler.ts b/src/server/infra/transport/handlers/provider-handler.ts index 6bcd83d21..38834f1ef 100644 --- a/src/server/infra/transport/handlers/provider-handler.ts +++ b/src/server/infra/transport/handlers/provider-handler.ts @@ -42,6 +42,7 @@ import {TransportDaemonEventNames} from '../../../core/domain/transport/schemas. 
import {getErrorMessage} from '../../../utils/error-helpers.js' import {processLog} from '../../../utils/process-logger.js' import {validateApiKey as validateApiKeyViaFetcher} from '../../http/provider-model-fetcher-registry.js' +import {OpenAICompatibleModelFetcher} from '../../http/provider-model-fetchers.js' import { computeExpiresAt, exchangeCodeForTokens as defaultExchangeCodeForTokens, @@ -51,6 +52,14 @@ import { ProviderCallbackTimeoutError, } from '../../provider-oauth/index.js' +async function defaultValidateOpenAICompatibleEndpoint(params: { + apiKey: string + baseUrl: string +}): Promise<{error?: string; isValid: boolean}> { + const fetcher = new OpenAICompatibleModelFetcher(params.baseUrl, 'OpenAI Compatible') + return fetcher.validateApiKey(params.apiKey) +} + type OAuthFlowState = { awaitInProgress?: boolean callbackServer?: ProviderCallbackServer @@ -72,6 +81,11 @@ export interface ProviderHandlerDeps { providerKeychainStore: IProviderKeychainStore providerOAuthTokenStore: IProviderOAuthTokenStore transport: ITransportServer + /** Validator for openai-compatible base URL (injectable for testing) */ + validateOpenAICompatibleEndpoint?: (params: { + apiKey: string + baseUrl: string + }) => Promise<{error?: string; isValid: boolean}> } /** @@ -89,6 +103,10 @@ export class ProviderHandler { private readonly providerKeychainStore: IProviderKeychainStore private readonly providerOAuthTokenStore: IProviderOAuthTokenStore private readonly transport: ITransportServer + private readonly validateOpenAICompatibleEndpoint: (params: { + apiKey: string + baseUrl: string + }) => Promise<{error?: string; isValid: boolean}> constructor(deps: ProviderHandlerDeps) { this.authStateStore = deps.authStateStore @@ -100,6 +118,8 @@ export class ProviderHandler { this.providerKeychainStore = deps.providerKeychainStore this.providerOAuthTokenStore = deps.providerOAuthTokenStore this.transport = deps.transport + this.validateOpenAICompatibleEndpoint = + 
deps.validateOpenAICompatibleEndpoint ?? defaultValidateOpenAICompatibleEndpoint } setup(): void { @@ -240,16 +260,53 @@ export class ProviderHandler { return {error: 'ByteRover Provider requires authentication. Run /login or brv login to sign in', success: false} } + // Verify openai-compatible endpoint is reachable before persisting anything — + // a failed setup must not leave a placeholder config that masquerades as + // connected. Falls back to existing baseUrl/keychain key on reconfigure + // when the request omits them, so a partial reconfigure (e.g. only changing + // the URL) still validates with the user's stored credentials. + if (providerId === 'openai-compatible') { + const existingBaseUrl = await this.providerConfigStore.read().then((c) => c.getBaseUrl(providerId)) + const effectiveBaseUrl = baseUrl ?? existingBaseUrl + if (!effectiveBaseUrl) { + return { + error: 'A base URL is required for OpenAI-compatible providers (e.g. http://localhost:11434/v1)', + success: false, + } + } + + const effectiveApiKey = apiKey ?? (await this.providerKeychainStore.getApiKey(providerId)) ?? '' + const validation = await this.validateOpenAICompatibleEndpoint({ + apiKey: effectiveApiKey, + baseUrl: effectiveBaseUrl, + }) + if (!validation.isValid) { + const detail = validation.error ? `: ${validation.error}` : '' + return { + error: `Could not reach OpenAI-compatible endpoint at ${effectiveBaseUrl}${detail}`, + success: false, + } + } + } + // Store API key if provided (supports optional keys for openai-compatible) if (apiKey) { await this.providerKeychainStore.setApiKey(providerId, apiKey) } const provider = getProviderById(providerId) + // Skip activating the provider when it ends up with no active model — + // the welcome view treats `activeProvider w/o activeModel` as + // "needs setup" and unmounts any in-flight setup flow on the home + // page. The model:setActive handler activates the provider when the + // user picks a model, which is the right moment. 
+ const willHaveActiveModel = Boolean(provider?.defaultModel) + || Boolean(await this.providerConfigStore.getActiveModel(providerId)) await this.providerConfigStore.connectProvider(providerId, { activeModel: provider?.defaultModel, authMethod: apiKey ? 'api-key' : undefined, baseUrl, + setAsActive: willHaveActiveModel, }) this.transport.broadcast(TransportDaemonEventNames.PROVIDER_UPDATED, {}) @@ -300,6 +357,7 @@ export class ProviderHandler { const authMethod = providerConfig?.authMethod return { + activeModel: providerConfig?.activeModel, apiKeyUrl: def.apiKeyUrl, authMethod, category: def.category, diff --git a/src/server/infra/transport/handlers/review-handler.ts b/src/server/infra/transport/handlers/review-handler.ts index 2b1463c01..9d3234669 100644 --- a/src/server/infra/transport/handlers/review-handler.ts +++ b/src/server/infra/transport/handlers/review-handler.ts @@ -2,6 +2,7 @@ import {mkdir, unlink, writeFile} from 'node:fs/promises' import {dirname, join, relative} from 'node:path' import type {ICurateLogStore} from '../../../core/interfaces/storage/i-curate-log-store.js' +import type {IProjectConfigStore} from '../../../core/interfaces/storage/i-project-config-store.js' import type {IReviewBackupStore} from '../../../core/interfaces/storage/i-review-backup-store.js' import type {ITransportServer} from '../../../core/interfaces/transport/i-transport-server.js' @@ -9,9 +10,12 @@ import { type ReviewDecideTaskRequest, type ReviewDecideTaskResponse, ReviewEvents, + type ReviewGetDisabledResponse, type ReviewPendingOperation, type ReviewPendingResponse, type ReviewPendingTask, + type ReviewSetDisabledRequest, + type ReviewSetDisabledResponse, } from '../../../../shared/transport/events/review-events.js' import {BRV_DIR, CONTEXT_TREE_DIR} from '../../../constants.js' import {type ProjectPathResolver, resolveRequiredProjectPath} from './handler-types.js' @@ -25,6 +29,7 @@ export interface ReviewHandlerDeps { curateLogStoreFactory: CurateLogStoreFactory /** 
Called after all pending ops for a task are decided. Used to notify TUI clients. */ onResolved?: (info: {projectPath: string; taskId: string}) => void + projectConfigStore: IProjectConfigStore resolveProjectPath: ProjectPathResolver reviewBackupStoreFactory: ReviewBackupStoreFactory transport: ITransportServer @@ -54,6 +59,7 @@ async function writeFileWithDirs(absolutePath: string, content: string): Promise export class ReviewHandler { private readonly curateLogStoreFactory: CurateLogStoreFactory private readonly onResolved: ReviewHandlerDeps['onResolved'] + private readonly projectConfigStore: IProjectConfigStore private readonly resolveProjectPath: ProjectPathResolver private readonly reviewBackupStoreFactory: ReviewBackupStoreFactory private readonly transport: ITransportServer @@ -61,6 +67,7 @@ export class ReviewHandler { constructor(deps: ReviewHandlerDeps) { this.curateLogStoreFactory = deps.curateLogStoreFactory this.onResolved = deps.onResolved + this.projectConfigStore = deps.projectConfigStore this.resolveProjectPath = deps.resolveProjectPath this.reviewBackupStoreFactory = deps.reviewBackupStoreFactory this.transport = deps.transport @@ -72,10 +79,20 @@ export class ReviewHandler { (data, clientId) => this.handleDecideTask(data, clientId), ) + this.transport.onRequest, ReviewGetDisabledResponse>( + ReviewEvents.GET_DISABLED, + (_data, clientId) => this.handleGetDisabled(clientId), + ) + this.transport.onRequest, ReviewPendingResponse>( ReviewEvents.PENDING, (_data, clientId) => this.handlePending(clientId), ) + + this.transport.onRequest( + ReviewEvents.SET_DISABLED, + (data, clientId) => this.handleSetDisabled(data, clientId), + ) } private async handleDecideTask( @@ -194,6 +211,16 @@ export class ReviewHandler { return {files: fileResults.map(({path, reverted}) => ({path, reverted})), totalCount} } + private async handleGetDisabled(clientId: string): Promise { + const projectPath = resolveRequiredProjectPath(this.resolveProjectPath, clientId) + const 
config = await this.projectConfigStore.read(projectPath) + if (!config) { + throw new Error(`Project not initialized: ${projectPath}. Run \`brv init\` first.`) + } + + return {reviewDisabled: config.reviewDisabled === true} + } + private async handlePending(clientId: string): Promise { const projectPath = resolveRequiredProjectPath(this.resolveProjectPath, clientId) const contextTreeDir = join(projectPath, BRV_DIR, CONTEXT_TREE_DIR) @@ -235,4 +262,19 @@ export class ReviewHandler { return {pendingCount, tasks} } + + private async handleSetDisabled( + {reviewDisabled}: ReviewSetDisabledRequest, + clientId: string, + ): Promise { + const projectPath = resolveRequiredProjectPath(this.resolveProjectPath, clientId) + const config = await this.projectConfigStore.read(projectPath) + if (!config) { + throw new Error(`Project not initialized: ${projectPath}. Run \`brv init\` first.`) + } + + const updated = config.withReviewDisabled(reviewDisabled) + await this.projectConfigStore.write(updated, projectPath) + return {reviewDisabled} + } } diff --git a/src/shared/transport/events/review-events.ts b/src/shared/transport/events/review-events.ts index 7c864f54e..a1b29072b 100644 --- a/src/shared/transport/events/review-events.ts +++ b/src/shared/transport/events/review-events.ts @@ -1,9 +1,23 @@ export const ReviewEvents = { DECIDE_TASK: 'review:decideTask', + GET_DISABLED: 'review:getDisabled', NOTIFY: 'review:notify', PENDING: 'review:pending', + SET_DISABLED: 'review:setDisabled', } as const +export interface ReviewGetDisabledResponse { + reviewDisabled: boolean +} + +export interface ReviewSetDisabledRequest { + reviewDisabled: boolean +} + +export interface ReviewSetDisabledResponse { + reviewDisabled: boolean +} + export interface ReviewNotifyEvent { pendingCount: number reviewUrl: string diff --git a/src/shared/transport/types/dto.ts b/src/shared/transport/types/dto.ts index 9607ca544..b7c6a4da3 100644 --- a/src/shared/transport/types/dto.ts +++ 
b/src/shared/transport/types/dto.ts @@ -79,6 +79,8 @@ export interface ConnectorDTO { // ============================================================================ export interface ProviderDTO { + /** Currently selected model for this provider, if any. Absent means connected but model not yet picked. */ + activeModel?: string apiKeyUrl?: string authMethod?: 'api-key' | 'oauth' category: 'other' | 'popular' diff --git a/src/tui/features/provider/components/provider-flow.tsx b/src/tui/features/provider/components/provider-flow.tsx index cde788c8e..db2f9462b 100644 --- a/src/tui/features/provider/components/provider-flow.tsx +++ b/src/tui/features/provider/components/provider-flow.tsx @@ -42,6 +42,17 @@ interface ProviderAction { name: string } +/** + * Throws on transport responses that ack with `success: false` so the + * existing try/catch surfaces server-side errors instead of silently + * marching forward into the next step. + */ +function ensureSuccess(response: {error?: string; success: boolean}): void { + if (!response.success) { + throw new Error(response.error ?? 'Operation failed') + } +} + export interface ProviderFlowProps { /** Hide the Cancel keybind in provider selection */ hideCancelButton?: boolean @@ -165,9 +176,18 @@ export const ProviderFlow: React.FC = ({ return } - // Already connected → show actions menu + // Already connected → show actions menu. Exception: openai-compatible + // is the only provider that can land in a connected-but-no-active-model + // state (no canonical defaultModel exists for arbitrary endpoints), so + // when it's the current provider we jump straight to the model picker + // so the welcome view's user can finish setup. For non-current + // half-configured providers we still show the actions menu so + // Disconnect / Set as active stay reachable (the picker would otherwise + // be a dead-end if the endpoint is down). 
if (provider.isConnected) { - setStep('provider_actions') + const needsModelPick = + provider.id === 'openai-compatible' && !provider.activeModel && provider.isCurrent + setStep(needsModelPick ? 'model_select' : 'provider_actions') return } @@ -321,15 +341,22 @@ export const ProviderFlow: React.FC = ({ setStep('connecting') try { - await connectMutation.mutateAsync({ + ensureSuccess(await connectMutation.mutateAsync({ apiKey: apiKey || undefined, baseUrl: baseUrl || undefined, providerId: selectedProvider.id, - }) + })) setStep('model_select') } catch (error_) { setError(formatTransportError(error_)) - setStep('api_key') + // Server rejection (e.g. unreachable openai-compatible URL) — return to + // the provider list where the error is rendered. The user can re-enter + // the flow with a corrected URL or API key. Mirror the other failure + // paths (e.g. handleSelect at the byterover branch) by clearing the + // selected provider too. + setStep('select') + setSelectedProvider(null) + setBaseUrl(null) } }, [baseUrl, connectMutation, selectedProvider]) diff --git a/test/commands/review-toggle.test.ts b/test/commands/review-toggle.test.ts new file mode 100644 index 000000000..0b3fb0c0b --- /dev/null +++ b/test/commands/review-toggle.test.ts @@ -0,0 +1,195 @@ +import type {ConnectionResult, ITransportClient} from '@campfirein/brv-transport-client' +import type {Config} from '@oclif/core' + +import {Config as OclifConfig} from '@oclif/core' +import {expect} from 'chai' +import sinon, {restore, stub} from 'sinon' + +import Review from '../../src/oclif/commands/review.js' +import {ReviewEvents} from '../../src/shared/transport/events/review-events.js' + +class TestableReview extends Review { + private readonly mockConnector: () => Promise + + constructor(argv: string[], mockConnector: () => Promise, config: Config) { + super(argv, config) + this.mockConnector = mockConnector + } + + protected override getDaemonClientOptions() { + return {maxRetries: 1, retryDelayMs: 0, 
transportConnector: this.mockConnector} + } +} + +describe('Review (top-level toggle command)', () => { + let config: Config + let loggedMessages: string[] + let stdoutOutput: string[] + let mockClient: sinon.SinonStubbedInstance + let mockConnector: sinon.SinonStub<[], Promise> + + before(async () => { + config = await OclifConfig.load(import.meta.url) + }) + + beforeEach(() => { + loggedMessages = [] + stdoutOutput = [] + + mockClient = { + connect: stub().resolves(), + disconnect: stub().resolves(), + getClientId: stub().returns('test-client-id'), + getState: stub().returns('connected'), + isConnected: stub().resolves(true), + joinRoom: stub().resolves(), + leaveRoom: stub().resolves(), + on: stub().returns(() => {}), + once: stub(), + onStateChange: stub().returns(() => {}), + request: stub() as unknown as ITransportClient['request'], + requestWithAck: stub().resolves(), + } as unknown as sinon.SinonStubbedInstance + + mockConnector = stub<[], Promise>().resolves({ + client: mockClient as unknown as ITransportClient, + projectRoot: '/test/project', + }) + }) + + afterEach(() => { + restore() + }) + + function makeCommand(...argv: string[]): TestableReview { + const cmd = new TestableReview(argv, mockConnector, config) + stub(cmd, 'log').callsFake((msg?: string) => { + loggedMessages.push(msg ?? 
'') + }) + return cmd + } + + function makeJsonCommand(...argv: string[]): TestableReview { + const cmd = new TestableReview([...argv, '--format', 'json'], mockConnector, config) + stub(cmd, 'log').callsFake((msg?: string) => { + if (msg) loggedMessages.push(msg) + }) + stub(process.stdout, 'write').callsFake((chunk: string | Uint8Array) => { + stdoutOutput.push(String(chunk)) + return true + }) + return cmd + } + + function parseJsonOutput(): {command: string; data: Record; success: boolean} { + return JSON.parse(stdoutOutput.join('').trim()) + } + + describe('--disable', () => { + it('sends review:setDisabled with reviewDisabled=true', async () => { + ;(mockClient.requestWithAck as sinon.SinonStub).resolves({reviewDisabled: true}) + + await makeCommand('--disable').run() + + const call = (mockClient.requestWithAck as sinon.SinonStub).firstCall + expect(call.args[0]).to.equal(ReviewEvents.SET_DISABLED) + expect(call.args[1]).to.deep.equal({reviewDisabled: true}) + }) + + it('prints confirmation in text mode', async () => { + ;(mockClient.requestWithAck as sinon.SinonStub).resolves({reviewDisabled: true}) + + await makeCommand('--disable').run() + + expect(loggedMessages.some((m) => m.toLowerCase().includes('disabled'))).to.be.true + }) + + it('outputs JSON success in json mode', async () => { + ;(mockClient.requestWithAck as sinon.SinonStub).resolves({reviewDisabled: true}) + + await makeJsonCommand('--disable').run() + + const json = parseJsonOutput() + expect(json.command).to.equal('review') + expect(json.success).to.be.true + expect(json.data).to.have.property('reviewDisabled', true) + }) + + it('reports error when daemon rejects (project not initialized)', async () => { + ;(mockClient.requestWithAck as sinon.SinonStub).rejects( + new Error('Project not initialized: /test/project. 
Run `brv init` first.'), + ) + + await makeJsonCommand('--disable').run() + + const json = parseJsonOutput() + expect(json.success).to.be.false + expect(json.data).to.have.property('status', 'error') + expect(String(json.data.error)).to.match(/not initialized/i) + }) + }) + + describe('--enable', () => { + it('sends review:setDisabled with reviewDisabled=false', async () => { + ;(mockClient.requestWithAck as sinon.SinonStub).resolves({reviewDisabled: false}) + + await makeCommand('--enable').run() + + const call = (mockClient.requestWithAck as sinon.SinonStub).firstCall + expect(call.args[0]).to.equal(ReviewEvents.SET_DISABLED) + expect(call.args[1]).to.deep.equal({reviewDisabled: false}) + }) + + it('prints confirmation in text mode', async () => { + ;(mockClient.requestWithAck as sinon.SinonStub).resolves({reviewDisabled: false}) + + await makeCommand('--enable').run() + + expect(loggedMessages.some((m) => m.toLowerCase().includes('enabled'))).to.be.true + }) + }) + + describe('without flags (status)', () => { + it('sends review:getDisabled and prints enabled when reviewDisabled=false', async () => { + ;(mockClient.requestWithAck as sinon.SinonStub).resolves({reviewDisabled: false}) + + await makeCommand().run() + + const call = (mockClient.requestWithAck as sinon.SinonStub).firstCall + expect(call.args[0]).to.equal(ReviewEvents.GET_DISABLED) + expect(loggedMessages.some((m) => m.toLowerCase().includes('enabled'))).to.be.true + }) + + it('prints disabled when reviewDisabled=true', async () => { + ;(mockClient.requestWithAck as sinon.SinonStub).resolves({reviewDisabled: true}) + + await makeCommand().run() + + expect(loggedMessages.some((m) => m.toLowerCase().includes('disabled'))).to.be.true + }) + + it('outputs JSON status', async () => { + ;(mockClient.requestWithAck as sinon.SinonStub).resolves({reviewDisabled: false}) + + await makeJsonCommand().run() + + const json = parseJsonOutput() + expect(json.success).to.be.true + 
expect(json.data).to.have.property('reviewDisabled', false) + }) + }) + + describe('flag validation', () => { + it('rejects passing both --disable and --enable', async () => { + let threw = false + try { + await makeCommand('--disable', '--enable').run() + } catch { + threw = true + } + + expect(threw).to.be.true + expect((mockClient.requestWithAck as sinon.SinonStub).called).to.be.false + }) + }) +}) diff --git a/test/integration/infra/git/isomorphic-git-service.test.ts b/test/integration/infra/git/isomorphic-git-service.test.ts index debc4eb27..f8da888ef 100644 --- a/test/integration/infra/git/isomorphic-git-service.test.ts +++ b/test/integration/infra/git/isomorphic-git-service.test.ts @@ -1,7 +1,7 @@ import {expect} from 'chai' import * as git from 'isomorphic-git' import fs, {existsSync} from 'node:fs' -import {mkdir, readFile, rm, unlink, writeFile} from 'node:fs/promises' +import {mkdir, readFile, rm, unlink, utimes, writeFile} from 'node:fs/promises' import {tmpdir} from 'node:os' import {join} from 'node:path' import {stub} from 'sinon' @@ -11,6 +11,7 @@ import type {IAuthStateStore} from '../../../../src/server/core/interfaces/state import {AuthToken} from '../../../../src/server/core/domain/entities/auth-token.js' import {GitAuthError, GitError} from '../../../../src/server/core/domain/errors/git-error.js' import {IsomorphicGitService} from '../../../../src/server/infra/git/isomorphic-git-service.js' +import {classifyTuple} from '../../../../src/server/infra/git/status-row-classifier.js' const COGIT_BASE = 'https://fake-cgit.example.com' @@ -355,6 +356,45 @@ describe('IsomorphicGitService', () => { expect(entries).to.deep.include({path: 'tracked.md', staged: false, status: 'untracked'}) }) + it('[1,2,0] git rm --cached then edit reports staged deletion + untracked', async () => { + await writeFile(join(testDir, 'tracked.md'), 'original') + await service.add({directory: testDir, filePaths: ['tracked.md']}) + await service.commit({directory: testDir, 
message: 'initial'}) + + await git.remove({dir: testDir, filepath: 'tracked.md', fs}) + await writeFile(join(testDir, 'tracked.md'), 'edited after rm --cached') + + const matrix = await git.statusMatrix({dir: testDir, fs}) + expect(matrix).to.deep.equal([['tracked.md', 1, 2, 0]]) + + const result = await service.status({directory: testDir}) + expect(result.isClean).to.be.false + const entries = result.files.filter((f) => f.path === 'tracked.md') + expect(entries).to.have.length(2) + expect(entries).to.deep.include({path: 'tracked.md', staged: true, status: 'deleted'}) + expect(entries).to.deep.include({path: 'tracked.md', staged: false, status: 'untracked'}) + }) + + it('[0,0,3] staged new file then deleted from disk reports add + delete', async () => { + // Need an existing commit so git.statusMatrix has a HEAD to compare against. + await writeFile(join(testDir, 'seed.md'), 'seed') + await service.add({directory: testDir, filePaths: ['seed.md']}) + await service.commit({directory: testDir, message: 'seed'}) + + await writeFile(join(testDir, 'fresh.md'), 'staged content') + await service.add({directory: testDir, filePaths: ['fresh.md']}) + await unlink(join(testDir, 'fresh.md')) + + const matrix = await git.statusMatrix({dir: testDir, fs}) + const freshRow = matrix.find((row) => row[0] === 'fresh.md') + expect(freshRow).to.deep.equal(['fresh.md', 0, 0, 3]) + + const result = await service.status({directory: testDir}) + const entries = result.files.filter((f) => f.path === 'fresh.md') + expect(entries).to.deep.include({path: 'fresh.md', staged: true, status: 'added'}) + expect(entries).to.deep.include({path: 'fresh.md', staged: false, status: 'deleted'}) + }) + it('[0,2,3] reports partially staged new file as staged added + unstaged modified', async () => { // new file: add to index (staged added), then modify on disk without re-staging await writeFile(join(testDir, 'new.md'), 'original') @@ -369,6 +409,66 @@ describe('IsomorphicGitService', () => { 
expect(entries).to.deep.include({path: 'new.md', staged: false, status: 'modified'}) }) + it('[1,1,3] reports both staged + unstaged modifications when workdir is restored to HEAD after add', async () => { + // Reachable in the wild via editor undo+autosave, AI agent revert, or sync-tool rollback + // after `brv vc add`: workdir matches HEAD, but the index still holds the staged blob. + // Native git reports BOTH a staged modification (HEAD->INDEX) AND an unstaged + // modification (INDEX->WORKDIR), since INDEX differs from both. + const tracked = join(testDir, 'tracked.md') + await writeFile(tracked, 'v1\n') + await service.add({directory: testDir, filePaths: ['tracked.md']}) + await service.commit({directory: testDir, message: 'initial'}) + + await writeFile(tracked, 'v2\n') + await service.add({directory: testDir, filePaths: ['tracked.md']}) + + // Filesystem-only restore to HEAD content; index untouched. + // Force a distinct mtime so isomorphic-git re-reads the workdir blob instead of + // trusting the index's stat cache (which would yield [1,2,2] otherwise). + await writeFile(tracked, 'v1\n') + const future = new Date(Date.now() + 2000) + await utimes(tracked, future, future) + + const matrix = await git.statusMatrix({dir: testDir, fs}) + expect(matrix).to.deep.equal([['tracked.md', 1, 1, 3]]) + + const result = await service.status({directory: testDir}) + + expect(result.isClean).to.be.false + const entries = result.files.filter((f) => f.path === 'tracked.md') + expect(entries).to.have.length(2) + expect(entries).to.deep.include({path: 'tracked.md', staged: true, status: 'modified'}) + expect(entries).to.deep.include({path: 'tracked.md', staged: false, status: 'modified'}) + }) + + it('status.isClean implies pull dirty-filter sees no rows (cross-property invariant)', async () => { + // Engineer the [1,1,3] tuple. 
status() and pull() must agree on cleanliness: + // both project the matrix through classifyTuple, so the dirty set computed + // here must mirror what pull() actually inspects internally. + const path = join(testDir, 'a.md') + await writeFile(path, 'v1\n') + await service.add({directory: testDir, filePaths: ['a.md']}) + await service.commit({directory: testDir, message: 'initial'}) + + await writeFile(path, 'v2\n') + await service.add({directory: testDir, filePaths: ['a.md']}) + await writeFile(path, 'v1\n') + const future = new Date(Date.now() + 2000) + await utimes(path, future, future) + + const status = await service.status({directory: testDir}) + const matrix = await git.statusMatrix({dir: testDir, fs}) + const pullDirty = matrix + .filter(([, head, workdir, stage]) => classifyTuple(head, workdir, stage).dirty) + .map((row) => String(row[0])) + + if (status.isClean) { + expect(pullDirty, 'pull would consider files dirty while status reports clean').to.deep.equal([]) + } else { + expect(status.files.map((f) => f.path)).to.include.members(pullDirty) + } + }) + it('reports correct statuses for multiple files with mixed states', async () => { // Setup: one committed file (becomes base for HEAD entries) await writeFile(join(testDir, 'committed.md'), 'content') @@ -1597,5 +1697,4 @@ describe('IsomorphicGitService', () => { expect(content).to.equal('v1\n') }) }) - }) diff --git a/test/integration/infra/git/status-tuple-reachability.test.ts b/test/integration/infra/git/status-tuple-reachability.test.ts new file mode 100644 index 000000000..48fb3d7f6 --- /dev/null +++ b/test/integration/infra/git/status-tuple-reachability.test.ts @@ -0,0 +1,345 @@ +/** + * Tuple reachability harness for `git.statusMatrix`. + * + * Drives statusMatrix through a corpus of operation sequences that cover every + * git-state transition we can think of, then asserts that every tuple it + * produces is classifiable by `classifyTuple` (i.e. doesn't throw). 
+ * + * The bug class this guards against is a silent-drop: a reachable tuple + * exists in the wild but no consumer enumerates it. Throw-on-unknown in the + * unified classifier converts that silent-drop into a loud failure, but only + * if the test corpus actually exercises the tuple. This harness IS that corpus. + * + * If a scenario surfaces a new tuple, classifyTuple throws and this test + * fails. The fix is then to add the tuple to the classifier with the right + * projection, and to add the scenario as a permanent regression case. + */ +import {expect} from 'chai' +import * as git from 'isomorphic-git' +import fs from 'node:fs' +import {mkdir, rm, unlink, utimes, writeFile} from 'node:fs/promises' +import {tmpdir} from 'node:os' +import {join} from 'node:path' + +import {classifyTuple} from '../../../../src/server/infra/git/status-row-classifier.js' + +const author = {email: 't@t.com', name: 'T'} + +function makeDir(): string { + return join(tmpdir(), `tuple-fuzz-${Date.now()}-${Math.random().toString(36).slice(2, 7)}`) +} + +async function initWithFile(dir: string, file: string, content: string): Promise { + await mkdir(dir, {recursive: true}) + await git.init({defaultBranch: 'main', dir, fs}) + await writeFile(join(dir, file), content) + await git.add({dir, filepath: file, fs}) + return git.commit({author, dir, fs, message: 'init'}) +} + +type Scenario = { + /** + * When set, the scenario is expected to produce exactly this tuple at this filepath. + * Locks the scenario name to the actual matrix row so a quietly-shifted recipe + * (e.g. isomorphic-git encoding change) fails loudly instead of silently classifying + * a different tuple under the named scenario. 
+ */ + expectedTuple?: {filepath: string; tuple: [number, number, number]} + name: string + setup: (dir: string) => Promise +} + +const scenarios: Scenario[] = [ + { + name: 'empty repo', + async setup(dir) { + await mkdir(dir, {recursive: true}) + await git.init({defaultBranch: 'main', dir, fs}) + }, + }, + { + name: 'baseline single committed file (clean)', + async setup(dir) { + await initWithFile(dir, 'f.md', 'v1') + }, + }, + { + expectedTuple: {filepath: 'new.md', tuple: [0, 2, 0]}, + name: '[0,2,0] untracked new file', + async setup(dir) { + await initWithFile(dir, 'f.md', 'v1') + await writeFile(join(dir, 'new.md'), 'fresh') + }, + }, + { + expectedTuple: {filepath: 'new.md', tuple: [0, 2, 2]}, + name: '[0,2,2] staged new file', + async setup(dir) { + await initWithFile(dir, 'f.md', 'v1') + await writeFile(join(dir, 'new.md'), 'fresh') + await git.add({dir, filepath: 'new.md', fs}) + }, + }, + { + expectedTuple: {filepath: 'new.md', tuple: [0, 2, 3]}, + name: '[0,2,3] partially staged new file', + async setup(dir) { + await initWithFile(dir, 'f.md', 'v1') + await writeFile(join(dir, 'new.md'), 'fresh') + await git.add({dir, filepath: 'new.md', fs}) + await writeFile(join(dir, 'new.md'), 'modified after stage') + }, + }, + { + expectedTuple: {filepath: 'new.md', tuple: [0, 0, 3]}, + name: '[0,0,3] staged add then deleted from disk', + async setup(dir) { + await initWithFile(dir, 'f.md', 'v1') + await writeFile(join(dir, 'new.md'), 'fresh') + await git.add({dir, filepath: 'new.md', fs}) + await unlink(join(dir, 'new.md')) + }, + }, + { + expectedTuple: {filepath: 'f.md', tuple: [1, 0, 0]}, + name: '[1,0,0] staged deletion (git rm)', + async setup(dir) { + await initWithFile(dir, 'f.md', 'v1') + await git.remove({dir, filepath: 'f.md', fs}) + await unlink(join(dir, 'f.md')) + }, + }, + { + expectedTuple: {filepath: 'f.md', tuple: [1, 0, 1]}, + name: '[1,0,1] unstaged deletion (rm without git rm)', + async setup(dir) { + await initWithFile(dir, 'f.md', 'v1') + 
await unlink(join(dir, 'f.md')) + }, + }, + // Note: [1,0,2] is unreachable by the encoding. w=0 means WORKDIR is absent; + // s=2 means "INDEX matches WORKDIR" by content, which is impossible when WORKDIR + // has no content. The encoding produces s=0 (INDEX absent) or s=3 (differs from both) + // in that situation. The scenario for [1,0,3] below covers the realistic recipe. + { + expectedTuple: {filepath: 'f.md', tuple: [1, 0, 3]}, + name: '[1,0,3] staged modification then deleted from disk', + async setup(dir) { + await initWithFile(dir, 'f.md', 'v1') + await writeFile(join(dir, 'f.md'), 'v2') + await git.add({dir, filepath: 'f.md', fs}) + await writeFile(join(dir, 'f.md'), 'v3') // make stage=3 by introducing partial-stage + await unlink(join(dir, 'f.md')) + }, + }, + { + expectedTuple: {filepath: 'f.md', tuple: [1, 1, 0]}, + name: '[1,1,0] git rm --cached only', + async setup(dir) { + await initWithFile(dir, 'f.md', 'v1') + await git.remove({dir, filepath: 'f.md', fs}) + }, + }, + { + expectedTuple: {filepath: 'f.md', tuple: [1, 2, 0]}, + name: '[1,2,0] git rm --cached then edit workdir', + async setup(dir) { + await initWithFile(dir, 'f.md', 'v1') + await git.remove({dir, filepath: 'f.md', fs}) + await writeFile(join(dir, 'f.md'), 'v2-after-rm-cached') + }, + }, + { + expectedTuple: {filepath: 'f.md', tuple: [1, 1, 3]}, + name: '[1,1,3] workdir restored to HEAD after add', + async setup(dir) { + await initWithFile(dir, 'f.md', 'v1') + await writeFile(join(dir, 'f.md'), 'v2') + await git.add({dir, filepath: 'f.md', fs}) + // Filesystem-only restore to HEAD content. utimes bumps mtime past the + // index's stat cache so isomorphic-git re-hashes the workdir blob; without + // that, the tuple collapses to [1,2,2] and the [1,1,3] scenario silently + // turns into a duplicate of [1,2,2]. 
+ await writeFile(join(dir, 'f.md'), 'v1') + const future = new Date(Date.now() + 2000) + await utimes(join(dir, 'f.md'), future, future) + }, + }, + { + expectedTuple: {filepath: 'f.md', tuple: [1, 2, 1]}, + name: '[1,2,1] unstaged modification', + async setup(dir) { + await initWithFile(dir, 'f.md', 'v1') + await writeFile(join(dir, 'f.md'), 'v2') + // Same-size payloads share stat info so the index cache reports clean unless + // mtime is bumped past the cached value (same workaround as [1,1,3]). + const future = new Date(Date.now() + 2000) + await utimes(join(dir, 'f.md'), future, future) + }, + }, + { + expectedTuple: {filepath: 'f.md', tuple: [1, 2, 2]}, + name: '[1,2,2] staged modification', + async setup(dir) { + await initWithFile(dir, 'f.md', 'v1') + await writeFile(join(dir, 'f.md'), 'v2') + await git.add({dir, filepath: 'f.md', fs}) + }, + }, + { + expectedTuple: {filepath: 'f.md', tuple: [1, 2, 3]}, + name: '[1,2,3] partially staged modification', + async setup(dir) { + await initWithFile(dir, 'f.md', 'v1') + await writeFile(join(dir, 'f.md'), 'v2') + await git.add({dir, filepath: 'f.md', fs}) + await writeFile(join(dir, 'f.md'), 'v3') + // Bump mtime so the post-add v3 write is observed despite same byte size. 
+ const future = new Date(Date.now() + 2000) + await utimes(join(dir, 'f.md'), future, future) + }, + }, + { + name: 'multi-file: staged-mod + unstaged-mod + untracked', + async setup(dir) { + await mkdir(dir, {recursive: true}) + await git.init({defaultBranch: 'main', dir, fs}) + await Promise.all(['a.md', 'b.md', 'c.md'].map((f) => writeFile(join(dir, f), 'v1'))) + await git.add({dir, filepath: 'a.md', fs}) + await git.add({dir, filepath: 'b.md', fs}) + await git.add({dir, filepath: 'c.md', fs}) + await git.commit({author, dir, fs, message: 'init'}) + await writeFile(join(dir, 'a.md'), 'staged-mod') + await git.add({dir, filepath: 'a.md', fs}) + await writeFile(join(dir, 'b.md'), 'unstaged-mod') + await writeFile(join(dir, 'untracked.md'), 'fresh') + }, + }, + { + name: 'merge conflict: both_modified', + async setup(dir) { + await initWithFile(dir, 'f.md', 'base') + await git.branch({dir, fs, ref: 'feature'}) + await git.checkout({dir, fs, ref: 'feature'}) + await writeFile(join(dir, 'f.md'), 'feature') + await git.add({dir, filepath: 'f.md', fs}) + await git.commit({author, dir, fs, message: 'feature edit'}) + await git.checkout({dir, fs, ref: 'main'}) + await writeFile(join(dir, 'f.md'), 'main') + await git.add({dir, filepath: 'f.md', fs}) + await git.commit({author, dir, fs, message: 'main edit'}) + try { + await git.merge({author, dir, fs, theirs: 'feature'}) + } catch { + // MergeConflictError expected + } + }, + }, + { + name: 'merge conflict: both_added (same path on two branches)', + async setup(dir) { + await initWithFile(dir, 'seed.md', 'seed') + await git.branch({dir, fs, ref: 'feature'}) + await git.checkout({dir, fs, ref: 'feature'}) + await writeFile(join(dir, 'shared.md'), 'feature variant') + await git.add({dir, filepath: 'shared.md', fs}) + await git.commit({author, dir, fs, message: 'feature add'}) + await git.checkout({dir, fs, ref: 'main'}) + await writeFile(join(dir, 'shared.md'), 'main variant') + await git.add({dir, filepath: 'shared.md', 
fs}) + await git.commit({author, dir, fs, message: 'main add'}) + try { + await git.merge({author, dir, fs, theirs: 'feature'}) + } catch { + // MergeConflictError expected + } + }, + }, + { + name: 'merge conflict: deleted_modified (one side deletes, other modifies)', + async setup(dir) { + await initWithFile(dir, 'f.md', 'base') + await git.branch({dir, fs, ref: 'feature'}) + await git.checkout({dir, fs, ref: 'feature'}) + await writeFile(join(dir, 'f.md'), 'feature edit') + await git.add({dir, filepath: 'f.md', fs}) + await git.commit({author, dir, fs, message: 'feature edit'}) + await git.checkout({dir, fs, ref: 'main'}) + await git.remove({dir, filepath: 'f.md', fs}) + await unlink(join(dir, 'f.md')) + await git.commit({author, dir, fs, message: 'main delete'}) + try { + await git.merge({author, dir, fs, theirs: 'feature'}) + } catch { + // MergeConflictError expected + } + }, + }, + { + name: 'reset --soft analog: commit then unstage via resetIndex', + async setup(dir) { + await initWithFile(dir, 'f.md', 'v1') + await writeFile(join(dir, 'f.md'), 'v2') + await git.add({dir, filepath: 'f.md', fs}) + await git.commit({author, dir, fs, message: 'v2'}) + // soft reset: index keeps v2, HEAD moved back, workdir keeps v2 — should land in [1,2,2] + const heads = await git.log({depth: 2, dir, fs}) + if (heads.length >= 2) { + await git.writeRef({dir, force: true, fs, ref: 'refs/heads/main', value: heads[1].oid}) + } + }, + }, + { + name: 'nested directory tracked file modified', + async setup(dir) { + await mkdir(join(dir, 'sub', 'deep'), {recursive: true}) + await git.init({defaultBranch: 'main', dir, fs}) + await writeFile(join(dir, 'sub', 'deep', 'f.md'), 'v1') + await git.add({dir, filepath: 'sub/deep/f.md', fs}) + await git.commit({author, dir, fs, message: 'init'}) + await writeFile(join(dir, 'sub', 'deep', 'f.md'), 'v2') + }, + }, +] + +describe('statusMatrix tuple reachability fuzz (Gap D)', () => { + const observedTuples = new Set() + const 
unclassifiableTuples: Array<{key: string; scenario: string}> = [] + + for (const scenario of scenarios) { + it(`scenario "${scenario.name}" → every observed tuple is classifiable`, async () => { + const dir = makeDir() + try { + await scenario.setup(dir) + const matrix = await git.statusMatrix({dir, fs}) + if (scenario.expectedTuple) { + const {filepath, tuple} = scenario.expectedTuple + const row = matrix.find((r) => r[0] === filepath) + expect(row, `scenario "${scenario.name}" expected ${filepath} present in matrix`).to.not.be.undefined + expect(row).to.deep.equal([filepath, ...tuple]) + } + + for (const [filepath, head, workdir, stage] of matrix) { + const key = `[${head},${workdir},${stage}]` + observedTuples.add(key) + try { + classifyTuple(head, workdir, stage) + } catch (error) { + unclassifiableTuples.push({key, scenario: `${scenario.name} (${String(filepath)})`}) + throw error + } + } + } finally { + await rm(dir, {force: true, recursive: true}).catch(() => {}) + } + }) + } + + it('reachable set is non-empty and fully classifiable', () => { + // The per-scenario tests above are the real regression guard: a new tuple + // surfaces ⇒ classifyTuple throws ⇒ that test fails. This summary just + // pins a sanity floor so an empty/silent harness can't pretend to pass. 
+ expect(unclassifiableTuples).to.deep.equal([]) + expect(observedTuples.size).to.be.greaterThan(0) + }) +}) diff --git a/test/unit/agent/tools/curate-tool.test.ts b/test/unit/agent/tools/curate-tool.test.ts index 2239a6146..f1f7d44f4 100644 --- a/test/unit/agent/tools/curate-tool.test.ts +++ b/test/unit/agent/tools/curate-tool.test.ts @@ -3,7 +3,8 @@ import * as fs from 'node:fs/promises' import {tmpdir} from 'node:os' import {join} from 'node:path' -import {createCurateTool} from '../../../../src/agent/infra/tools/implementations/curate-tool.js' +import {runWithReviewDisabled} from '../../../../src/agent/infra/tools/implementations/curate-tool-task-context.js' +import {createCurateTool, executeCurate} from '../../../../src/agent/infra/tools/implementations/curate-tool.js' interface CurateOutput { applied: Array<{ @@ -49,6 +50,33 @@ function countByPrefix(items: string[], prefix: string): number { return count } +async function writeBrvConfig(tmpDir: string, reviewDisabled: boolean): Promise { + const configPath = join(tmpDir, '.brv', 'config.json') + await fs.writeFile( + configPath, + JSON.stringify({createdAt: '2026-01-01T00:00:00.000Z', cwd: tmpDir, reviewDisabled, version: '0.0.1'}), + 'utf8', + ) +} + +async function seedExistingFile(basePath: string): Promise { + const tool = createCurateTool() + await tool.execute({ + basePath, + operations: [ + { + confidence: 'high', + content: {keywords: [], snippets: ['initial content'], tags: []}, + impact: 'low', + path: 'security/auth', + reason: 'seed', + title: 'JWT Strategy', + type: 'ADD', + }, + ], + }) +} + describe('Curate Tool', () => { let tmpDir: string let basePath: string @@ -1406,4 +1434,182 @@ describe('Curate Tool', () => { expect(result.applied[0].impact).to.equal('low') }) }) + + describe('Review backup gating (`brv review --disable`)', () => { + it('does NOT create review-backups when reviewDisabled=true (UPDATE on existing file)', async () => { + await seedExistingFile(basePath) + await 
writeBrvConfig(tmpDir, true) + + const tool = createCurateTool() + const result = (await tool.execute({ + basePath, + operations: [ + { + confidence: 'high', + content: {keywords: [], snippets: ['updated content'], tags: []}, + impact: 'high', + path: 'security/auth', + reason: 'CRITICAL update', + title: 'JWT Strategy', + type: 'UPDATE', + }, + ], + })) as CurateOutput + + expect(result.applied[0].status).to.equal('success') + expect(result.applied[0].needsReview).to.be.true + + const backupsDir = join(tmpDir, '.brv', 'review-backups') + expect(await pathExists(backupsDir)).to.equal(false) + }) + + it('DOES create review-backups when reviewDisabled=false (UPDATE on existing file)', async () => { + await seedExistingFile(basePath) + await writeBrvConfig(tmpDir, false) + + const tool = createCurateTool() + await tool.execute({ + basePath, + operations: [ + { + confidence: 'high', + content: {keywords: [], snippets: ['updated content'], tags: []}, + impact: 'high', + path: 'security/auth', + reason: 'CRITICAL update', + title: 'JWT Strategy', + type: 'UPDATE', + }, + ], + }) + + const backupsDir = join(tmpDir, '.brv', 'review-backups') + expect(await pathExists(backupsDir)).to.equal(true) + }) + + it('skips backups across MULTIPLE ops in one tool call when disabled (snapshot consistency)', async () => { + await seedExistingFile(basePath) + await writeBrvConfig(tmpDir, true) + + const tool = createCurateTool() + // Three updates in a single tool invocation — all should observe the snapshot + const result = (await tool.execute({ + basePath, + operations: [ + {confidence: 'high', content: {keywords: [], snippets: ['v2'], tags: []}, impact: 'high', path: 'security/auth', reason: 'r', title: 'JWT Strategy', type: 'UPDATE'}, + {confidence: 'high', content: {keywords: [], snippets: ['v3'], tags: []}, impact: 'high', path: 'security/auth', reason: 'r', title: 'JWT Strategy', type: 'UPDATE'}, + {confidence: 'high', content: {keywords: [], snippets: ['v4'], tags: []}, impact: 
'high', path: 'security/auth', reason: 'r', title: 'JWT Strategy', type: 'UPDATE'}, + ], + })) as CurateOutput + + for (const op of result.applied) { + expect(op.status).to.equal('success') + } + + const backupsDir = join(tmpDir, '.brv', 'review-backups') + expect(await pathExists(backupsDir)).to.equal(false) + }) + + it('treats missing config as enabled (fail-open) — backups still created', async () => { + await seedExistingFile(basePath) + // Note: no writeBrvConfig — .brv/config.json does not exist + + const tool = createCurateTool() + await tool.execute({ + basePath, + operations: [ + { + confidence: 'high', + content: {keywords: [], snippets: ['updated'], tags: []}, + impact: 'high', + path: 'security/auth', + reason: 'r', + title: 'JWT Strategy', + type: 'UPDATE', + }, + ], + }) + + const backupsDir = join(tmpDir, '.brv', 'review-backups') + expect(await pathExists(backupsDir)).to.equal(true) + }) + + it('ALS scope takes precedence over config file — scope=true suppresses backups even when config says false', async () => { + await seedExistingFile(basePath) + // Config says review is ENABLED + await writeBrvConfig(tmpDir, false) + + // Scope (daemon-stamped snapshot) says DISABLED + await runWithReviewDisabled(true, async () => { + const tool = createCurateTool() + await tool.execute({ + basePath, + operations: [ + {confidence: 'high', content: {keywords: [], snippets: ['scope-test'], tags: []}, impact: 'high', path: 'security/auth', reason: 'r', title: 'JWT Strategy', type: 'UPDATE'}, + ], + }) + }) + + const backupsDir = join(tmpDir, '.brv', 'review-backups') + expect(await pathExists(backupsDir)).to.equal(false) + }) + + it('ALS scope takes precedence over config file — scope=false enables backups even when config says true', async () => { + await seedExistingFile(basePath) + // Config says review is DISABLED + await writeBrvConfig(tmpDir, true) + + // Scope says ENABLED + await runWithReviewDisabled(false, async () => { + const tool = createCurateTool() + 
await tool.execute({ + basePath, + operations: [ + {confidence: 'high', content: {keywords: [], snippets: ['scope-test-2'], tags: []}, impact: 'high', path: 'security/auth', reason: 'r', title: 'JWT Strategy', type: 'UPDATE'}, + ], + }) + }) + + const backupsDir = join(tmpDir, '.brv', 'review-backups') + expect(await pathExists(backupsDir)).to.equal(true) + }) + + it('ALS scope honored via executeCurate sandbox path (no _context.taskId) — proves CurateService route picks up the snapshot', async () => { + // This is the regression that the Map-based registry missed: CurateService.curate() + // calls executeCurate(input, undefined, ...). Without ALS the file read wins on toggle. + await seedExistingFile(basePath) + // File config says DISABLED — what the CLI would see after a mid-task `brv review --disable` + await writeBrvConfig(tmpDir, true) + + // Scope captured the snapshot at task-create (review was ENABLED then) + await runWithReviewDisabled(false, async () => { + // Mimic CurateService.curate(): executeCurate with _context = undefined + await executeCurate({ + basePath, + operations: [ + {confidence: 'high', content: {keywords: [], snippets: ['als-via-sandbox'], tags: []}, impact: 'high', path: 'security/auth', reason: 'r', title: 'JWT Strategy', type: 'UPDATE'}, + ], + }) + }) + + const backupsDir = join(tmpDir, '.brv', 'review-backups') + expect(await pathExists(backupsDir)).to.equal(true) + }) + + it('outside any ALS scope falls back to config file', async () => { + await seedExistingFile(basePath) + await writeBrvConfig(tmpDir, true) + + // No runWithReviewDisabled wrapper → ALS returns undefined → fallback reads config = disabled + await executeCurate({ + basePath, + operations: [ + {confidence: 'high', content: {keywords: [], snippets: ['no-scope'], tags: []}, impact: 'high', path: 'security/auth', reason: 'r', title: 'JWT Strategy', type: 'UPDATE'}, + ], + }) + + const backupsDir = join(tmpDir, '.brv', 'review-backups') + expect(await 
pathExists(backupsDir)).to.equal(false) + }) + }) }) diff --git a/test/unit/core/domain/entities/brv-config.test.ts b/test/unit/core/domain/entities/brv-config.test.ts index 53d1db313..404603e62 100644 --- a/test/unit/core/domain/entities/brv-config.test.ts +++ b/test/unit/core/domain/entities/brv-config.test.ts @@ -230,6 +230,55 @@ describe('BrvConfig', () => { }) }) + describe('reviewDisabled', () => { + it('should default to undefined when not set', () => { + const config = new BrvConfig(validConstructorArgs) + expect(config.reviewDisabled).to.be.undefined + }) + + it('should preserve true value through constructor', () => { + const config = new BrvConfig({...validConstructorArgs, reviewDisabled: true}) + expect(config.reviewDisabled).to.be.true + }) + + it('should round-trip true through toJson/fromJson', () => { + const config = new BrvConfig({...validConstructorArgs, reviewDisabled: true}) + const restored = BrvConfig.fromJson(config.toJson()) + expect(restored.reviewDisabled).to.be.true + }) + + it('should round-trip false through toJson/fromJson', () => { + const config = new BrvConfig({...validConstructorArgs, reviewDisabled: false}) + const restored = BrvConfig.fromJson(config.toJson()) + expect(restored.reviewDisabled).to.be.false + }) + + it('should reject non-boolean reviewDisabled in fromJson', () => { + expect(() => + BrvConfig.fromJson({...validConstructorArgs, reviewDisabled: 'yes'}), + ).to.throw('Invalid BrvConfig JSON structure') + }) + + it('withReviewDisabled creates a new config with the flag set, preserving other fields', () => { + const original = new BrvConfig(validConstructorArgs) + const disabled = original.withReviewDisabled(true) + + expect(disabled.reviewDisabled).to.be.true + expect(disabled.spaceId).to.equal(original.spaceId) + expect(disabled.teamId).to.equal(original.teamId) + expect(disabled.cwd).to.equal(original.cwd) + expect(disabled.createdAt).to.equal(original.createdAt) + // Original is not mutated + 
expect(original.reviewDisabled).to.be.undefined + }) + + it('withReviewDisabled(false) creates an enabled config', () => { + const original = new BrvConfig({...validConstructorArgs, reviewDisabled: true}) + const enabled = original.withReviewDisabled(false) + expect(enabled.reviewDisabled).to.be.false + }) + }) + describe('fromSpace', () => { it('should create config from Space entity with current version', () => { const space = new Space({ diff --git a/test/unit/core/domain/entities/provider-registry.test.ts b/test/unit/core/domain/entities/provider-registry.test.ts index 9749c9a25..e3da74413 100644 --- a/test/unit/core/domain/entities/provider-registry.test.ts +++ b/test/unit/core/domain/entities/provider-registry.test.ts @@ -97,6 +97,11 @@ describe('Provider Registry', () => { expect(provider?.oauth).to.be.undefined }) + it('should not have a defaultModel for openai-compatible (no sensible placeholder for self-hosted endpoints)', () => { + const provider = getProviderById('openai-compatible') + expect(provider?.defaultModel).to.be.undefined + }) + it('should not have oauth config for byterover', () => { const provider = getProviderById('byterover') expect(provider?.oauth).to.be.undefined diff --git a/test/unit/infra/daemon/post-work-registry.test.ts b/test/unit/infra/daemon/post-work-registry.test.ts new file mode 100644 index 000000000..c7a6a8a51 --- /dev/null +++ b/test/unit/infra/daemon/post-work-registry.test.ts @@ -0,0 +1,282 @@ +import {expect} from 'chai' + +import {PostWorkRegistry} from '../../../../src/server/infra/daemon/post-work-registry.js' + +const noop = (): void => { + // intentionally empty +} + +/** Manually-resolvable promise — lets a test step through ordering invariants without sleeps. 
*/ +function deferred(): {promise: Promise; reject: (err: Error) => void; resolve: (value: T) => void;} { + let resolve: (value: T) => void = noop + let reject: (err: Error) => void = noop + const promise = new Promise((res, rej) => { + resolve = res + reject = rej + }) + return {promise, reject, resolve} +} + +/** A never-resolving promise — feeds drain() with a thunk that exceeds the timeout. */ +function neverResolves(): Promise { + return new Promise((_resolve) => { + // never call resolve + }) +} + +describe('PostWorkRegistry', () => { + describe('submit', () => { + it('runs the submitted thunk asynchronously', async () => { + const registry = new PostWorkRegistry() + const ran: string[] = [] + const done = deferred() + + registry.submit('/projA', async () => { + ran.push('thunk') + done.resolve() + }) + + // submit returns synchronously; the thunk has not necessarily started yet + ran.push('after-submit') + + await done.promise + expect(ran).to.deep.equal(['after-submit', 'thunk']) + }) + + it('serialises work for the same project (per-project mutex)', async () => { + const registry = new PostWorkRegistry() + const order: string[] = [] + const a = deferred() + const b = deferred() + const aStarted = deferred() + const bStarted = deferred() + + registry.submit('/proj', async () => { + order.push('a-start') + aStarted.resolve() + await a.promise + order.push('a-end') + }) + registry.submit('/proj', async () => { + order.push('b-start') + bStarted.resolve() + await b.promise + order.push('b-end') + }) + + // A starts immediately; B is blocked by mutex until A completes. + await aStarted.promise + // Let several macro/microtasks run — B must NOT start while A is awaiting `a`. + await new Promise((resolve) => { + setTimeout(resolve, 20) + }) + expect(order).to.deep.equal(['a-start']) + + // Release A. B should start after A finishes. 
+ a.resolve() + await bStarted.promise + expect(order).to.deep.equal(['a-start', 'a-end', 'b-start']) + + // Release B and wait for the project tail to drain. + b.resolve() + await registry.awaitProject('/proj') + expect(order).to.deep.equal(['a-start', 'a-end', 'b-start', 'b-end']) + }) + + it('runs work for different projects concurrently', async () => { + const registry = new PostWorkRegistry() + const order: string[] = [] + const a = deferred() + const b = deferred() + + registry.submit('/projA', async () => { + order.push('A-start') + await a.promise + order.push('A-end') + }) + registry.submit('/projB', async () => { + order.push('B-start') + await b.promise + order.push('B-end') + }) + + await Promise.resolve() + await Promise.resolve() + // Both should have started — different projects, no shared mutex. + expect(order).to.have.members(['A-start', 'B-start']) + + a.resolve() + b.resolve() + await registry.awaitProject('/projA') + await registry.awaitProject('/projB') + expect(order).to.have.members(['A-start', 'B-start', 'A-end', 'B-end']) + }) + + it('isolates errors — a thrown thunk does not block subsequent submissions', async () => { + const registry = new PostWorkRegistry() + const order: string[] = [] + + registry.submit('/proj', async () => { + order.push('a') + throw new Error('boom') + }) + registry.submit('/proj', async () => { + order.push('b') + }) + + await registry.awaitProject('/proj') + expect(order).to.deep.equal(['a', 'b']) + }) + }) + + describe('awaitProject', () => { + it('resolves immediately when there is no work for the project', async () => { + const registry = new PostWorkRegistry() + const start = Date.now() + await registry.awaitProject('/never-submitted') + expect(Date.now() - start).to.be.lessThan(50) + }) + + it('resolves only after all queued work for the project completes', async () => { + const registry = new PostWorkRegistry() + const work = deferred() + let done = false + + registry.submit('/proj', async () => { + await 
work.promise + }) + + const awaitPromise = registry.awaitProject('/proj').then(() => { + done = true + }) + + await Promise.resolve() + expect(done).to.equal(false) + + work.resolve() + await awaitPromise + expect(done).to.equal(true) + }) + + it('does not wait for work submitted to other projects', async () => { + const registry = new PostWorkRegistry() + const otherWork = deferred() + + registry.submit('/other', async () => { + await otherWork.promise + }) + + const start = Date.now() + await registry.awaitProject('/proj') + expect(Date.now() - start).to.be.lessThan(50) + otherWork.resolve() + await registry.awaitProject('/other') + }) + }) + + describe('awaitAll', () => { + it('resolves immediately when there is no work in any project', async () => { + const registry = new PostWorkRegistry() + const start = Date.now() + await registry.awaitAll() + expect(Date.now() - start).to.be.lessThan(50) + }) + + it('resolves only after every queued tail completes (multi-project)', async () => { + const registry = new PostWorkRegistry() + const a = deferred() + const b = deferred() + let done = false + + registry.submit('/projA', async () => { + await a.promise + }) + registry.submit('/projB', async () => { + await b.promise + }) + + const awaiting = registry.awaitAll().then(() => { + done = true + }) + + // Neither project finished yet — awaitAll must block. + await new Promise((resolve) => { + setTimeout(resolve, 20) + }) + expect(done).to.equal(false) + + a.resolve() + // Still waiting on B. + await new Promise((resolve) => { + setTimeout(resolve, 20) + }) + expect(done).to.equal(false) + + b.resolve() + await awaiting + expect(done).to.equal(true) + }) + + it('does not block on submissions arriving after the call', async () => { + // Tail captured at call time (matches awaitProject semantics) — the + // hot-swap caller needs a deterministic completion, not an open wait. 
+ const registry = new PostWorkRegistry() + const a = deferred() + registry.submit('/proj', async () => { + await a.promise + }) + + const awaiting = registry.awaitAll() + const b = deferred() + registry.submit('/proj', async () => { + await b.promise + }) + + a.resolve() + await awaiting + b.resolve() + await registry.awaitProject('/proj') + }) + }) + + describe('drain', () => { + it('returns when all in-flight work across all projects completes', async () => { + const registry = new PostWorkRegistry() + const a = deferred() + const b = deferred() + + registry.submit('/projA', async () => { await a.promise }) + registry.submit('/projB', async () => { await b.promise }) + + const drainPromise = registry.drain(5000) + await Promise.resolve() + + a.resolve() + b.resolve() + + const result = await drainPromise + expect(result.drained).to.equal(2) + expect(result.abandoned).to.equal(0) + }) + + it('abandons work that does not finish within the timeout', async () => { + const registry = new PostWorkRegistry() + registry.submit('/proj', neverResolves) + + const result = await registry.drain(50) + expect(result.drained).to.equal(0) + expect(result.abandoned).to.equal(1) + }) + + it('counts errored thunks as drained, not abandoned', async () => { + const registry = new PostWorkRegistry() + registry.submit('/proj', async () => { + throw new Error('failure inside drain') + }) + + const result = await registry.drain(1000) + expect(result.drained).to.equal(1) + expect(result.abandoned).to.equal(0) + }) + }) +}) diff --git a/test/unit/infra/executor/curate-executor.test.ts b/test/unit/infra/executor/curate-executor.test.ts index 126cbbda9..95d718075 100644 --- a/test/unit/infra/executor/curate-executor.test.ts +++ b/test/unit/infra/executor/curate-executor.test.ts @@ -14,6 +14,33 @@ import {restore, stub} from 'sinon' import type {ICipherAgent} from '../../../../src/agent/core/interfaces/i-cipher-agent.js' import {LocalSandbox} from 
'../../../../src/agent/infra/sandbox/local-sandbox.js' + +/** + * Mock cipher agent used by the runAgentBody / finalize split tests. + * Hoisted to module scope (consistent-function-scoping lint rule). + */ +function buildSplitTestAgent(): ICipherAgent { + return { + cancel: stub().resolves(false), + createTaskSession: stub().resolves('session-id'), + deleteSandboxVariable: stub(), + deleteSandboxVariableOnSession: stub(), + deleteSession: stub().resolves(true), + deleteTaskSession: stub().resolves(), + execute: stub().resolves(''), + executeOnSession: stub().resolves('curated'), + generate: stub().resolves({content: '', toolCalls: [], usage: {inputTokens: 0, outputTokens: 0}}), + getSessionMetadata: stub().resolves(), + getState: stub().returns({currentIteration: 0, executionHistory: [], executionState: 'idle', toolCallsExecuted: 0}), + listPersistedSessions: stub().resolves([]), + reset: stub(), + setSandboxVariable: stub(), + setSandboxVariableOnSession: stub(), + start: stub().resolves(), + stream: stub().resolves({[Symbol.asyncIterator]: () => ({next: () => Promise.resolve({done: true, value: undefined})})}), + } as unknown as ICipherAgent +} + import {FileValidationError} from '../../../../src/server/core/domain/errors/task-error.js' import {FileContextTreeManifestService} from '../../../../src/server/infra/context-tree/file-context-tree-manifest-service.js' import {FileContextTreeSnapshotService} from '../../../../src/server/infra/context-tree/file-context-tree-snapshot-service.js' @@ -375,4 +402,91 @@ describe('CurateExecutor (regression)', () => { expect(promptArg).to.include('Do NOT call tools.curation.recon') }) }) + + describe('runAgentBody / finalize split', () => { + // runAgentBody must return the response BEFORE Phase 4 runs so the daemon + // can fire `task:completed` early and queue finalize for background work. 
+ // Under cascade-defer (ENG-2485), Phase 4 is enqueueStaleSummaryPaths + + // buildManifest — it must NEVER call propagateStaleness inline. + + it('returns the agent response without running Phase 4 first', async () => { + const agent = buildSplitTestAgent() + stub(FileContextTreeSnapshotService.prototype, 'getCurrentState') + .onFirstCall() + .resolves(new Map()) + .onSecondCall() + .resolves(new Map([['auth/jwt.md', {hash: 'h', size: 1}]])) + const propagateStalenessStub = stub( + FileContextTreeSummaryService.prototype, + 'propagateStaleness', + ).resolves([]) + const buildManifestStub = stub(FileContextTreeManifestService.prototype, 'buildManifest').resolves() + const enqueueStub = stub(DreamStateService.prototype, 'enqueueStaleSummaryPaths').resolves() + stub(DreamStateService.prototype, 'incrementCurationCount').resolves() + + const executor = new CurateExecutor() + const {finalize, response} = await executor.runAgentBody(agent, { + clientCwd: '/p', + content: 'capture', + projectRoot: '/p', + taskId: 't1', + }) + + // Phase 4 must NOT have run yet — response was returned immediately. + expect(response).to.equal('curated') + expect(enqueueStub.called).to.be.false + expect(buildManifestStub.called).to.be.false + expect((agent.deleteTaskSession as ReturnType).called).to.be.false + + // finalize() runs Phase 4: enqueue + manifest rebuild + session cleanup. + // ENG-2485 invariant: propagateStaleness MUST NOT run on the curate path. 
+ await finalize() + expect(enqueueStub.calledOnce).to.be.true + expect(buildManifestStub.calledOnce).to.be.true + expect(propagateStalenessStub.called).to.be.false + expect((agent.deleteTaskSession as ReturnType).calledOnce).to.be.true + }) + + it('cleans up the task session if the agent body throws (no finalize returned)', async () => { + const agent = buildSplitTestAgent() + ;(agent.executeOnSession as ReturnType).rejects(new Error('agent failed')) + + const executor = new CurateExecutor() + try { + await executor.runAgentBody(agent, {clientCwd: '/p', content: 'x', taskId: 't2'}) + expect.fail('should have thrown') + } catch (error) { + expect((error as Error).message).to.equal('agent failed') + } + + // Even on agent body failure, the session must be cleaned up — no leak. + expect((agent.deleteTaskSession as ReturnType).calledOnceWithExactly('session-id')).to.be.true + }) + + it('executeWithAgent (backwards-compat wrapper) still runs Phase 4 inline before returning', async () => { + const agent = buildSplitTestAgent() + stub(FileContextTreeSnapshotService.prototype, 'getCurrentState') + .onFirstCall() + .resolves(new Map()) + .onSecondCall() + .resolves(new Map([['auth/jwt.md', {hash: 'h', size: 1}]])) + stub(FileContextTreeSummaryService.prototype, 'propagateStaleness').resolves([]) + stub(FileContextTreeManifestService.prototype, 'buildManifest').resolves() + const enqueueStub = stub(DreamStateService.prototype, 'enqueueStaleSummaryPaths').resolves() + stub(DreamStateService.prototype, 'incrementCurationCount').resolves() + + const executor = new CurateExecutor() + const result = await executor.executeWithAgent(agent, { + clientCwd: '/p', + content: 'x', + projectRoot: '/p', + taskId: 't3', + }) + + expect(result).to.equal('curated') + // Wrapper awaits finalize internally — cascade-defer enqueue ran by the + // time we get here. 
+ expect(enqueueStub.calledOnce).to.be.true + }) + }) }) diff --git a/test/unit/infra/executor/dream-executor.test.ts b/test/unit/infra/executor/dream-executor.test.ts index 170c647b7..b62576b26 100644 --- a/test/unit/infra/executor/dream-executor.test.ts +++ b/test/unit/infra/executor/dream-executor.test.ts @@ -31,6 +31,7 @@ function makePartialRunExecutor(args: { logId: string out: import('../../../../src/server/infra/dream/dream-log-schema.js').DreamOperation[] projectRoot: string + reviewDisabled?: boolean signal: AbortSignal taskId: string }): Promise { @@ -151,6 +152,24 @@ describe('DreamExecutor', () => { expect(result).to.not.include('No changes needed') }) + it('omits the flagged-for-review line when review is disabled', () => { + const executor = new DreamExecutor(deps) + const formatResult = (executor as unknown as {formatResult(logId: string, summary: import('../../../../src/server/infra/dream/dream-log-schema.js').DreamLogSummary, reviewDisabled: boolean): string}).formatResult.bind(executor) + + const result = formatResult('drm-3600', {consolidated: 1, errors: 0, flaggedForReview: 2, pruned: 0, synthesized: 1}, true) + expect(result).to.include('1 consolidated') + expect(result).to.include('1 synthesized') + expect(result).to.not.include('flagged for review') + }) + + it('still shows the flagged-for-review line when review is enabled', () => { + const executor = new DreamExecutor(deps) + const formatResult = (executor as unknown as {formatResult(logId: string, summary: import('../../../../src/server/infra/dream/dream-log-schema.js').DreamLogSummary, reviewDisabled: boolean): string}).formatResult.bind(executor) + + const result = formatResult('drm-3700', {consolidated: 0, errors: 0, flaggedForReview: 3, pruned: 0, synthesized: 0}, false) + expect(result).to.include('3 operations flagged for review') + }) + it('formats result with error count and omits no-changes message', () => { const executor = new DreamExecutor(deps) const formatResult = (executor 
as unknown as {formatResult(logId: string, summary: import('../../../../src/server/infra/dream/dream-log-schema.js').DreamLogSummary): string}).formatResult.bind(executor) @@ -377,8 +396,8 @@ describe('DreamExecutor', () => { ] // Call private method directly to test dual-write logic - await (executor as unknown as {createReviewEntries: (ops: typeof operations, dir: string, taskId: string) => Promise}) - .createReviewEntries(operations, '/tmp/ctx', 'test-task') + await (executor as unknown as {createReviewEntries: (args: {contextTreeDir: string; operations: typeof operations; reviewDisabled: boolean; taskId: string}) => Promise}) + .createReviewEntries({contextTreeDir: '/tmp/ctx', operations, reviewDisabled: false, taskId: 'test-task'}) expect(curateLogStore.getNextId.calledOnce).to.be.true expect(curateLogStore.save.calledOnce).to.be.true @@ -409,8 +428,8 @@ describe('DreamExecutor', () => { }, ] - await (executor as unknown as {createReviewEntries: (ops: typeof operations, dir: string, taskId: string) => Promise}) - .createReviewEntries(operations, '/tmp/ctx', 'test-task') + await (executor as unknown as {createReviewEntries: (args: {contextTreeDir: string; operations: typeof operations; reviewDisabled: boolean; taskId: string}) => Promise}) + .createReviewEntries({contextTreeDir: '/tmp/ctx', operations, reviewDisabled: false, taskId: 'test-task'}) const savedEntry = curateLogStore.save.firstCall.args[0] expect(savedEntry.taskId).to.equal('test-task') @@ -438,8 +457,8 @@ describe('DreamExecutor', () => { }, ] - await (executor as unknown as {createReviewEntries: (ops: typeof operations, dir: string, taskId: string) => Promise}) - .createReviewEntries(operations, '/tmp/ctx', 'test-task') + await (executor as unknown as {createReviewEntries: (args: {contextTreeDir: string; operations: typeof operations; reviewDisabled: boolean; taskId: string}) => Promise}) + .createReviewEntries({contextTreeDir: '/tmp/ctx', operations, reviewDisabled: false, taskId: 'test-task'}) 
      const savedEntry = curateLogStore.save.firstCall.args[0]
      expect(savedEntry.operations[0]).to.include({
@@ -725,6 +744,7 @@ describe('DreamExecutor', () => {
         logId: string
         out: import('../../../../src/server/infra/dream/dream-log-schema.js').DreamOperation[]
         projectRoot: string
+        reviewDisabled?: boolean
         signal: AbortSignal
         taskId: string
       }) => Promise<void>
@@ -770,4 +790,141 @@ describe('DreamExecutor', () => {
     })
   })
 })
+
+  // ── reviewDisabled — `brv review --disable` ────────────────────────────────
+  describe('reviewDisabled', () => {
+    it('skips dream-side review entry creation when options.reviewDisabled=true', async () => {
+      const executor = new DreamExecutor(deps)
+      const operations: import('../../../../src/server/infra/dream/dream-log-schema.js').DreamOperation[] = [
+        {action: 'ARCHIVE', file: 'auth/stale.md', needsReview: true, reason: 'Stale doc', stubPath: '_archived/auth/stale.stub.md', type: 'PRUNE'},
+      ]
+
+      await (executor as unknown as {createReviewEntries: (args: {contextTreeDir: string; operations: typeof operations; reviewDisabled: boolean; taskId: string}) => Promise<void>})
+        .createReviewEntries({contextTreeDir: '/tmp/ctx', operations, reviewDisabled: true, taskId: 'test-task'})
+
+      expect(curateLogStore.save.called).to.be.false
+    })
+
+    it('still creates dream-side review entries when options.reviewDisabled=false', async () => {
+      const executor = new DreamExecutor(deps)
+      const operations: import('../../../../src/server/infra/dream/dream-log-schema.js').DreamOperation[] = [
+        {action: 'ARCHIVE', file: 'auth/stale.md', needsReview: true, reason: 'Stale doc', stubPath: '_archived/auth/stale.stub.md', type: 'PRUNE'},
+      ]
+
+      await (executor as unknown as {createReviewEntries: (args: {contextTreeDir: string; operations: typeof operations; reviewDisabled: boolean; taskId: string}) => Promise<void>})
+        .createReviewEntries({contextTreeDir: '/tmp/ctx', operations, reviewDisabled: false, taskId: 'test-task'})
+
+      expect(curateLogStore.save.calledOnce).to.be.true
+    })
+
+    it('treats omitted options.reviewDisabled as enabled (fail-open)', async () => {
+      const executor = new DreamExecutor(deps)
+      const operations: import('../../../../src/server/infra/dream/dream-log-schema.js').DreamOperation[] = [
+        {action: 'ARCHIVE', file: 'auth/stale.md', needsReview: true, reason: 'Stale doc', stubPath: '_archived/auth/stale.stub.md', type: 'PRUNE'},
+      ]
+
+      // executeWithAgent treats undefined as false; createReviewEntries gets called with the boolean
+      await (executor as unknown as {createReviewEntries: (args: {contextTreeDir: string; operations: typeof operations; reviewDisabled: boolean; taskId: string}) => Promise<void>})
+        .createReviewEntries({contextTreeDir: '/tmp/ctx', operations, reviewDisabled: false, taskId: 'test-task'})
+
+      expect(curateLogStore.save.calledOnce).to.be.true
+    })
+
+    it('runOperations omits reviewBackupStore from consolidate/prune when reviewDisabled=true', async () => {
+      const reviewBackupStore = {save: stub().resolves()}
+      class ProbeExecutor extends DreamExecutor {
+        public capturedReviewBackupStore: unknown
+        public capturedReviewDisabled?: boolean
+
+        protected override async runOperations(args: {
+          agent: ICipherAgent
+          changedFiles: Set<string>
+          contextTreeDir: string
+          logId: string
+          out: import('../../../../src/server/infra/dream/dream-log-schema.js').DreamOperation[]
+          projectRoot: string
+          reviewDisabled?: boolean
+          signal: AbortSignal
+          taskId: string
+        }): Promise<void> {
+          this.capturedReviewDisabled = args.reviewDisabled
+          this.capturedReviewBackupStore =
+            args.reviewDisabled === true ?
undefined : (this as unknown as {deps: {reviewBackupStore?: unknown}}).deps.reviewBackupStore
+        }
+      }
+
+      const executor = new ProbeExecutor({...deps, reviewBackupStore})
+      await executor.executeWithAgent(agent, {...defaultOptions, reviewDisabled: true})
+
+      expect(executor.capturedReviewDisabled).to.equal(true)
+      expect(executor.capturedReviewBackupStore).to.be.undefined
+    })
+
+    it('runOperations passes reviewBackupStore through when reviewDisabled=false', async () => {
+      const reviewBackupStore = {save: stub().resolves()}
+      class ProbeExecutor extends DreamExecutor {
+        public capturedReviewBackupStore: unknown
+
+        protected override async runOperations(args: {
+          agent: ICipherAgent
+          changedFiles: Set<string>
+          contextTreeDir: string
+          logId: string
+          out: import('../../../../src/server/infra/dream/dream-log-schema.js').DreamOperation[]
+          projectRoot: string
+          reviewDisabled?: boolean
+          signal: AbortSignal
+          taskId: string
+        }): Promise<void> {
+          this.capturedReviewBackupStore =
+            args.reviewDisabled === true ?
+              undefined : (this as unknown as {deps: {reviewBackupStore?: unknown}}).deps.reviewBackupStore
+        }
+      }
+
+      const executor = new ProbeExecutor({...deps, reviewBackupStore})
+      await executor.executeWithAgent(agent, {...defaultOptions, reviewDisabled: false})
+
+      expect(executor.capturedReviewBackupStore).to.equal(reviewBackupStore)
+    })
+
+    it('snapshots options.reviewDisabled — runOperations and createReviewEntries see the same value', async () => {
+      const reviewBackupStore = {save: stub().resolves()}
+      let capturedRunOpsReviewDisabled: boolean | undefined
+      let capturedCreateReviewEntriesReviewDisabled: boolean | undefined
+
+      class ProbeExecutor extends DreamExecutor {
+        protected override async runOperations(args: {
+          agent: ICipherAgent
+          changedFiles: Set<string>
+          contextTreeDir: string
+          logId: string
+          out: import('../../../../src/server/infra/dream/dream-log-schema.js').DreamOperation[]
+          projectRoot: string
+          reviewDisabled?: boolean
+          signal: AbortSignal
+          taskId: string
+        }): Promise<void> {
+          capturedRunOpsReviewDisabled = args.reviewDisabled
+          // Simulate one needsReview op so the private createReviewEntries is invoked
+          args.out.push({action: 'ARCHIVE', file: 'auth/stale.md', needsReview: true, reason: 'Stale doc', stubPath: '_archived/auth/stale.stub.md', type: 'PRUNE'})
+        }
+      }
+
+      const executor = new ProbeExecutor({...deps, reviewBackupStore})
+
+      // Patch the private createReviewEntries via prototype to capture its reviewDisabled arg
+      type CreateReviewEntriesArgs = {contextTreeDir: string; operations: unknown[]; reviewDisabled: boolean; taskId: string}
+      const proto = Object.getPrototypeOf(Object.getPrototypeOf(executor)) as {createReviewEntries: (args: CreateReviewEntriesArgs) => Promise<void>}
+      const origCreateReviewEntries = proto.createReviewEntries.bind(executor)
+      ;(executor as unknown as {createReviewEntries: (args: CreateReviewEntriesArgs) => Promise<void>}).createReviewEntries = async (args) => {
+        capturedCreateReviewEntriesReviewDisabled = args.reviewDisabled
+ return origCreateReviewEntries(args) + } + + await executor.executeWithAgent(agent, {...defaultOptions, reviewDisabled: true}) + + expect(capturedRunOpsReviewDisabled).to.equal(true) + expect(capturedCreateReviewEntriesReviewDisabled).to.equal(true) + }) + }) }) diff --git a/test/unit/infra/executor/folder-pack-executor.test.ts b/test/unit/infra/executor/folder-pack-executor.test.ts index cb0f233ff..58a334ea6 100644 --- a/test/unit/infra/executor/folder-pack-executor.test.ts +++ b/test/unit/infra/executor/folder-pack-executor.test.ts @@ -21,8 +21,20 @@ import {LocalSandbox} from '../../../../src/agent/infra/sandbox/local-sandbox.js import {FileContextTreeManifestService} from '../../../../src/server/infra/context-tree/file-context-tree-manifest-service.js' import {FileContextTreeSnapshotService} from '../../../../src/server/infra/context-tree/file-context-tree-snapshot-service.js' import {FileContextTreeSummaryService} from '../../../../src/server/infra/context-tree/file-context-tree-summary-service.js' +import {DreamLockService} from '../../../../src/server/infra/dream/dream-lock-service.js' import {FolderPackExecutor} from '../../../../src/server/infra/executor/folder-pack-executor.js' +/** + * Default DreamLockService stubs so Phase 4 tests don't write real + * `dream.lock` files. Folder-pack's post-work holds the lock around + * propagateStaleness + buildManifest. 
+ */ +function stubDreamLockServiceDefaults(): void { + stub(DreamLockService.prototype, 'tryAcquire').resolves({acquired: true, priorMtime: 0}) + stub(DreamLockService.prototype, 'release').resolves() + stub(DreamLockService.prototype, 'rollback').resolves() +} + function createMockAgent(): ICipherAgent { return { cancel: stub().resolves(false), @@ -97,8 +109,13 @@ describe('FolderPackExecutor', () => { expect(instructionsVar).to.equal(llmGeneratedVarName) }) }) + }) describe('summary propagation', () => { + beforeEach(() => { + stubDreamLockServiceDefaults() + }) + afterEach(() => { restore() }) @@ -308,7 +325,6 @@ describe('FolderPackExecutor', () => { expect(drainBackgroundWork.calledOnce).to.be.true }) }) -}) describe('workspace path resolution (PR3)', () => { let testDir: string @@ -406,4 +422,90 @@ describe('FolderPackExecutor', () => { }) }) }) + +describe('runAgentBody / finalize split', () => { + // Mirrors the curate-executor split: response must return before Phase 4 + // so the daemon can fire `task:completed` and queue finalize. 
+ + beforeEach(() => { + stubDreamLockServiceDefaults() + }) + + afterEach(() => { + restore() + }) + + it('returns the response without running Phase 4 first', async () => { + const folderPackService = { + generateXml: stub().returns(''), + initialize: stub().resolves(), + pack: stub().resolves({fileCount: 0, files: [], totalLines: 0}), + } as unknown as IFolderPackService + + const executor = new FolderPackExecutor(folderPackService) + stub( + executor as unknown as {executeIterative: (...args: unknown[]) => Promise}, + 'executeIterative', + ).resolves('curated') + stub(FileContextTreeSnapshotService.prototype, 'getCurrentState') + .onFirstCall() + .resolves(new Map()) + .onSecondCall() + .resolves(new Map([['auth/jwt.md', {hash: 'h', size: 1}]])) + const propagateStaleness = stub(FileContextTreeSummaryService.prototype, 'propagateStaleness').resolves([]) + stub(FileContextTreeManifestService.prototype, 'buildManifest').resolves() + + const drainBackgroundWork = stub().resolves() + const agent = {drainBackgroundWork} as unknown as ICipherAgent + const {finalize, response} = await executor.runAgentBody(agent, { + clientCwd: '/workspace', + content: 'curate', + folderPath: 'src', + taskId: 'fp-split-1', + }) + + // Phase 4 must NOT have run yet — response was returned immediately. 
+ expect(response).to.equal('curated') + expect(propagateStaleness.called).to.be.false + expect(drainBackgroundWork.called).to.be.false + + await finalize() + expect(propagateStaleness.calledOnce).to.be.true + expect(drainBackgroundWork.calledOnce).to.be.true + }) + + it('executeWithAgent (backwards-compat wrapper) still runs Phase 4 inline before returning', async () => { + const folderPackService = { + generateXml: stub().returns(''), + initialize: stub().resolves(), + pack: stub().resolves({fileCount: 0, files: [], totalLines: 0}), + } as unknown as IFolderPackService + + const executor = new FolderPackExecutor(folderPackService) + stub( + executor as unknown as {executeIterative: (...args: unknown[]) => Promise}, + 'executeIterative', + ).resolves('curated') + stub(FileContextTreeSnapshotService.prototype, 'getCurrentState') + .onFirstCall() + .resolves(new Map()) + .onSecondCall() + .resolves(new Map([['auth/jwt.md', {hash: 'h', size: 1}]])) + const propagateStaleness = stub(FileContextTreeSummaryService.prototype, 'propagateStaleness').resolves([]) + + const drainBackgroundWork = stub().resolves() + const agent = {drainBackgroundWork} as unknown as ICipherAgent + const result = await executor.executeWithAgent(agent, { + clientCwd: '/workspace', + content: 'curate', + folderPath: 'src', + taskId: 'fp-split-2', + }) + + expect(result).to.equal('curated') + // Wrapper awaits finalize internally — Phase 4 ran by the time we get here. 
+ expect(propagateStaleness.calledOnce).to.be.true + expect(drainBackgroundWork.calledOnce).to.be.true + }) +}) }) diff --git a/test/unit/infra/git/git-error-messages.test.ts b/test/unit/infra/git/git-error-messages.test.ts new file mode 100644 index 000000000..657dbacc5 --- /dev/null +++ b/test/unit/infra/git/git-error-messages.test.ts @@ -0,0 +1,58 @@ +import {expect} from 'chai' + +import {formatOverwriteMessage} from '../../../../src/server/infra/git/git-error-messages.js' + +describe('formatOverwriteMessage', () => { + it('produces the merge wording with tab-indented file list', () => { + const message = formatOverwriteMessage('merge', ['a.md', 'notes/b.md']) + expect(message).to.equal( + 'Your local changes to the following files would be overwritten by merge:\n' + + '\ta.md\n' + + '\tnotes/b.md\n' + + 'Please commit or discard your changes before you merge.', + ) + }) + + it('produces the checkout wording with tab-indented file list', () => { + const message = formatOverwriteMessage('checkout', ['a.md']) + expect(message).to.equal( + 'Your local changes to the following files would be overwritten by checkout:\n' + + '\ta.md\n' + + 'Please commit or discard your changes before you switch branches.', + ) + }) + + it('produces the pull wording (operation defaults its own action verb)', () => { + expect(formatOverwriteMessage('pull', ['a.md'])).to.equal( + 'Your local changes to the following files would be overwritten by pull:\n' + + '\ta.md\n' + + 'Please commit or discard your changes before you pull.', + ) + }) + + it('produces a no-list form when caller has no file paths (CheckoutConflictError fallback)', () => { + expect(formatOverwriteMessage('merge', [])).to.equal( + 'Your local changes would be overwritten by merge. Please commit or discard your changes before you merge.', + ) + expect(formatOverwriteMessage('checkout', [])).to.equal( + 'Your local changes would be overwritten by checkout. 
Please commit or discard your changes before you switch branches.', + ) + expect(formatOverwriteMessage('pull', [])).to.equal( + 'Your local changes would be overwritten by pull. Please commit or discard your changes before you pull.', + ) + }) + + it('contains the "would be overwritten" anchor that vc-handler error mapping greps for', () => { + expect(formatOverwriteMessage('merge', ['x'])).to.include('would be overwritten') + expect(formatOverwriteMessage('checkout', ['x'])).to.include('would be overwritten') + expect(formatOverwriteMessage('merge', [])).to.include('would be overwritten') + expect(formatOverwriteMessage('checkout', [])).to.include('would be overwritten') + }) + + it('points the user at the discard escape hatch (brv vc reset), not stash (brv has none)', () => { + expect(formatOverwriteMessage('merge', ['x'])).to.include('discard') + expect(formatOverwriteMessage('checkout', ['x'])).to.include('discard') + expect(formatOverwriteMessage('merge', ['x'])).to.not.include('stash') + expect(formatOverwriteMessage('checkout', ['x'])).to.not.include('stash') + }) +}) diff --git a/test/unit/infra/git/status-row-classifier.test.ts b/test/unit/infra/git/status-row-classifier.test.ts new file mode 100644 index 000000000..6ea1da097 --- /dev/null +++ b/test/unit/infra/git/status-row-classifier.test.ts @@ -0,0 +1,245 @@ +import {expect} from 'chai' + +import {GitError} from '../../../../src/server/core/domain/errors/git-error.js' +import {classifyTuple} from '../../../../src/server/infra/git/status-row-classifier.js' + +describe('classifyTuple', () => { + describe('clean state', () => { + it('[1,1,1] returns dirty=false, no entries', () => { + const c = classifyTuple(1, 1, 1) + expect(c.dirty).to.equal(false) + expect(c.staged).to.equal(false) + expect(c.files).to.deep.equal([]) + expect(c.stagedDiff).to.equal(undefined) + expect(c.unstagedDiff).to.equal(undefined) + }) + }) + + describe('untracked / new file', () => { + it('[0,2,0] untracked new file', () => { + 
const c = classifyTuple(0, 2, 0) + expect(c.dirty).to.equal(true) + expect(c.staged).to.equal(false) + expect(c.files).to.deep.equal([{staged: false, status: 'untracked'}]) + expect(c.stagedDiff).to.equal(undefined) + expect(c.unstagedDiff).to.equal(undefined) + }) + + it('[0,2,2] staged new file', () => { + const c = classifyTuple(0, 2, 2) + expect(c.dirty).to.equal(true) + expect(c.staged).to.equal(true) + expect(c.files).to.deep.equal([{staged: true, status: 'added'}]) + expect(c.stagedDiff).to.equal('added') + expect(c.unstagedDiff).to.equal(undefined) + }) + + it('[0,2,3] partially staged new file', () => { + const c = classifyTuple(0, 2, 3) + expect(c.dirty).to.equal(true) + expect(c.staged).to.equal(true) + expect(c.files).to.deep.equal([ + {staged: true, status: 'added'}, + {staged: false, status: 'modified'}, + ]) + expect(c.stagedDiff).to.equal('added') + expect(c.unstagedDiff).to.equal('modified') + }) + }) + + describe('deletions', () => { + it('[1,0,0] staged deletion (git rm)', () => { + const c = classifyTuple(1, 0, 0) + expect(c.dirty).to.equal(true) + expect(c.staged).to.equal(true) + expect(c.files).to.deep.equal([{staged: true, status: 'deleted'}]) + expect(c.stagedDiff).to.equal('deleted') + expect(c.unstagedDiff).to.equal(undefined) + }) + + it('[1,0,1] unstaged deletion (rm without git rm)', () => { + const c = classifyTuple(1, 0, 1) + expect(c.dirty).to.equal(true) + expect(c.staged).to.equal(false) + expect(c.files).to.deep.equal([{staged: false, status: 'deleted'}]) + expect(c.stagedDiff).to.equal(undefined) + expect(c.unstagedDiff).to.equal('deleted') + }) + + it('[1,0,2] absent from disk, index differs from HEAD reports both staged-modified + unstaged-deleted', () => { + // Native git shows MD: HEAD->INDEX is a modification, INDEX->WORKDIR is a deletion. + // Mirrors [1,0,3]; anything less leaves vc status disagreeing with vc diff --staged. 
+ const c = classifyTuple(1, 0, 2) + expect(c.dirty).to.equal(true) + expect(c.staged).to.equal(true) + expect(c.files).to.deep.equal([ + {staged: true, status: 'modified'}, + {staged: false, status: 'deleted'}, + ]) + expect(c.stagedDiff).to.equal('modified') + expect(c.unstagedDiff).to.equal('deleted') + }) + + it('[1,0,3] staged modification then deleted from disk', () => { + const c = classifyTuple(1, 0, 3) + expect(c.dirty).to.equal(true) + expect(c.staged).to.equal(true) + expect(c.files).to.deep.equal([ + {staged: true, status: 'modified'}, + {staged: false, status: 'deleted'}, + ]) + expect(c.stagedDiff).to.equal('modified') + expect(c.unstagedDiff).to.equal('deleted') + }) + + it('[1,1,0] git rm --cached', () => { + const c = classifyTuple(1, 1, 0) + expect(c.dirty).to.equal(true) + expect(c.staged).to.equal(true) + expect(c.files).to.deep.equal([ + {staged: true, status: 'deleted'}, + {staged: false, status: 'untracked'}, + ]) + expect(c.stagedDiff).to.equal('deleted') + expect(c.unstagedDiff).to.equal(undefined) + }) + + it('[1,2,0] git rm --cached then edit workdir', () => { + const c = classifyTuple(1, 2, 0) + expect(c.dirty).to.equal(true) + expect(c.staged).to.equal(true) + expect(c.files).to.deep.equal([ + {staged: true, status: 'deleted'}, + {staged: false, status: 'untracked'}, + ]) + expect(c.stagedDiff).to.equal('deleted') + expect(c.unstagedDiff).to.equal(undefined) + }) + + it('[0,0,3] staged add then deleted from disk', () => { + const c = classifyTuple(0, 0, 3) + expect(c.dirty).to.equal(true) + expect(c.staged).to.equal(true) + expect(c.files).to.deep.equal([ + {staged: true, status: 'added'}, + {staged: false, status: 'deleted'}, + ]) + expect(c.stagedDiff).to.equal('added') + expect(c.unstagedDiff).to.equal('deleted') + }) + }) + + describe('modifications', () => { + it('[1,1,3] workdir restored to HEAD after add reports both staged + unstaged modifications', () => { + // When stage differs from both HEAD and workdir, native git surfaces 
a staged + // modified entry AND an unstaged modified entry. Mirrors [1,2,3]; anything less + // leaves `vc status` and `vc diff` disagreeing on the unstaged side. + const c = classifyTuple(1, 1, 3) + expect(c.dirty).to.equal(true) + expect(c.staged).to.equal(true) + expect(c.files).to.deep.equal([ + {staged: true, status: 'modified'}, + {staged: false, status: 'modified'}, + ]) + expect(c.stagedDiff).to.equal('modified') + expect(c.unstagedDiff).to.equal('modified') + }) + + it('[1,2,1] unstaged modification', () => { + const c = classifyTuple(1, 2, 1) + expect(c.dirty).to.equal(true) + expect(c.staged).to.equal(false) + expect(c.files).to.deep.equal([{staged: false, status: 'modified'}]) + expect(c.stagedDiff).to.equal(undefined) + expect(c.unstagedDiff).to.equal('modified') + }) + + it('[1,2,2] staged modification', () => { + const c = classifyTuple(1, 2, 2) + expect(c.dirty).to.equal(true) + expect(c.staged).to.equal(true) + expect(c.files).to.deep.equal([{staged: true, status: 'modified'}]) + expect(c.stagedDiff).to.equal('modified') + expect(c.unstagedDiff).to.equal(undefined) + }) + + it('[1,2,3] partially staged modification', () => { + const c = classifyTuple(1, 2, 3) + expect(c.dirty).to.equal(true) + expect(c.staged).to.equal(true) + expect(c.files).to.deep.equal([ + {staged: true, status: 'modified'}, + {staged: false, status: 'modified'}, + ]) + expect(c.stagedDiff).to.equal('modified') + expect(c.unstagedDiff).to.equal('modified') + }) + }) + + describe('cross-property invariants over the full (h,w,s) cartesian product', () => { + // The algorithmic body is total over the encoding's value range. These properties + // hold by construction for every in-range (h,w,s); if any one fails, the algorithm + // has been broken in a way that no per-tuple test would necessarily catch. 
+ const inRangeTuples: Array<[number, number, number]> = [] + for (const h of [0, 1]) { + for (const w of [0, 1, 2]) { + for (const s of [0, 1, 2, 3]) { + inRangeTuples.push([h, w, s]) + } + } + } + + for (const [head, workdir, stage] of inRangeTuples) { + const tag = `[${head},${workdir},${stage}]` + + it(`${tag} dirty <-> !(h===1 && w===1 && s===1)`, () => { + const c = classifyTuple(head, workdir, stage) + expect(c.dirty).to.equal(!(head === 1 && workdir === 1 && stage === 1)) + }) + + it(`${tag} staged <-> stagedDiff defined`, () => { + const c = classifyTuple(head, workdir, stage) + expect(c.staged).to.equal(c.stagedDiff !== undefined) + }) + + it(`${tag} staged <-> (stage !== head)`, () => { + const c = classifyTuple(head, workdir, stage) + expect(c.staged).to.equal(stage !== head) + }) + + it(`${tag} untracked entry present <-> s===0 && w>0`, () => { + const c = classifyTuple(head, workdir, stage) + const hasUntracked = c.files.some((f) => f.status === 'untracked') + expect(hasUntracked).to.equal(stage === 0 && workdir > 0) + }) + + it(`${tag} files.length matches stagedDiff/untracked/unstagedDiff projections`, () => { + const c = classifyTuple(head, workdir, stage) + const untracked = stage === 0 && workdir > 0 + const expectedLen = (c.stagedDiff ? 1 : 0) + (untracked ? 1 : c.unstagedDiff ? 1 : 0) + expect(c.files.length).to.equal(expectedLen) + }) + + it(`${tag} when files reports any entry, dirty must be true`, () => { + const c = classifyTuple(head, workdir, stage) + if (c.files.length > 0) expect(c.dirty).to.equal(true) + }) + } + }) + + describe('out-of-range columns', () => { + // The algorithm is total over the encoding's value range. Columns outside that + // range are the cleanest signal that isomorphic-git changed the encoding shape. 
+ it('throws GitError when HEAD column is out of range', () => { + expect(() => classifyTuple(2, 0, 0)).to.throw(GitError, /HEAD column out of range/) + }) + + it('throws GitError when WORKDIR column is out of range', () => { + expect(() => classifyTuple(0, 3, 0)).to.throw(GitError, /WORKDIR column out of range/) + }) + + it('throws GitError when STAGE column is out of range', () => { + expect(() => classifyTuple(0, 0, 4)).to.throw(GitError, /STAGE column out of range/) + }) + }) +}) diff --git a/test/unit/infra/process/curate-log-handler.test.ts b/test/unit/infra/process/curate-log-handler.test.ts index 61bd0d659..f3b8defd6 100644 --- a/test/unit/infra/process/curate-log-handler.test.ts +++ b/test/unit/infra/process/curate-log-handler.test.ts @@ -647,6 +647,135 @@ describe('CurateLogHandler', () => { await handlerWithBadCallback.onTaskCompleted('task-abc', 'done', makeTask()) }) }) + + // ========================================================================== + // reviewDisabled — `brv review --disable` + // ========================================================================== + + describe('reviewDisabled', () => { + it('does not mark operations as pending review when task.reviewDisabled=true', async () => { + const handlerWithToggle = new CurateLogHandler(() => store) + + await handlerWithToggle.onTaskCreate(makeTask({reviewDisabled: true})) + handlerWithToggle.onToolResult('task-abc', { + result: { + applied: [ + {confidence: 'low', impact: 'high', needsReview: true, path: '/a.md', reason: 'uncertain', status: 'success', type: 'UPDATE'}, + {confidence: 'high', impact: 'high', needsReview: true, path: '/b.md', reason: 'irreversible', status: 'success', type: 'DELETE'}, + ], + }, + sessionId: 'sess-1', + success: true, + taskId: 'task-abc', + toolName: 'curate', + } as never) + + await handlerWithToggle.onTaskCompleted('task-abc', 'done', makeTask({reviewDisabled: true})) + + const completed: CurateLogEntry = store.save.secondCall.args[0] + 
expect(completed.operations[0].reviewStatus).to.be.undefined + expect(completed.operations[1].reviewStatus).to.be.undefined + }) + + it('still marks operations as pending review when task.reviewDisabled=false', async () => { + const handlerWithToggle = new CurateLogHandler(() => store) + + await handlerWithToggle.onTaskCreate(makeTask({reviewDisabled: false})) + handlerWithToggle.onToolResult('task-abc', { + result: { + applied: [ + {confidence: 'low', impact: 'high', needsReview: true, path: '/a.md', status: 'success', type: 'UPDATE'}, + ], + }, + sessionId: 'sess-1', + success: true, + taskId: 'task-abc', + toolName: 'curate', + } as never) + await handlerWithToggle.onTaskCompleted('task-abc', 'done', makeTask({reviewDisabled: false})) + + const completed: CurateLogEntry = store.save.secondCall.args[0] + expect(completed.operations[0].reviewStatus).to.equal('pending') + }) + + it('treats undefined task.reviewDisabled as enabled (fail-open)', async () => { + const handlerWithToggle = new CurateLogHandler(() => store) + + await handlerWithToggle.onTaskCreate(makeTask()) + handlerWithToggle.onToolResult('task-abc', { + result: {applied: [{needsReview: true, path: '/a.md', status: 'success', type: 'DELETE'}]}, + sessionId: 'sess-1', + success: true, + taskId: 'task-abc', + toolName: 'curate', + } as never) + await handlerWithToggle.onTaskCompleted('task-abc', 'done', makeTask()) + + const completed: CurateLogEntry = store.save.secondCall.args[0] + expect(completed.operations[0].reviewStatus).to.equal('pending') + }) + + it('skips firing onPendingReviews callback when task.reviewDisabled=true', async () => { + const notifications: Array<{pendingCount: number}> = [] + const handlerWithToggle = new CurateLogHandler( + () => store, + (info) => notifications.push({pendingCount: info.pendingCount}), + ) + + await handlerWithToggle.onTaskCreate(makeTask({reviewDisabled: true})) + handlerWithToggle.onToolResult('task-abc', { + result: { + applied: [{needsReview: true, path: 
'/a.md', status: 'success', type: 'DELETE'}], + }, + sessionId: 'sess-1', + success: true, + taskId: 'task-abc', + toolName: 'curate', + } as never) + + await handlerWithToggle.onTaskCompleted('task-abc', 'done', makeTask({reviewDisabled: true})) + + expect(notifications).to.have.lengthOf(0) + }) + + it('reports zero pendingReviewCount via getTaskCompletionData when disabled', async () => { + const handlerWithToggle = new CurateLogHandler(() => store) + + await handlerWithToggle.onTaskCreate(makeTask({reviewDisabled: true})) + handlerWithToggle.onToolResult('task-abc', { + result: {applied: [{needsReview: true, path: '/a.md', status: 'success', type: 'DELETE'}]}, + sessionId: 'sess-1', + success: true, + taskId: 'task-abc', + toolName: 'curate', + } as never) + + expect(handlerWithToggle.getTaskCompletionData('task-abc')).to.deep.equal({}) + }) + + it('snapshots task.reviewDisabled at create time — mid-task TaskInfo mutation does not affect onToolResult', async () => { + const handlerWithToggle = new CurateLogHandler(() => store) + const task = makeTask({reviewDisabled: true}) + + await handlerWithToggle.onTaskCreate(task) + + // Simulate user toggling between create and tool-result. The handler must + // ignore mutations to the original TaskInfo because it snapshotted on create. 
+ task.reviewDisabled = false + + handlerWithToggle.onToolResult('task-abc', { + result: {applied: [{needsReview: true, path: '/a.md', status: 'success', type: 'DELETE'}]}, + sessionId: 'sess-1', + success: true, + taskId: 'task-abc', + toolName: 'curate', + } as never) + await handlerWithToggle.onTaskCompleted('task-abc', 'done', task) + + const completed: CurateLogEntry = store.save.secondCall.args[0] + expect(completed.operations[0].reviewStatus).to.be.undefined + }) + }) }) }) // end curate-log-handler diff --git a/test/unit/infra/process/task-router.test.ts b/test/unit/infra/process/task-router.test.ts index f7db5df35..9f28309f1 100644 --- a/test/unit/infra/process/task-router.test.ts +++ b/test/unit/infra/process/task-router.test.ts @@ -216,6 +216,87 @@ describe('TaskRouter', () => { expect(submittedTask).to.have.property('type', 'curate') }) + describe('reviewDisabled stamping', () => { + it('stamps reviewDisabled=true on TaskExecute when resolver returns true', async () => { + const routerWithResolver = new TaskRouter({ + agentPool, + getAgentForProject, + isReviewDisabled: sandbox.stub().resolves(true), + projectRegistry, + projectRouter, + transport: transportHelper.transport, + }) + routerWithResolver.setup() + + const handler = transportHelper.requestHandlers.get(TransportTaskEventNames.CREATE) + const request = makeTaskCreateRequest() + await handler!(request, 'client-1') + + await new Promise((resolve) => { setTimeout(resolve, 10) }) + + const submittedTask = agentPool.submitTask.firstCall.args[0] + expect(submittedTask).to.have.property('reviewDisabled', true) + }) + + it('stamps reviewDisabled=false on TaskExecute when resolver returns false', async () => { + const routerWithResolver = new TaskRouter({ + agentPool, + getAgentForProject, + isReviewDisabled: sandbox.stub().resolves(false), + projectRegistry, + projectRouter, + transport: transportHelper.transport, + }) + routerWithResolver.setup() + + const handler = 
transportHelper.requestHandlers.get(TransportTaskEventNames.CREATE) + const request = makeTaskCreateRequest() + await handler!(request, 'client-1') + + await new Promise((resolve) => { setTimeout(resolve, 10) }) + + const submittedTask = agentPool.submitTask.firstCall.args[0] + expect(submittedTask).to.have.property('reviewDisabled', false) + }) + + it('omits reviewDisabled from TaskExecute when no resolver is configured', async () => { + const handler = transportHelper.requestHandlers.get(TransportTaskEventNames.CREATE) + const request = makeTaskCreateRequest() + await handler!(request, 'client-1') + + await new Promise((resolve) => { setTimeout(resolve, 10) }) + + const submittedTask = agentPool.submitTask.firstCall.args[0] + expect(submittedTask).to.not.have.property('reviewDisabled') + }) + + it('stamps explicit reviewDisabled=false when resolver throws (fail-open with single concrete value)', async () => { + // Returning undefined here would re-introduce the daemon/agent divergence the + // snapshot is meant to prevent: daemon stamps no field → CurateLogHandler treats + // as enabled (`?? false`), but the agent process opens no ALS scope and may + // observe a different value from .brv/config.json. Stamping a concrete `false` + // keeps both sides aligned (review enabled, fail-open). 
+ const routerWithResolver = new TaskRouter({ + agentPool, + getAgentForProject, + isReviewDisabled: sandbox.stub().rejects(new Error('config read failed')), + projectRegistry, + projectRouter, + transport: transportHelper.transport, + }) + routerWithResolver.setup() + + const handler = transportHelper.requestHandlers.get(TransportTaskEventNames.CREATE) + const request = makeTaskCreateRequest() + await handler!(request, 'client-1') + + await new Promise((resolve) => { setTimeout(resolve, 10) }) + + const submittedTask = agentPool.submitTask.firstCall.args[0] + expect(submittedTask).to.have.property('reviewDisabled', false) + }) + }) + it('should return same taskId for duplicate create (idempotent)', async () => { const handler = transportHelper.requestHandlers.get(TransportTaskEventNames.CREATE) const request = makeTaskCreateRequest() diff --git a/test/unit/infra/transport/handlers/model-handler.test.ts b/test/unit/infra/transport/handlers/model-handler.test.ts index da2173205..4c778a128 100644 --- a/test/unit/infra/transport/handlers/model-handler.test.ts +++ b/test/unit/infra/transport/handlers/model-handler.test.ts @@ -94,6 +94,38 @@ describe('ModelHandler', () => { expect(providerConfigStore.setActiveModel.calledBefore(transport.broadcast)).to.be.true }) + it('should activate the provider when picking a model (load-bearing for openai-compatible deferred-activation)', async () => { + // The openai-compatible connect path passes setAsActive: false when no + // active model will be set. The provider only becomes active once a + // model is picked here — so this call MUST flip activeProvider, or + // the openai-compatible setup flow ends up "connected but never + // active". A regression that drops this call would silently break it. 
+ providerConfigStore.read.resolves( + ProviderConfig.fromJson({ + activeProvider: '', + providers: { + 'openai-compatible': { + authMethod: 'api-key', + baseUrl: 'http://localhost:11434/v1', + connectedAt: new Date().toISOString(), + favoriteModels: [], + recentModels: [], + }, + }, + }), + ) + + createHandler() + + const handler = transport._handlers.get(ModelEvents.SET_ACTIVE) + const result = await handler!({modelId: 'qwen3.5-9b', providerId: 'openai-compatible'}, 'client-1') + + expect(result).to.deep.equal({success: true}) + expect(providerConfigStore.setActiveProvider.calledWith('openai-compatible')).to.be.true + expect(providerConfigStore.setActiveModel.calledWith('openai-compatible', 'qwen3.5-9b')).to.be.true + expect(providerConfigStore.setActiveProvider.calledBefore(providerConfigStore.setActiveModel)).to.be.true + }) + it('should reject when provider is not connected', async () => { createHandler() diff --git a/test/unit/infra/transport/handlers/provider-handler.test.ts b/test/unit/infra/transport/handlers/provider-handler.test.ts index 112eaa1b3..360b80196 100644 --- a/test/unit/infra/transport/handlers/provider-handler.test.ts +++ b/test/unit/infra/transport/handlers/provider-handler.test.ts @@ -99,6 +99,28 @@ describe('ProviderHandler', () => { return handler } + function createHandlerWithValidator( + validateOpenAICompatibleEndpoint: (params: {apiKey: string; baseUrl: string}) => Promise<{ + error?: string + isValid: boolean + }>, + ): ProviderHandler { + const handler = new ProviderHandler({ + authStateStore, + browserLauncher, + createCallbackServer: () => mockCallbackServer as unknown as ProviderCallbackServer, + exchangeCodeForTokens: exchangeCodeStub, + generatePkce: generatePkceStub, + providerConfigStore, + providerKeychainStore, + providerOAuthTokenStore, + transport, + validateOpenAICompatibleEndpoint, + }) + handler.setup() + return handler + } + describe('setup', () => { it('should register all provider event handlers', () => { 
createHandler() @@ -160,6 +182,191 @@ describe('ProviderHandler', () => { expect(providerConfigStore.connectProvider.calledBefore(transport.broadcast)).to.be.true }) + + describe('openai-compatible URL validation', () => { + beforeEach(() => { + // setupConnect now reads existing config to fall back on stored + // baseUrl/apiKey when the request omits them — give the mock a + // sensible default so each test doesn't have to wire it up. + providerConfigStore.read.resolves(ProviderConfig.createDefault()) + }) + + it('should validate base URL via injected validator before persisting anything', async () => { + const validateStub = stub< + [{apiKey: string; baseUrl: string}], + Promise<{error?: string; isValid: boolean}> + >().resolves({isValid: true}) + createHandlerWithValidator(validateStub) + + const connectHandler = transport._handlers.get(ProviderEvents.CONNECT) + const result = await connectHandler!( + {apiKey: 'test-key', baseUrl: 'http://localhost:11434/v1', providerId: 'openai-compatible'}, + 'client-1', + ) + + expect(result).to.deep.equal({success: true}) + expect(validateStub.calledOnce).to.be.true + expect(validateStub.firstCall.args[0]).to.deep.equal({ + apiKey: 'test-key', + baseUrl: 'http://localhost:11434/v1', + }) + expect(validateStub.calledBefore(providerKeychainStore.setApiKey)).to.be.true + expect(validateStub.calledBefore(providerConfigStore.connectProvider)).to.be.true + }) + + it('should return error and persist nothing when endpoint validation fails', async () => { + const validateStub = stub< + [{apiKey: string; baseUrl: string}], + Promise<{error?: string; isValid: boolean}> + >().resolves({error: 'connect ECONNREFUSED', isValid: false}) + createHandlerWithValidator(validateStub) + + const connectHandler = transport._handlers.get(ProviderEvents.CONNECT) + const result = await connectHandler!( + {apiKey: 'test-key', baseUrl: 'http://nope', providerId: 'openai-compatible'}, + 'client-1', + ) + + expect(result.success).to.be.false + 
expect(result.error).to.include('http://nope')
+ expect(result.error).to.include('connect ECONNREFUSED')
+ expect(providerKeychainStore.setApiKey.notCalled).to.be.true
+ expect(providerConfigStore.connectProvider.notCalled).to.be.true
+ expect(transport.broadcast.notCalled).to.be.true
+ })
+
+ it('should not pre-write activeModel for openai-compatible', async () => {
+ const validateStub = stub<
+ [{apiKey: string; baseUrl: string}],
+ Promise<{error?: string; isValid: boolean}>
+ >().resolves({isValid: true})
+ createHandlerWithValidator(validateStub)
+
+ const connectHandler = transport._handlers.get(ProviderEvents.CONNECT)
+ await connectHandler!(
+ {baseUrl: 'http://localhost:11434/v1', providerId: 'openai-compatible'},
+ 'client-1',
+ )
+
+ const connectArgs = providerConfigStore.connectProvider.firstCall.args[1] as Record<string, unknown>
+ expect(connectArgs.activeModel).to.be.undefined
+ expect(connectArgs.baseUrl).to.equal('http://localhost:11434/v1')
+ })
+
+ it('should reject with friendly error when no baseUrl is provided and none is stored', async () => {
+ const validateStub = stub<
+ [{apiKey: string; baseUrl: string}],
+ Promise<{error?: string; isValid: boolean}>
+ >().resolves({isValid: true})
+ providerConfigStore.read.resolves(ProviderConfig.createDefault())
+ createHandlerWithValidator(validateStub)
+
+ const connectHandler = transport._handlers.get(ProviderEvents.CONNECT)
+ const result = await connectHandler!({providerId: 'openai-compatible'}, 'client-1')
+
+ expect(result.success).to.be.false
+ expect(result.error).to.include('base URL is required')
+ expect(validateStub.notCalled).to.be.true
+ expect(providerConfigStore.connectProvider.notCalled).to.be.true
+ })
+
+ it('should validate with the stored baseUrl when the request omits it', async () => {
+ const validateStub = stub<
+ [{apiKey: string; baseUrl: string}],
+ Promise<{error?: string; isValid: boolean}>
+ >().resolves({isValid: true})
+ const stored =
ProviderConfig.createDefault().withProviderConnected('openai-compatible', { + baseUrl: 'http://stored:11434/v1', + }) + providerConfigStore.read.resolves(stored) + createHandlerWithValidator(validateStub) + + const connectHandler = transport._handlers.get(ProviderEvents.CONNECT) + const result = await connectHandler!({providerId: 'openai-compatible'}, 'client-1') + + expect(result).to.deep.equal({success: true}) + expect(validateStub.firstCall.args[0]).to.deep.include({baseUrl: 'http://stored:11434/v1'}) + }) + + it('should validate with the stored API key when the request omits it', async () => { + const validateStub = stub< + [{apiKey: string; baseUrl: string}], + Promise<{error?: string; isValid: boolean}> + >().resolves({isValid: true}) + providerKeychainStore.getApiKey.withArgs('openai-compatible').resolves('stored-key') + createHandlerWithValidator(validateStub) + + const connectHandler = transport._handlers.get(ProviderEvents.CONNECT) + await connectHandler!( + {baseUrl: 'http://localhost:11434/v1', providerId: 'openai-compatible'}, + 'client-1', + ) + + expect(validateStub.firstCall.args[0]).to.deep.equal({ + apiKey: 'stored-key', + baseUrl: 'http://localhost:11434/v1', + }) + }) + + it('should not validate non-openai-compatible providers', async () => { + const validateStub = stub< + [{apiKey: string; baseUrl: string}], + Promise<{error?: string; isValid: boolean}> + >().resolves({isValid: true}) + createHandlerWithValidator(validateStub) + + const connectHandler = transport._handlers.get(ProviderEvents.CONNECT) + await connectHandler!({apiKey: 'test-key', providerId: 'openrouter'}, 'client-1') + + expect(validateStub.notCalled).to.be.true + }) + + it('should not activate openai-compatible when no active model will be set', async () => { + const validateStub = stub< + [{apiKey: string; baseUrl: string}], + Promise<{error?: string; isValid: boolean}> + >().resolves({isValid: true}) + 
providerConfigStore.getActiveModel.withArgs('openai-compatible').resolves()
+ createHandlerWithValidator(validateStub)
+
+ const connectHandler = transport._handlers.get(ProviderEvents.CONNECT)
+ await connectHandler!(
+ {baseUrl: 'http://localhost:11434/v1', providerId: 'openai-compatible'},
+ 'client-1',
+ )
+
+ const connectArgs = providerConfigStore.connectProvider.firstCall.args[1] as Record<string, unknown>
+ expect(connectArgs.setAsActive).to.equal(false)
+ })
+
+ it('should activate openai-compatible when an existing active model will be preserved', async () => {
+ const validateStub = stub<
+ [{apiKey: string; baseUrl: string}],
+ Promise<{error?: string; isValid: boolean}>
+ >().resolves({isValid: true})
+ providerConfigStore.getActiveModel.withArgs('openai-compatible').resolves('qwen3.5-9b')
+ createHandlerWithValidator(validateStub)
+
+ const connectHandler = transport._handlers.get(ProviderEvents.CONNECT)
+ await connectHandler!(
+ {baseUrl: 'http://localhost:11434/v1', providerId: 'openai-compatible'},
+ 'client-1',
+ )
+
+ const connectArgs = providerConfigStore.connectProvider.firstCall.args[1] as Record<string, unknown>
+ expect(connectArgs.setAsActive).to.equal(true)
+ })
+ })
+
+ it('should activate non-openai-compatible providers (registry has a defaultModel)', async () => {
+ createHandler()
+
+ const handler = transport._handlers.get(ProviderEvents.CONNECT)
+ await handler!({apiKey: 'test-key', providerId: 'openrouter'}, 'client-1')
+
+ const connectArgs = providerConfigStore.connectProvider.firstCall.args[1] as Record<string, unknown>
+ expect(connectArgs.setAsActive).to.equal(true)
+ })
})

describe('provider:disconnect', () => {
@@ -710,6 +917,22 @@ describe('ProviderHandler', () => {
expect(openaiProvider?.authMethod).to.equal('oauth')
expect(openaiProvider?.requiresApiKey).to.be.false
})
+
+ it('should expose activeModel as undefined for openai-compatible connected without a model', async () => {
+ const config = ProviderConfig.createDefault().withProviderConnected('openai-compatible', {
baseUrl: 'http://localhost:11434/v1',
+ })
+ providerConfigStore.read.resolves(config)
+ providerConfigStore.isProviderConnected.withArgs('openai-compatible').resolves(true)
+ createHandler()
+
+ const handler = transport._handlers.get(ProviderEvents.LIST)
+ const result = await handler!(undefined, 'client-1')
+
+ const openaiCompat = result.providers.find((p: {id: string}) => p.id === 'openai-compatible')
+ expect(openaiCompat?.isConnected).to.be.true
+ expect(openaiCompat?.activeModel).to.be.undefined
+ })
})

describe('provider:cancelOAuth', () => {
diff --git a/test/unit/infra/transport/handlers/review-handler.test.ts b/test/unit/infra/transport/handlers/review-handler.test.ts
new file mode 100644
index 000000000..615968ae9
--- /dev/null
+++ b/test/unit/infra/transport/handlers/review-handler.test.ts
@@ -0,0 +1,165 @@
+import type {SinonStub} from 'sinon'
+
+import {expect} from 'chai'
+import {restore, stub} from 'sinon'
+
+import type {ICurateLogStore} from '../../../../../src/server/core/interfaces/storage/i-curate-log-store.js'
+import type {IProjectConfigStore} from '../../../../../src/server/core/interfaces/storage/i-project-config-store.js'
+import type {IReviewBackupStore} from '../../../../../src/server/core/interfaces/storage/i-review-backup-store.js'
+
+import {BRV_CONFIG_VERSION} from '../../../../../src/server/constants.js'
+import {BrvConfig} from '../../../../../src/server/core/domain/entities/brv-config.js'
+import {ReviewHandler} from '../../../../../src/server/infra/transport/handlers/review-handler.js'
+import {ReviewEvents} from '../../../../../src/shared/transport/events/review-events.js'
+import {createMockTransportServer, type MockTransportServer} from '../../../../helpers/mock-factories.js'
+
+describe('ReviewHandler — config toggle (GET_DISABLED / SET_DISABLED)', () => {
+ let resolveProjectPath: SinonStub
+ let transport: MockTransportServer
+ let projectConfigStore: Partial<IProjectConfigStore> & {read: SinonStub; write: SinonStub}
+ let
curateLogStoreFactory: SinonStub + let reviewBackupStoreFactory: SinonStub + + beforeEach(() => { + resolveProjectPath = stub().returns('/test/project') + transport = createMockTransportServer() + projectConfigStore = { + read: stub(), + write: stub().resolves(), + } + curateLogStoreFactory = stub().returns({} as ICurateLogStore) + reviewBackupStoreFactory = stub().returns({} as IReviewBackupStore) + }) + + afterEach(() => { + restore() + }) + + function createHandler(): ReviewHandler { + const handler = new ReviewHandler({ + curateLogStoreFactory, + projectConfigStore: projectConfigStore as IProjectConfigStore, + resolveProjectPath, + reviewBackupStoreFactory, + transport, + }) + handler.setup() + return handler + } + + async function callGetDisabled(clientId = 'client-1'): Promise<{reviewDisabled: boolean}> { + const handler = transport._handlers.get(ReviewEvents.GET_DISABLED) + expect(handler, 'review:getDisabled handler should be registered').to.exist + return handler!({}, clientId) as Promise<{reviewDisabled: boolean}> + } + + async function callSetDisabled( + reviewDisabled: boolean, + clientId = 'client-1', + ): Promise<{reviewDisabled: boolean}> { + const handler = transport._handlers.get(ReviewEvents.SET_DISABLED) + expect(handler, 'review:setDisabled handler should be registered').to.exist + return handler!({reviewDisabled}, clientId) as Promise<{reviewDisabled: boolean}> + } + + describe('setup', () => { + it('registers review:getDisabled and review:setDisabled handlers', () => { + createHandler() + expect(transport._handlers.has(ReviewEvents.GET_DISABLED)).to.be.true + expect(transport._handlers.has(ReviewEvents.SET_DISABLED)).to.be.true + }) + }) + + describe('handleGetDisabled', () => { + it('returns reviewDisabled=false when config has no flag', async () => { + projectConfigStore.read.resolves(BrvConfig.createLocal({cwd: '/test/project'})) + createHandler() + + const response = await callGetDisabled() + expect(response.reviewDisabled).to.equal(false) 
+ }) + + it('returns reviewDisabled=true when config has the flag set', async () => { + projectConfigStore.read.resolves( + new BrvConfig({createdAt: '2025-01-01T00:00:00.000Z', cwd: '/test/project', reviewDisabled: true, version: BRV_CONFIG_VERSION}), + ) + createHandler() + + const response = await callGetDisabled() + expect(response.reviewDisabled).to.equal(true) + }) + + it('throws when project is not initialized', async () => { + projectConfigStore.read.resolves() + createHandler() + + let threw = false + try { + await callGetDisabled() + } catch (error) { + threw = true + expect(String(error)).to.match(/not initialized/i) + } + + expect(threw, 'should throw when config is missing').to.be.true + }) + }) + + describe('handleSetDisabled', () => { + it('writes reviewDisabled=true while preserving other config fields', async () => { + const original = new BrvConfig({ + chatLogPath: '/path/chat.log', + createdAt: '2025-01-01T00:00:00.000Z', + cwd: '/test/project', + ide: 'Claude Code', + spaceId: 'space-1', + spaceName: 'space', + teamId: 'team-1', + teamName: 'team', + version: BRV_CONFIG_VERSION, + }) + projectConfigStore.read.resolves(original) + createHandler() + + const response = await callSetDisabled(true) + + expect(response.reviewDisabled).to.equal(true) + expect(projectConfigStore.write.calledOnce).to.be.true + const written: BrvConfig = projectConfigStore.write.firstCall.args[0] + expect(written.reviewDisabled).to.equal(true) + expect(written.spaceId).to.equal('space-1') + expect(written.teamId).to.equal('team-1') + expect(written.chatLogPath).to.equal('/path/chat.log') + expect(projectConfigStore.write.firstCall.args[1]).to.equal('/test/project') + }) + + it('writes reviewDisabled=false when re-enabling', async () => { + projectConfigStore.read.resolves( + new BrvConfig({createdAt: '2025-01-01T00:00:00.000Z', cwd: '/test/project', reviewDisabled: true, version: BRV_CONFIG_VERSION}), + ) + createHandler() + + const response = await callSetDisabled(false) 
+ + expect(response.reviewDisabled).to.equal(false) + const written: BrvConfig = projectConfigStore.write.firstCall.args[0] + expect(written.reviewDisabled).to.equal(false) + }) + + it('throws when project is not initialized', async () => { + projectConfigStore.read.resolves() + createHandler() + + let threw = false + try { + await callSetDisabled(true) + } catch (error) { + threw = true + expect(String(error)).to.match(/not initialized/i) + } + + expect(threw, 'should throw when config is missing').to.be.true + expect(projectConfigStore.write.called).to.be.false + }) + }) +}) diff --git a/test/unit/infra/transport/handlers/vc-handler.test.ts b/test/unit/infra/transport/handlers/vc-handler.test.ts index 9510fe8b5..44c59ab51 100644 --- a/test/unit/infra/transport/handlers/vc-handler.test.ts +++ b/test/unit/infra/transport/handlers/vc-handler.test.ts @@ -1445,7 +1445,9 @@ describe('VcHandler', () => { deps.gitService.isInitialized.resolves(true) deps.gitService.listRemotes.resolves([{remote: 'origin', url: 'https://example.com/repo.git'}]) deps.gitService.pull.rejects( - new GitError('Local changes would be overwritten by pull. Commit or discard your changes first.'), + new GitError( + 'Your local changes would be overwritten by pull. 
Please commit or discard your changes before you pull.', + ), ) deps.gitService.getTrackingBranch.resolves({remote: 'origin', remoteBranch: 'main'}) makeVcHandler(deps).setup() @@ -1462,6 +1464,34 @@ describe('VcHandler', () => { } }) + it('should surface affected file paths in pull overwrite error', async () => { + const deps = makeDeps(sandbox, projectPath) + deps.gitService.isInitialized.resolves(true) + deps.gitService.listRemotes.resolves([{remote: 'origin', url: 'https://example.com/repo.git'}]) + deps.gitService.pull.rejects( + new GitError( + 'Your local changes to the following files would be overwritten by pull:\n' + + '\ttracked.md\n' + + '\tnotes/log.md\n' + + 'Please commit or discard your changes before you pull.', + ), + ) + deps.gitService.getTrackingBranch.resolves({remote: 'origin', remoteBranch: 'main'}) + makeVcHandler(deps).setup() + + try { + await deps.requestHandlers[VcEvents.PULL]({}, CLIENT_ID) + expect.fail('Expected error') + } catch (error) { + expect(error).to.be.instanceOf(VcError) + if (error instanceof VcError) { + expect(error.code).to.equal(VcErrorCode.UNCOMMITTED_CHANGES) + expect(error.message).to.include('tracked.md') + expect(error.message).to.include('notes/log.md') + } + } + }) + it('should throw VcError UNRELATED_HISTORIES when pull fails with unrelated histories GitError', async () => { const deps = makeDeps(sandbox, projectPath) deps.gitService.isInitialized.resolves(true) @@ -3020,8 +3050,8 @@ describe('VcHandler', () => { deps.gitService.getCurrentBranch.resolves('main') deps.gitService.checkout.rejects( new GitError( - 'Your local changes to the following files would be overwritten by checkout. ' + - 'Commit your changes or stash them before you switch branches.', + 'Your local changes would be overwritten by checkout. 
' + + 'Please commit or discard your changes before you switch branches.', ), ) makeVcHandler(deps).setup() From 8b6167ba9e7889b9b059f7c813f87443a734c501 Mon Sep 17 00:00:00 2001 From: Danh Doan Date: Mon, 4 May 2026 11:28:14 +0700 Subject: [PATCH 13/13] refactor: address review-agent feedback on PR #601 Four small reviewer-flagged issues with focused fixes: - abstract-queue.ts: drop non-absolute contextPath at enqueue with a trace breadcrumb. Background batch writes derive .abstract.md / .overview.md via raw writeFile and errors are catch-suppressed, so a relative path would silently land under process.cwd(). - tool-manager.ts: document the insertion-order contract for CURATE_TOOL_NAMES and QUERY_TOOL_NAMES at the registration site. toAiSdkTools attaches Anthropic cacheControl: ephemeral to the LAST tool, so reordering this list silently shifts the cache breakpoint. - dream-state-service.ts: skip the disk write in update() when the updater returns the same state reference. drainStaleSummaryPaths and enqueueStaleSummaryPaths already follow the no-op-by-identity convention; this makes the contract observable at the disk level so empty drains don't tmpfile + rename for nothing. New test pins it. - curate-executor.ts: correct the catch-block comment under the cascade-defer enqueue. The previous wording implied diffStates would re-capture the lost paths automatically; in fact paths uniquely changed by the failing curate are dropped until the same files are modified again or dream's own snapshot diff touches them. 
--- src/agent/infra/map/abstract-queue.ts | 14 +++++++++++++- src/agent/infra/tools/tool-manager.ts | 11 ++++++++++- src/server/infra/dream/dream-state-service.ts | 11 ++++++++++- src/server/infra/executor/curate-executor.ts | 8 ++++++-- test/unit/infra/dream/dream-state-service.test.ts | 13 +++++++++++++ 5 files changed, 52 insertions(+), 5 deletions(-) diff --git a/src/agent/infra/map/abstract-queue.ts b/src/agent/infra/map/abstract-queue.ts index 2df2fa97b..862c7b9a7 100644 --- a/src/agent/infra/map/abstract-queue.ts +++ b/src/agent/infra/map/abstract-queue.ts @@ -1,6 +1,6 @@ import {appendFileSync} from 'node:fs' import {mkdir, writeFile} from 'node:fs/promises' -import {join} from 'node:path' +import {isAbsolute, join} from 'node:path' import type {IContentGenerator} from '../../core/interfaces/i-content-generator.js' @@ -111,6 +111,18 @@ export class AbstractGenerationQueue { * Add a file to the abstract generation queue. */ enqueue(item: {contextPath: string; fullContent: string}): void { + // Background batch writes derive .abstract.md / .overview.md from + // contextPath via raw `writeFile`. A relative path would resolve under + // process.cwd() rather than the intended context-tree location, and the + // failure would be invisible because batch errors are catch-suppressed. + // Drop misconfigured items at the entry point with a trace breadcrumb + // rather than failing loudly — callers are internal and treat the queue + // as fail-open. 
+ if (!isAbsolute(item.contextPath)) { + queueLog(`enqueue:dropped non-absolute path=${item.contextPath}`) + return + } + // Guard against paths that must never trigger abstract generation: // - derived artifacts (.abstract.md, .overview.md) — would produce .abstract.abstract.md // - summary index files (_index.md) — domain/topic summaries, not knowledge nodes diff --git a/src/agent/infra/tools/tool-manager.ts b/src/agent/infra/tools/tool-manager.ts index 3ec59230c..63c2523ec 100644 --- a/src/agent/infra/tools/tool-manager.ts +++ b/src/agent/infra/tools/tool-manager.ts @@ -35,6 +35,12 @@ export class ToolManager { /** * Tools allowed for curate operations. * Uses code_exec only - curate operations available via tools.curate() in sandbox. + * + * NOTE: Insertion order is load-bearing for Anthropic prompt caching. + * `toAiSdkTools` attaches `cacheControl: ephemeral` to the LAST tool in + * iteration order, which becomes the cache breakpoint. Reordering this + * list (or the per-call sort in `filterToolsForCommand`) silently shifts + * the breakpoint and can halve cache hit-rate. Append new tools at the end. */ private static readonly CURATE_TOOL_NAMES = [ 'agentic_map', @@ -45,7 +51,10 @@ export class ToolManager { /** * Tools allowed for query operations - only code_exec for programmatic search * All search operations (searchKnowledge, glob, grep, readFile) are available - * via tools.* SDK inside the sandbox + * via tools.* SDK inside the sandbox. + * + * Same insertion-order contract as CURATE_TOOL_NAMES (Anthropic cache + * breakpoint lands on the last tool). 
*/ private static readonly QUERY_TOOL_NAMES = [ 'code_exec', diff --git a/src/server/infra/dream/dream-state-service.ts b/src/server/infra/dream/dream-state-service.ts index ad3597955..d5ac8fe79 100644 --- a/src/server/infra/dream/dream-state-service.ts +++ b/src/server/infra/dream/dream-state-service.ts @@ -130,7 +130,16 @@ export class DreamStateService { return mutex.withLock(async () => { const state = await this.read() const next = updater(state) - await this.write(next) + // Skip the write when the updater returned the same state reference. + // Existing call sites (drainStaleSummaryPaths on empty queue, + // enqueueStaleSummaryPaths with all-duplicate input) already follow + // this convention by returning `state` unchanged — making the no-op + // contract observable at the disk level avoids a tmpfile + rename on + // every empty drain. + if (next !== state) { + await this.write(next) + } + return next }) } diff --git a/src/server/infra/executor/curate-executor.ts b/src/server/infra/executor/curate-executor.ts index fd39394da..dcba97530 100644 --- a/src/server/infra/executor/curate-executor.ts +++ b/src/server/infra/executor/curate-executor.ts @@ -405,8 +405,12 @@ export class CurateExecutor implements ICurateExecutor { const dreamStateService = new DreamStateService({baseDir: path.join(baseDir, BRV_DIR)}) await dreamStateService.enqueueStaleSummaryPaths(changedPaths) } catch { - // Fail-open: queue write errors never block curation. The next - // curate's enqueue will still capture the same paths via diffStates. + // Fail-open: queue write errors never block curation. If this write + // fails the changed paths are lost from the deferred queue; they will + // only be re-captured if the same files are modified in a later curate + // (diffStates compares a fresh pre/post snapshot pair, not a persistent + // accumulator) or picked up by dream's own snapshot diff if dream + // touches them. 
} try { diff --git a/test/unit/infra/dream/dream-state-service.test.ts b/test/unit/infra/dream/dream-state-service.test.ts index c4907b5b2..e35e1d5cf 100644 --- a/test/unit/infra/dream/dream-state-service.test.ts +++ b/test/unit/infra/dream/dream-state-service.test.ts @@ -2,6 +2,7 @@ import {expect} from 'chai' import {mkdir, readFile, rm, writeFile} from 'node:fs/promises' import {tmpdir} from 'node:os' import {join} from 'node:path' +import {restore, spy} from 'sinon' import type {DreamState} from '../../../../src/server/infra/dream/dream-state-schema.js' @@ -31,6 +32,7 @@ describe('DreamStateService', () => { }) afterEach(async () => { + restore() await rm(tempDir, {force: true, recursive: true}) }) @@ -327,6 +329,17 @@ describe('DreamStateService', () => { expect(snapshot).to.deep.equal([]) }) + it('does NOT issue a write when the queue is already empty', async () => { + // The early-return guard in drainStaleSummaryPaths returns the same + // state ref unchanged; update() then skips the disk write. Without + // this contract, every empty drain would tmpfile + rename for nothing. + const writeSpy = spy(service, 'write') + + await service.drainStaleSummaryPaths() + + expect(writeSpy.called).to.equal(false) + }) + it('different-path enqueue during processing survives', async () => { await service.enqueueStaleSummaryPaths(['auth/jwt/token.md']) const snapshot = await service.drainStaleSummaryPaths()