Merged
23 commits
e8fa123
feat: [ENG-2518] batch abstract generation across queued files
RyanNg1403 Apr 28, 2026
0cf9ae5
feat: [ENG-2485] defer summary cascade to dream
RyanNg1403 Apr 28, 2026
0e5e1b3
refactor: [ENG-2485] address review-agent feedback on PR #579
RyanNg1403 Apr 29, 2026
c17a0fa
refactor: [ENG-2518] address review-agent feedback on PR #580
RyanNg1403 Apr 29, 2026
7838bf4
refactor: [ENG-2485] address second-pass review feedback on PR #579
RyanNg1403 Apr 29, 2026
887264f
refactor: [ENG-2518] address second-pass review feedback on PR #580
RyanNg1403 Apr 29, 2026
adda26e
Merge pull request #579 from campfirein/feat/ENG-2485
danhdoan May 1, 2026
b5c25ff
Merge branch 'proj/curation-enhancement' into feat/ENG-2518
danhdoan May 1, 2026
3fbb17d
Merge pull request #580 from campfirein/feat/ENG-2518
danhdoan May 1, 2026
19db5ba
feat: [ENG-2519] enable prefix caching for providers
RyanNg1403 May 1, 2026
493e848
refactor: [ENG-2519] address review-agent feedback on dateTime cache …
RyanNg1403 May 1, 2026
d51b243
Merge pull request #591 from campfirein/feat/ENG-2519
RyanNg1403 May 2, 2026
06cc5d0
feat: [ENG-2530] pre-pipeline recon to skip first agent iteration
RyanNg1403 May 1, 2026
550cc70
test: [ENG-2530] cover recon pre-pipeline wiring on curate-executor
RyanNg1403 May 2, 2026
8941d79
refactor: [ENG-2530] address review-agent feedback on PR #593
RyanNg1403 May 2, 2026
311b306
Merge pull request #593 from campfirein/feat/ENG-2530
danhdoan May 2, 2026
1ae81cd
merge: sync main (3.10.1) into curation-enhancement
danhdoan May 4, 2026
d8c15bc
Merge pull request #599 from campfirein/chore/curation-enhancement-sy…
danhdoan May 4, 2026
3408a3d
merge: record main 3.10.1 as integrated (resolution already landed vi…
danhdoan May 4, 2026
0f40a05
Merge pull request #600 from campfirein/chore/curation-sync-main-3-10-1
danhdoan May 4, 2026
7389641
Merge branch 'main' into chore/curation-sync-main
danhdoan May 4, 2026
eea396b
Merge pull request #602 from campfirein/chore/curation-sync-main
danhdoan May 4, 2026
8b6167b
refactor: address review-agent feedback on PR #601
danhdoan May 4, 2026
10 changes: 8 additions & 2 deletions src/agent/infra/agent/service-initializer.ts
@@ -193,12 +193,18 @@ export async function createCipherAgentServices(
basePath: promptsBasePath,
validateConfig: true,
})
// Register default contributors
// Register default contributors.
//
// Note: dateTime is intentionally NOT in the system prompt. Anthropic
// prompt caching does token-level prefix matching, so a per-iteration
// refreshed timestamp here would invalidate the cache for everything
// past it. dateTime is instead injected into the first user message
// by AgentLLMService, where it lives after the cache breakpoints and
// does not poison the cached prefix.
systemPromptManager.registerContributors([
{enabled: true, filepath: 'system-prompt.yml', id: 'base', priority: 0, type: 'file'},
{enabled: true, id: 'env', priority: 10, type: 'environment'},
{enabled: true, id: 'memories', priority: 20, type: 'memory'},
{enabled: true, id: 'datetime', priority: 30, type: 'dateTime'},
])

// Register context tree structure contributor for query/curate commands
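The mechanics behind that comment are easy to demonstrate. Below is a minimal, self-contained sketch (illustration only, not part of this PR; the prompt strings and the sharedPrefixLength helper are invented) showing how a refreshed timestamp caps the token-level prefix match:

// Illustration: the first divergent byte ends the reusable prefix.
function sharedPrefixLength(a: string, b: string): number {
  let i = 0
  while (i < a.length && i < b.length && a[i] === b[i]) i++
  return i
}

const base = 'You are the agent.\n<env>...</env>\n'
const tail = '<memories>...</memories>\n'
const iter0 = base + `<dateTime>Current date and time: ${new Date().toISOString()}</dateTime>\n` + tail
const iter1 = base + `<dateTime>Current date and time: ${new Date(Date.now() + 60_000).toISOString()}</dateTime>\n` + tail

// Everything up to the timestamp matches; everything after it, including the
// byte-identical <memories> block, can no longer be served from cache.
console.log(sharedPrefixLength(iter0, iter1)) // match ends inside <dateTime>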
48 changes: 29 additions & 19 deletions src/agent/infra/llm/agent-llm-service.ts
@@ -60,6 +60,18 @@
/** Target utilization ratio for message tokens (leaves headroom for response) */
const TARGET_MESSAGE_TOKEN_UTILIZATION = 0.7

/**
* Build a `<dateTime>...</dateTime>\n\n` prefix for a user-message body.
*
* Per-call timestamps must NOT enter the system prompt (they would poison
* the prefix cache). They are injected into the user message instead, at
* the boundaries where the model legitimately needs fresh time context:
* the iter-0 input, and after a rolling-checkpoint history clear.
*/
export function buildDateTimePrefix(now: Date = new Date()): string {
return `<dateTime>Current date and time: ${now.toISOString()}</dateTime>\n\n`
}

/**
* Result of parallel tool execution (before adding to context).
* Contains all information needed to add the result to context in order.
@@ -198,7 +210,7 @@
*
* @param sessionId - Unique identifier for this session
* @param generator - Content generator for LLM calls (with decorators pre-applied)
* @param config - LLM service configuration (model, tokens, temperature)

(GitHub Actions lint warning on line 213: Missing @param "options.compressionStrategies")
* @param options - Service dependencies
* @param options.toolManager - Tool manager for executing agent tools
* @param options.systemPromptManager - System prompt manager for building system prompts
@@ -902,8 +914,11 @@
this.cachedBasePrompt = basePrompt
this.memoryDirtyFlag = false
} else {
// Cache hit: reuse base prompt, only refresh the DateTime section
basePrompt = this.refreshDateTime(this.cachedBasePrompt!)
// Cache hit: reuse base prompt verbatim. The cached prompt has no
// dateTime section to refresh — dateTime is injected into the
// first user message instead so the system prefix stays byte-stable
// across iterations and prompt caching can engage cleanly.
basePrompt = this.cachedBasePrompt!
}

let systemPrompt = basePrompt
@@ -944,9 +959,13 @@

// Add user message and compress context within mutex lock
return this.mutex.withLock(async () => {
// Add user message to context only on the first iteration
// Add user message to context only on the first iteration. The
// dateTime block is prefixed here (not in the system prompt) so
// the cached system prefix stays byte-stable across iterations
// and Anthropic/OpenAI/Google prefix caches can engage cleanly.
if (iterationCount === 0) {
await this.contextManager.addUserMessage(textInput, imageData, fileData)
const inputWithDateTime = `${buildDateTimePrefix()}${textInput}`
await this.contextManager.addUserMessage(inputWithDateTime, imageData, fileData)
}

// Rolling checkpoint: periodically save progress and clear history for RLM commands.
@@ -1540,8 +1559,12 @@
// Clear conversation history
await this.contextManager.clearHistory()

// Re-inject continuation prompt with variable reference
const continuationPrompt = [
// Re-inject continuation prompt with variable reference.
// Prepend the dateTime block: clearHistory wiped the iter-0 user
// message that originally carried it, and the iter-0 guard upstream
// prevents re-injection. Without this, every iteration after the
// first checkpoint loses time context for the rest of the run.
const continuationPrompt = buildDateTimePrefix() + [
`Continue task. Iteration checkpoint at turn ${iterationCount}.`,
`Previous progress stored in variable: ${checkpointVar}`,
`Original task: ${textInput.slice(0, 200)}${textInput.length > 200 ? '...' : ''}`,
@@ -1555,19 +1578,6 @@
})
}

/**
* Replace the DateTime section in a cached system prompt with a fresh timestamp.
* DateTimeContributor wraps its output in <dateTime>...</dateTime> XML tags,
* enabling reliable regex replacement without rebuilding the entire prompt.
*
* @param cachedPrompt - Previously cached system prompt
* @returns Updated prompt with fresh DateTime
*/
private refreshDateTime(cachedPrompt: string): string {
const freshDateTime = `<dateTime>Current date and time: ${new Date().toISOString()}</dateTime>`
return cachedPrompt.replace(/<dateTime>[\S\s]*?<\/dateTime>/, freshDateTime)
}

/**
* Check if a rolling checkpoint should trigger.
* Triggers every N iterations for curate/query commands, or when token utilization is high.
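Taken together, the timestamp now enters the conversation at exactly two points. A short usage sketch (the task strings and import path are illustrative; only buildDateTimePrefix is real):

import {buildDateTimePrefix} from './agent-llm-service.js'

// Iteration 0: the timestamp rides on the first user message, downstream of
// the cache breakpoints, so the system prefix stays byte-stable.
const firstMessage = `${buildDateTimePrefix()}Curate the queued files.`

// After a rolling checkpoint clears history, the iter-0 message (and the
// timestamp it carried) is gone, and the iterationCount === 0 guard will not
// fire again, so the continuation prompt carries a fresh prefix itself.
const continuation = buildDateTimePrefix() + 'Continue task. Iteration checkpoint at turn 8.'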
30 changes: 25 additions & 5 deletions src/agent/infra/llm/generators/ai-sdk-content-generator.ts
@@ -5,7 +5,7 @@
* Replaces per-provider content generators with one unified implementation.
*/

import type {LanguageModel} from 'ai'
import type {LanguageModel, ModelMessage} from 'ai'

import {generateText, streamText} from 'ai'

@@ -22,6 +22,28 @@ import {toAiSdkTools, toModelMessages} from './ai-sdk-message-converter.js'

const DEFAULT_CHARS_PER_TOKEN = 4

/**
* Prepend the system prompt as a system-role message carrying
* `providerOptions.anthropic.cacheControl: ephemeral`. AI SDK's top-level
* `system: string` parameter does not propagate providerOptions, so the
* only way to attach Anthropic cache_control to the system block is to
* pass it through the messages array. Non-Anthropic providers ignore the
* `anthropic` namespace.
*/
export function prependCachedSystemMessage(systemPrompt: string | undefined, messages: ModelMessage[]): ModelMessage[] {
if (!systemPrompt) {
return messages
}

const systemMessage: ModelMessage = {
content: systemPrompt,
providerOptions: {anthropic: {cacheControl: {type: 'ephemeral'}}},
role: 'system',
}

return [systemMessage, ...messages]
}

/**
* Configuration for AiSdkContentGenerator.
*/
@@ -54,7 +76,7 @@ export class AiSdkContentGenerator implements IContentGenerator {
}

public async generateContent(request: GenerateContentRequest): Promise<GenerateContentResponse> {
const messages = toModelMessages(request.contents)
const messages = prependCachedSystemMessage(request.systemPrompt, toModelMessages(request.contents))
const tools = toAiSdkTools(request.tools)

const result = await generateText({
@@ -63,7 +85,6 @@
messages,
model: this.model,
temperature: request.config.temperature,
...(request.systemPrompt && {system: request.systemPrompt}),
...(tools && {tools}),
...(request.config.topK !== undefined && {topK: request.config.topK}),
...(request.config.topP !== undefined && {topP: request.config.topP}),
@@ -100,7 +121,7 @@
}

public async *generateContentStream(request: GenerateContentRequest): AsyncGenerator<GenerateContentChunk> {
const messages = toModelMessages(request.contents)
const messages = prependCachedSystemMessage(request.systemPrompt, toModelMessages(request.contents))
const tools = toAiSdkTools(request.tools)

const result = streamText({
@@ -109,7 +130,6 @@
messages,
model: this.model,
temperature: request.config.temperature,
...(request.systemPrompt && {system: request.systemPrompt}),
...(tools && {tools}),
...(request.config.topK !== undefined && {topK: request.config.topK}),
...(request.config.topP !== undefined && {topP: request.config.topP}),
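For reference, a sketch of the message array the generator now builds (the prompt and user text are invented; shapes abbreviated):

const messages = prependCachedSystemMessage('You are the curation agent. ...', [
  {content: 'Curate ./docs', role: 'user'},
])
// messages[0]:
// {
//   content: 'You are the curation agent. ...',
//   providerOptions: {anthropic: {cacheControl: {type: 'ephemeral'}}},
//   role: 'system',
// }
// The same text passed as generateText({system: ...}) would still reach the
// model, but with no way to attach cache_control, so the system block would
// never be cached.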
src/agent/infra/llm/generators/ai-sdk-message-converter.ts
@@ -63,18 +63,25 @@ export function toModelMessages(messages: InternalMessage[]): ModelMessage[] {
/**
* Convert our ToolSet to AI SDK tool definitions.
* Tools are declared without `execute` — our agentic loop handles execution.
*
* The last tool gets `providerOptions.anthropic.cacheControl: ephemeral`,
* which makes Anthropic cache the entire tool block (tools are serialized
* ahead of the system prompt and messages in Anthropic's cache order).
* Non-Anthropic providers ignore the `anthropic` namespace.
*/
export function toAiSdkTools(tools?: InternalToolSet): Record<string, ReturnType<typeof aiSdkTool>> | undefined {
if (!tools || Object.keys(tools).length === 0) {
return undefined
}

const entries = Object.entries(tools)
const result: Record<string, ReturnType<typeof aiSdkTool>> = {}

for (const [name, def] of Object.entries(tools)) {
for (const [index, [name, def]] of entries.entries()) {
const isLast = index === entries.length - 1
result[name] = aiSdkTool({
description: def.description ?? '',
inputSchema: jsonSchema(def.parameters as Record<string, unknown>),
...(isLast && {providerOptions: {anthropic: {cacheControl: {type: 'ephemeral'}}}}),
})
}

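A sketch of where the breakpoint lands (the two tool definitions are invented; toAiSdkTools is the function above):

const tools = toAiSdkTools({
  read_file: {description: 'Read a file', parameters: {properties: {}, type: 'object'}},
  write_file: {description: 'Write a file', parameters: {properties: {}, type: 'object'}},
})
// Only the final entry, tools!.write_file, carries the cacheControl
// providerOption. Anthropic serializes tools ahead of system and messages,
// and a breakpoint caches everything up to itself, so a single marker on
// the last tool covers the entire tool block.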