campfirein
diff --git a/‎src/agent/infra/agent/service-initializer.ts‎
Lines changed: 8 additions & 2 deletions b/‎src/agent/infra/agent/service-initializer.ts‎
Lines changed: 8 additions & 2 deletions
diff --git a/‎src/agent/infra/llm/agent-llm-service.ts‎
Lines changed: 29 additions & 19 deletions b/‎src/agent/infra/llm/agent-llm-service.ts‎
Lines changed: 29 additions & 19 deletions
diff --git a/‎src/agent/infra/llm/generators/ai-sdk-content-generator.ts‎
Lines changed: 25 additions & 5 deletions b/‎src/agent/infra/llm/generators/ai-sdk-content-generator.ts‎
Lines changed: 25 additions & 5 deletions
diff --git a/‎src/agent/infra/llm/generators/ai-sdk-message-converter.ts‎
Lines changed: 8 additions & 1 deletion b/‎src/agent/infra/llm/generators/ai-sdk-message-converter.ts‎
Lines changed: 8 additions & 1 deletion
@@ -193,12 +193,18 @@ export async function createCipherAgentServices(
     basePath: promptsBasePath,
     validateConfig: true,
   })
-  // Register default contributors
+  // Register default contributors.
+  //
+  // Note: dateTime is intentionally NOT in the system prompt. Anthropic
+  // prompt caching does token-level prefix matching, so a per-iteration
+  // refreshed timestamp here would invalidate the cache for everything
+  // past it. dateTime is instead injected into the first user message
+  // by AgentLLMService, where it lives after the cache breakpoints and
+  // does not poison the cached prefix.
   systemPromptManager.registerContributors([
     {enabled: true, filepath: 'system-prompt.yml', id: 'base', priority: 0, type: 'file'},
     {enabled: true, id: 'env', priority: 10, type: 'environment'},
     {enabled: true, id: 'memories', priority: 20, type: 'memory'},
-    {enabled: true, id: 'datetime', priority: 30, type: 'dateTime'},
   ])
 
   // Register context tree structure contributor for query/curate commands
 
@@ -60,6 +60,18 @@ import {type ProcessedOutput, ToolOutputProcessor, type TruncationConfig} from '
 /** Target utilization ratio for message tokens (leaves headroom for response) */
 const TARGET_MESSAGE_TOKEN_UTILIZATION = 0.7
 
+/**
+ * Build a `<dateTime>...</dateTime>\n\n` prefix for a user-message body.
+ *
+ * Per-call timestamps must NOT enter the system prompt (they would poison
+ * the prefix cache). They are prefixed to user messages instead: the iter-0
+ * input, and the continuation prompt after a rolling-checkpoint history clear.
+ * @param now - Instant to embed via `toISOString()` (UTC); defaults to the current time.
+ */
+export function buildDateTimePrefix(now: Date = new Date()): string {
+  return `<dateTime>Current date and time: ${now.toISOString()}</dateTime>\n\n`
+}
+
 /**
  * Result of parallel tool execution (before adding to context).
  * Contains all information needed to add the result to context in order.
@@ -902,8 +914,11 @@ export class AgentLLMService implements ILLMService {
       this.cachedBasePrompt = basePrompt
       this.memoryDirtyFlag = false
     } else {
-      // Cache hit: reuse base prompt, only refresh the DateTime section
-      basePrompt = this.refreshDateTime(this.cachedBasePrompt!)
+      // Cache hit: reuse base prompt verbatim. The cached prompt has no
+      // dateTime section to refresh — dateTime is injected into the
+      // first user message instead so the system prefix stays byte-stable
+      // across iterations and prompt caching can engage cleanly.
+      basePrompt = this.cachedBasePrompt!
     }
 
     let systemPrompt = basePrompt
@@ -944,9 +959,13 @@ export class AgentLLMService implements ILLMService {
 
     // Add user message and compress context within mutex lock
     return this.mutex.withLock(async () => {
-      // Add user message to context only on the first iteration
+      // Add user message to context only on the first iteration. The
+      // dateTime block is prefixed here (not in the system prompt) so
+      // the cached system prefix stays byte-stable across iterations
+      // and Anthropic/OpenAI/Google prefix caches can engage cleanly.
       if (iterationCount === 0) {
-        await this.contextManager.addUserMessage(textInput, imageData, fileData)
+        const inputWithDateTime = `${buildDateTimePrefix()}${textInput}`
+        await this.contextManager.addUserMessage(inputWithDateTime, imageData, fileData)
       }
 
       // Rolling checkpoint: periodically save progress and clear history for RLM commands.
@@ -1540,8 +1559,12 @@ export class AgentLLMService implements ILLMService {
     // Clear conversation history
     await this.contextManager.clearHistory()
 
-    // Re-inject continuation prompt with variable reference
-    const continuationPrompt = [
+    // Re-inject continuation prompt with variable reference.
+    // Prepend the dateTime block: clearHistory wiped the iter-0 user
+    // message that originally carried it, and the iter-0 guard upstream
+    // prevents re-injection. Without this, every iteration after the
+    // first checkpoint loses time context for the rest of the run.
+    const continuationPrompt = buildDateTimePrefix() + [
       `Continue task. Iteration checkpoint at turn ${iterationCount}.`,
       `Previous progress stored in variable: ${checkpointVar}`,
       `Original task: ${textInput.slice(0, 200)}${textInput.length > 200 ? '...' : ''}`,
@@ -1555,19 +1578,6 @@ export class AgentLLMService implements ILLMService {
     })
   }
 
-  /**
-   * Replace the DateTime section in a cached system prompt with a fresh timestamp.
-   * DateTimeContributor wraps its output in <dateTime>...</dateTime> XML tags,
-   * enabling reliable regex replacement without rebuilding the entire prompt.
-   *
-   * @param cachedPrompt - Previously cached system prompt
-   * @returns Updated prompt with fresh DateTime
-   */
-  private refreshDateTime(cachedPrompt: string): string {
-    const freshDateTime = `<dateTime>Current date and time: ${new Date().toISOString()}</dateTime>`
-    return cachedPrompt.replace(/<dateTime>[\S\s]*?<\/dateTime>/, freshDateTime)
-  }
-
   /**
    * Check if a rolling checkpoint should trigger.
    * Triggers every N iterations for curate/query commands, or when token utilization is high.
 
@@ -5,7 +5,7 @@
  * Replaces per-provider content generators with one unified implementation.
  */
 
-import type {LanguageModel} from 'ai'
+import type {LanguageModel, ModelMessage} from 'ai'
 
 import {generateText, streamText} from 'ai'
 
@@ -22,6 +22,28 @@ import {toAiSdkTools, toModelMessages} from './ai-sdk-message-converter.js'
 
 const DEFAULT_CHARS_PER_TOKEN = 4
 
+/**
+ * Prepend `systemPrompt` as a system-role message carrying
+ * `providerOptions.anthropic.cacheControl: ephemeral`. AI SDK's top-level
+ * `system: string` does not propagate providerOptions, so the messages array
+ * is the only way to attach Anthropic cache_control to the system block.
+ * Non-Anthropic providers ignore the `anthropic` namespace.
+ * @returns `messages` itself if `systemPrompt` is empty/undefined, else a new array (input not mutated).
+ */
+export function prependCachedSystemMessage(systemPrompt: string | undefined, messages: ModelMessage[]): ModelMessage[] {
+  if (!systemPrompt) {
+    return messages
+  }
+
+  const systemMessage: ModelMessage = {
+    content: systemPrompt,
+    providerOptions: {anthropic: {cacheControl: {type: 'ephemeral'}}},
+    role: 'system',
+  }
+
+  return [systemMessage, ...messages]
+}
+
 /**
  * Configuration for AiSdkContentGenerator.
  */
@@ -54,7 +76,7 @@ export class AiSdkContentGenerator implements IContentGenerator {
   }
 
   public async generateContent(request: GenerateContentRequest): Promise<GenerateContentResponse> {
-    const messages = toModelMessages(request.contents)
+    const messages = prependCachedSystemMessage(request.systemPrompt, toModelMessages(request.contents))
     const tools = toAiSdkTools(request.tools)
 
     const result = await generateText({
@@ -63,7 +85,6 @@ export class AiSdkContentGenerator implements IContentGenerator {
       messages,
       model: this.model,
       temperature: request.config.temperature,
-      ...(request.systemPrompt && {system: request.systemPrompt}),
       ...(tools && {tools}),
       ...(request.config.topK !== undefined && {topK: request.config.topK}),
       ...(request.config.topP !== undefined && {topP: request.config.topP}),
@@ -100,7 +121,7 @@ export class AiSdkContentGenerator implements IContentGenerator {
   }
 
   public async *generateContentStream(request: GenerateContentRequest): AsyncGenerator<GenerateContentChunk> {
-    const messages = toModelMessages(request.contents)
+    const messages = prependCachedSystemMessage(request.systemPrompt, toModelMessages(request.contents))
     const tools = toAiSdkTools(request.tools)
 
     const result = streamText({
@@ -109,7 +130,6 @@ export class AiSdkContentGenerator implements IContentGenerator {
       messages,
       model: this.model,
       temperature: request.config.temperature,
-      ...(request.systemPrompt && {system: request.systemPrompt}),
       ...(tools && {tools}),
       ...(request.config.topK !== undefined && {topK: request.config.topK}),
       ...(request.config.topP !== undefined && {topP: request.config.topP}),
 
@@ -63,18 +63,25 @@ export function toModelMessages(messages: InternalMessage[]): ModelMessage[] {
 /**
  * Convert our ToolSet to AI SDK tool definitions.
  * Tools are declared without `execute` — our agentic loop handles execution.
+ *
+ * The last tool gets `providerOptions.anthropic.cacheControl: ephemeral`,
+ * which makes Anthropic cache the entire tool block (tools come first in its
+ * tools → system → messages prefix). Non-Anthropic providers ignore `anthropic`.
  */
 export function toAiSdkTools(tools?: InternalToolSet): Record<string, ReturnType<typeof aiSdkTool>> | undefined {
   if (!tools || Object.keys(tools).length === 0) {
     return undefined
   }
 
+  const entries = Object.entries(tools)
   const result: Record<string, ReturnType<typeof aiSdkTool>> = {}
 
-  for (const [name, def] of Object.entries(tools)) {
+  for (const [index, [name, def]] of entries.entries()) {
+    const isLast = index === entries.length - 1
     result[name] = aiSdkTool({
       description: def.description ?? '',
       inputSchema: jsonSchema(def.parameters as Record<string, unknown>),
+      ...(isLast && {providerOptions: {anthropic: {cacheControl: {type: 'ephemeral'}}}}),
     })
   }