diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.test.ts b/apps/server/src/provider/Layers/ClaudeAdapter.test.ts index 5c45013f8d..8b1f1389c4 100644 --- a/apps/server/src/provider/Layers/ClaudeAdapter.test.ts +++ b/apps/server/src/provider/Layers/ClaudeAdapter.test.ts @@ -1408,6 +1408,150 @@ describe("ClaudeAdapterLive", () => { ); }); + it.effect("clamps oversized Claude usage to the reported context window", () => { + const harness = makeHarness(); + return Effect.gen(function* () { + const adapter = yield* ClaudeAdapter; + + const runtimeEventsFiber = yield* Stream.take(adapter.streamEvents, 7).pipe( + Stream.runCollect, + Effect.forkChild, + ); + + yield* adapter.startSession({ + threadId: THREAD_ID, + provider: "claudeAgent", + runtimeMode: "full-access", + }); + + yield* adapter.sendTurn({ + threadId: THREAD_ID, + input: "hello", + attachments: [], + }); + + harness.query.emit({ + type: "result", + subtype: "success", + is_error: false, + duration_ms: 1234, + duration_api_ms: 1200, + num_turns: 1, + result: "done", + stop_reason: "end_turn", + session_id: "sdk-session-result-usage-clamped", + usage: { + total_tokens: 535000, + }, + modelUsage: { + "claude-opus-4-6": { + contextWindow: 200000, + maxOutputTokens: 64000, + }, + }, + } as unknown as SDKMessage); + harness.query.finish(); + + const runtimeEvents = Array.from(yield* Fiber.join(runtimeEventsFiber)); + const usageEvent = runtimeEvents.find((event) => event.type === "thread.token-usage.updated"); + assert.equal(usageEvent?.type, "thread.token-usage.updated"); + if (usageEvent?.type === "thread.token-usage.updated") { + assert.deepEqual(usageEvent.payload, { + usage: { + usedTokens: 200000, + lastUsedTokens: 200000, + totalProcessedTokens: 535000, + maxTokens: 200000, + }, + }); + } + }).pipe( + Effect.provideService(Random.Random, makeDeterministicRandomService()), + Effect.provide(harness.layer), + ); + }); + + it.effect( + "preserves oversized Claude result totals after task progress snapshots are recorded", + () => { + const harness = makeHarness(); + return Effect.gen(function* () { + const adapter = yield* ClaudeAdapter; + + const runtimeEventsFiber = yield* Stream.take(adapter.streamEvents, 9).pipe( + Stream.runCollect, + Effect.forkChild, + ); + + yield* adapter.startSession({ + threadId: THREAD_ID, + provider: "claudeAgent", + runtimeMode: "full-access", + }); + + yield* adapter.sendTurn({ + threadId: THREAD_ID, + input: "hello", + attachments: [], + }); + + harness.query.emit({ + type: "system", + subtype: "task_progress", + task_id: "task-usage-clamped", + description: "Thinking through the patch", + usage: { + total_tokens: 190000, + }, + session_id: "sdk-session-task-usage-clamped", + uuid: "task-usage-progress-clamped", + } as unknown as SDKMessage); + + harness.query.emit({ + type: "result", + subtype: "success", + is_error: false, + duration_ms: 1234, + duration_api_ms: 1200, + num_turns: 1, + result: "done", + stop_reason: "end_turn", + session_id: "sdk-session-result-usage-clamped-after-progress", + usage: { + total_tokens: 535000, + }, + modelUsage: { + "claude-opus-4-6": { + contextWindow: 200000, + maxOutputTokens: 64000, + }, + }, + } as unknown as SDKMessage); + harness.query.finish(); + + const runtimeEvents = Array.from(yield* Fiber.join(runtimeEventsFiber)); + const usageEvents = runtimeEvents.filter( + (event) => event.type === "thread.token-usage.updated", + ); + const finalUsageEvent = usageEvents.at(-1); + assert.equal(finalUsageEvent?.type, "thread.token-usage.updated"); + if (finalUsageEvent?.type === "thread.token-usage.updated") { + assert.deepEqual(finalUsageEvent.payload, { + usage: { + usedTokens: 190000, + lastUsedTokens: 190000, + totalProcessedTokens: 535000, + maxTokens: 200000, + }, + }); + } + }).pipe( + Effect.provideService(Random.Random, makeDeterministicRandomService()), + Effect.provide(harness.layer), + ); + }, + ); + it.effect( "emits completion only after turn result when assistant frames arrive before deltas", () => { diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.ts b/apps/server/src/provider/Layers/ClaudeAdapter.ts index fb32da78c5..1a0657c499 100644 --- a/apps/server/src/provider/Layers/ClaudeAdapter.ts +++ b/apps/server/src/provider/Layers/ClaudeAdapter.ts @@ -17,6 +17,8 @@ import { type SDKResultMessage, type SettingSource, type SDKUserMessage, + ModelUsage, + NonNullableUsage, } from "@anthropic-ai/claude-agent-sdk"; import { ApprovalRequestId, @@ -272,24 +274,14 @@ function asRuntimeItemId(value: string): RuntimeItemId { return RuntimeItemId.make(value); } -function maxClaudeContextWindowFromModelUsage(modelUsage: unknown): number | undefined { - if (!modelUsage || typeof modelUsage !== "object") { - return undefined; - } +function maxClaudeContextWindowFromModelUsage( + modelUsage: Record | undefined, +): number | undefined { + if (!modelUsage) return undefined; let maxContextWindow: number | undefined; - for (const value of Object.values(modelUsage as Record)) { - if (!value || typeof value !== "object") { - continue; - } - const contextWindow = (value as { contextWindow?: unknown }).contextWindow; - if ( - typeof contextWindow !== "number" || - !Number.isFinite(contextWindow) || - contextWindow <= 0 - ) { - continue; - } + for (const value of Object.values(modelUsage)) { + const contextWindow = value.contextWindow; maxContextWindow = Math.max(maxContextWindow ?? 0, contextWindow); } @@ -297,53 +289,58 @@ function maxClaudeContextWindowFromModelUsage(modelUsage: unknown): number | und } function normalizeClaudeTokenUsage( - usage: unknown, + value: NonNullableUsage | undefined, contextWindow?: number, ): ThreadTokenUsageSnapshot | undefined { - if (!usage || typeof usage !== "object") { + if (!value || typeof value !== "object") { return undefined; } - const record = usage as Record; - const directUsedTokens = - typeof record.total_tokens === "number" && Number.isFinite(record.total_tokens) - ? record.total_tokens - : undefined; + const usage = value as Record; const inputTokens = - (typeof record.input_tokens === "number" && Number.isFinite(record.input_tokens) - ? record.input_tokens + (typeof usage.input_tokens === "number" && Number.isFinite(usage.input_tokens) + ? usage.input_tokens : 0) + - (typeof record.cache_creation_input_tokens === "number" && - Number.isFinite(record.cache_creation_input_tokens) - ? record.cache_creation_input_tokens + (typeof usage.cache_creation_input_tokens === "number" && + Number.isFinite(usage.cache_creation_input_tokens) + ? usage.cache_creation_input_tokens : 0) + - (typeof record.cache_read_input_tokens === "number" && - Number.isFinite(record.cache_read_input_tokens) - ? record.cache_read_input_tokens + (typeof usage.cache_read_input_tokens === "number" && + Number.isFinite(usage.cache_read_input_tokens) + ? usage.cache_read_input_tokens : 0); const outputTokens = - typeof record.output_tokens === "number" && Number.isFinite(record.output_tokens) - ? record.output_tokens + typeof usage.output_tokens === "number" && Number.isFinite(usage.output_tokens) + ? usage.output_tokens : 0; - const derivedUsedTokens = inputTokens + outputTokens; - const usedTokens = directUsedTokens ?? (derivedUsedTokens > 0 ? derivedUsedTokens : undefined); - if (usedTokens === undefined || usedTokens <= 0) { + const derivedTotalProcessedTokens = inputTokens + outputTokens; + const totalProcessedTokens = + (typeof usage.total_tokens === "number" && Number.isFinite(usage.total_tokens) + ? usage.total_tokens + : undefined) ?? (derivedTotalProcessedTokens > 0 ? derivedTotalProcessedTokens : undefined); + if (totalProcessedTokens === undefined || totalProcessedTokens <= 0) { return undefined; } + const maxTokens = + typeof contextWindow === "number" && Number.isFinite(contextWindow) && contextWindow > 0 + ? contextWindow + : undefined; + const usedTokens = + maxTokens !== undefined ? Math.min(totalProcessedTokens, maxTokens) : totalProcessedTokens; + return { usedTokens, lastUsedTokens: usedTokens, + ...(totalProcessedTokens > usedTokens ? { totalProcessedTokens } : {}), ...(inputTokens > 0 ? { inputTokens } : {}), ...(outputTokens > 0 ? { outputTokens } : {}), - ...(typeof contextWindow === "number" && Number.isFinite(contextWindow) && contextWindow > 0 - ? { maxTokens: contextWindow } - : {}), - ...(typeof record.tool_uses === "number" && Number.isFinite(record.tool_uses) - ? { toolUses: record.tool_uses } + ...(maxTokens !== undefined ? { maxTokens } : {}), + ...(typeof usage.tool_uses === "number" && Number.isFinite(usage.tool_uses) + ? { toolUses: usage.tool_uses } : {}), - ...(typeof record.duration_ms === "number" && Number.isFinite(record.duration_ms) - ? { durationMs: record.duration_ms } + ...(typeof usage.duration_ms === "number" && Number.isFinite(usage.duration_ms) + ? { durationMs: usage.duration_ms } : {}), }; } @@ -1328,8 +1325,6 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* ( errorMessage?: string, result?: SDKResultMessage, ) { - const resultUsage = - result?.usage && typeof result.usage === "object" ? { ...result.usage } : undefined; const resultContextWindow = maxClaudeContextWindowFromModelUsage(result?.modelUsage); if (resultContextWindow !== undefined) { context.lastKnownContextWindow = resultContextWindow; @@ -1341,9 +1336,11 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* ( // Instead, use the last known context-window-accurate usage from task_progress // events and treat the accumulated total as totalProcessedTokens. const accumulatedSnapshot = normalizeClaudeTokenUsage( - resultUsage, + result?.usage, resultContextWindow ?? context.lastKnownContextWindow, ); + const accumulatedTotalProcessedTokens = + accumulatedSnapshot?.totalProcessedTokens ?? accumulatedSnapshot?.usedTokens; const lastGoodUsage = context.lastKnownTokenUsage; const maxTokens = resultContextWindow ?? context.lastKnownContextWindow; const usageSnapshot: ThreadTokenUsageSnapshot | undefined = lastGoodUsage @@ -1352,8 +1349,10 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* ( ...(typeof maxTokens === "number" && Number.isFinite(maxTokens) && maxTokens > 0 ? { maxTokens } : {}), - ...(accumulatedSnapshot && accumulatedSnapshot.usedTokens > lastGoodUsage.usedTokens - ? { totalProcessedTokens: accumulatedSnapshot.usedTokens } + ...(typeof accumulatedTotalProcessedTokens === "number" && + Number.isFinite(accumulatedTotalProcessedTokens) && + accumulatedTotalProcessedTokens > lastGoodUsage.usedTokens + ? { totalProcessedTokens: accumulatedTotalProcessedTokens } : {}), } : accumulatedSnapshot;