diff --git a/.changeset/model-aware-compaction.md b/.changeset/model-aware-compaction.md new file mode 100644 index 00000000000..8ce850a0441 --- /dev/null +++ b/.changeset/model-aware-compaction.md @@ -0,0 +1,5 @@ +--- +"@kilocode/cli": patch +--- + +Improve automatic compaction recovery for large sessions by scaling pruning budgets to the active model and shrinking overflow compaction input. diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index 212f5fdbab8..3c09a2a625e 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -2,7 +2,7 @@ import { BusEvent } from "@/bus/bus-event" import { Bus } from "@/bus" import * as Session from "./session" import { SessionID, MessageID, PartID } from "./schema" -import { Provider } from "../provider" +import { Provider, ProviderTransform } from "../provider" // kilocode_change import { MessageV2 } from "./message-v2" import z from "zod" import { Token } from "../util" @@ -31,6 +31,105 @@ export const Event = { export const PRUNE_MINIMUM = 20_000 export const PRUNE_PROTECT = 40_000 const PRUNE_PROTECTED_TOOLS = ["skill"] +// kilocode_change start - model-aware compaction budgets +const BUDGET_BUFFER = 20_000 +const BUDGET_NORMAL_RATIO = 0.2 +const BUDGET_OVERFLOW_RATIO = 0.05 +const BUDGET_PROMPT_RATIO = 0.1 +const BUDGET_NORMAL_MIN = 8_000 +const BUDGET_NORMAL_MAX = 60_000 +const BUDGET_OVERFLOW_MIN = 2_000 +const BUDGET_OVERFLOW_MAX = 15_000 +const BUDGET_OVERFLOW_TEXT_MIN = 500 +const BUDGET_OVERFLOW_TEXT_MAX = 2_000 +const BUDGET_OVERFLOW_TOOL_MIN = 500 +const BUDGET_OVERFLOW_TOOL_MAX = 4_000 + +function clamp(input: { value: number; min: number; max: number }) { + return Math.max(input.min, Math.min(input.max, input.value)) +} + +function budget(input: { cfg: Config.Info; model: Provider.Model }) { + const output = ProviderTransform.maxOutputTokens(input.model) + const limit = input.model.limit.input || 
input.model.limit.context + const reserved = input.cfg.compaction?.reserved ?? (input.model.limit.input ? Math.min(BUDGET_BUFFER, output) : output) + const prompt = Math.floor(limit * BUDGET_PROMPT_RATIO) + const usable = Math.max(0, limit - reserved - prompt) + const available = usable + const normal = clamp({ + value: Math.floor(available * BUDGET_NORMAL_RATIO), + min: BUDGET_NORMAL_MIN, + max: BUDGET_NORMAL_MAX, + }) + const overflow = clamp({ + value: Math.floor(available * BUDGET_OVERFLOW_RATIO), + min: BUDGET_OVERFLOW_MIN, + max: BUDGET_OVERFLOW_MAX, + }) + return { + usable, + normal, + overflow, + tool: clamp({ value: overflow, min: BUDGET_OVERFLOW_TOOL_MIN, max: BUDGET_OVERFLOW_TOOL_MAX }), + text: clamp({ value: Math.floor(overflow / 2), min: BUDGET_OVERFLOW_TEXT_MIN, max: BUDGET_OVERFLOW_TEXT_MAX }), + messages: usable < 96_000 ? 20 : usable < 224_000 ? 40 : 80, + } +} + +function truncate(input: { text: string; chars: number; label: string }) { + if (input.text.length <= input.chars) return input.text + return `${input.text.slice(0, input.chars)}\n\n[... truncated ${input.text.length - input.chars} chars for ${input.label}]` +} + +function shrink(input: { messages: MessageV2.WithParts[]; budget: ReturnType<typeof budget> }) { + const msgs = input.messages.length > input.budget.messages ? 
input.messages.slice(-input.budget.messages) : input.messages + const state = { total: 0 } + return msgs.map((msg) => ({ + ...msg, + parts: msg.parts.map((part) => { + if (part.type === "tool" && part.state.status === "completed") { + const estimate = Token.estimate(part.state.output) + state.total += estimate + if (state.total <= input.budget.overflow && part.state.output.length <= input.budget.tool) return part + return { + ...part, + state: { + ...part.state, + output: truncate({ text: part.state.output, chars: input.budget.tool, label: "overflow compaction" }), + }, + } + } + if (part.type === "text" && part.synthetic) { + return { + ...part, + text: truncate({ text: part.text, chars: input.budget.text, label: "overflow compaction" }), + } + } + if (part.type === "text") { + return { + ...part, + text: truncate({ text: part.text, chars: input.budget.tool, label: "overflow compaction" }), + } + } + return part + }), + })) +} + +function sanitize(input: { part: MessageV2.Part; budget: ReturnType<typeof budget> }) { + if (input.part.type === "compaction") return undefined + if (input.part.type === "file" && MessageV2.isMedia(input.part.mime)) { + return { type: "text" as const, text: `[Attached ${input.part.mime}: ${input.part.filename ?? "file"}]` } + } + if (input.part.type === "text") { + return { + ...input.part, + text: truncate({ text: input.part.text, chars: input.budget.tool, label: "overflow replay" }), + } + } + return input.part +} +// kilocode_change end export interface Interface { readonly isOverflow: (input: { @@ -96,6 +195,14 @@ export const layer: Layer.Layer< .pipe(Effect.catchIf(NotFoundError.isInstance, () => Effect.succeed(undefined))) if (!msgs) return + // kilocode_change start - scale protected tool-output window with the active model + const last = msgs.findLast((msg) => msg.info.role === "user") + const model = last?.info.role === "user" ? yield* provider.getModel(last.info.model.providerID, last.info.model.modelID) : undefined + const cap = model ? 
budget({ cfg, model }) : undefined + const protect = cap ? cap.normal : PRUNE_PROTECT + const minimum = cap ? Math.min(PRUNE_MINIMUM, Math.floor(protect * 0.75)) : PRUNE_MINIMUM + // kilocode_change end + let total = 0 let pruned = 0 const toPrune: MessageV2.ToolPart[] = [] @@ -114,7 +221,7 @@ export const layer: Layer.Layer< if (part.state.time.compacted) break loop const estimate = Token.estimate(part.state.output) total += estimate - if (total > PRUNE_PROTECT) { + if (total > protect) { // kilocode_change pruned += estimate toPrune.push(part) } @@ -123,7 +230,7 @@ export const layer: Layer.Layer< } log.info("found", { pruned, total }) - if (pruned > PRUNE_MINIMUM) { + if (pruned > minimum) { // kilocode_change for (const part of toPrune) { if (part.state.status === "completed") { part.state.time.compacted = Date.now() @@ -176,6 +283,18 @@ export const layer: Layer.Layer< const model = agent.model ? yield* provider.getModel(agent.model.providerID, agent.model.modelID) : yield* provider.getModel(userMessage.model.providerID, userMessage.model.modelID) + // kilocode_change start - overflow compaction must fit even with MCP/tool schema/plugin prompt overhead + const cap = yield* Effect.gen(function* () { + if (!input.overflow) return undefined + const cfg = yield* config.get() + const cap = budget({ cfg, model }) + if (messages.length > cap.messages) { + log.info("overflow compaction: trimming old messages", { before: messages.length, after: cap.messages }) + } + messages = shrink({ messages, budget: cap }) + return cap + }) + // kilocode_change end // Allow plugins to inject context or replace compaction prompt. const compacting = yield* plugin.trigger( "experimental.session.compacting", @@ -291,17 +410,16 @@ When constructing the summary, try to stick to this template: system: original.system, }) for (const part of replay.parts) { - if (part.type === "compaction") continue - const replayPart = - part.type === "file" && MessageV2.isMedia(part.mime) - ? 
{ type: "text" as const, text: `[Attached ${part.mime}: ${part.filename ?? "file"}]` } - : part + // kilocode_change start - shrink replayed overflow content before auto-continuing + const cleaned = cap ? sanitize({ part, budget: cap }) : part + if (!cleaned) continue yield* session.updatePart({ - ...replayPart, + ...cleaned, id: PartID.ascending(), messageID: replayMsg.id, sessionID: input.sessionID, }) + // kilocode_change end } } diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts index 423409fc3dc..50bfecd67bc 100644 --- a/packages/opencode/test/session/compaction.test.ts +++ b/packages/opencode/test/session/compaction.test.ts @@ -97,7 +97,8 @@ function createModel(opts: { } as Provider.Model } -const wide = () => ProviderTest.fake({ model: createModel({ context: 100_000, output: 32_000 }) }) +const base = createModel({ context: 100_000, output: 32_000 }) // kilocode_change +const wide = () => ProviderTest.fake({ model: base }) // kilocode_change async function user(sessionID: SessionID, text: string) { const msg = await svc.updateMessage({ @@ -167,6 +168,51 @@ function layer(result: "continue" | "compact") { ) } +// kilocode_change start - capture compaction processor input for budget tests +function captureLayer() { + const captured: LLM.StreamInput[] = [] + const proc = Layer.succeed( + SessionProcessorModule.SessionProcessor.Service, + SessionProcessorModule.SessionProcessor.Service.of({ + create: Effect.fn("CaptureSessionProcessor.create")((input) => { + const msg = input.assistantMessage + return Effect.succeed({ + get message() { + return msg + }, + updateToolCall: Effect.fn("CaptureSessionProcessor.updateToolCall")(() => Effect.succeed(undefined)), + completeToolCall: Effect.fn("CaptureSessionProcessor.completeToolCall")(() => Effect.void), + process: Effect.fn("CaptureSessionProcessor.process")((input: LLM.StreamInput) => { + captured.push(input) + return Effect.succeed("continue" as const) 
+ }), + } satisfies SessionProcessorModule.SessionProcessor.Handle) + }), + }), + ) + return { captured, proc } +} + +function captureRuntime( + proc: Layer.Layer, + plugin = Plugin.defaultLayer, + provider = wide(), +) { + const bus = Bus.layer + return ManagedRuntime.make( + Layer.mergeAll(SessionCompaction.layer, bus).pipe( + Layer.provide(provider.layer), + Layer.provide(SessionNs.defaultLayer), + Layer.provide(proc), + Layer.provide(Agent.defaultLayer), + Layer.provide(plugin), + Layer.provide(bus), + Layer.provide(Config.defaultLayer), + ), + ) +} +// kilocode_change end + function runtime(result: "continue" | "compact", plugin = Plugin.defaultLayer, provider = ProviderTest.fake()) { const bus = Bus.layer return ManagedRuntime.make( @@ -183,7 +229,7 @@ function runtime(result: "continue" | "compact", plugin = Plugin.defaultLayer, p } const deps = Layer.mergeAll( - ProviderTest.fake().layer, + wide().layer, // kilocode_change layer("continue"), Agent.defaultLayer, Plugin.defaultLayer, @@ -279,6 +325,22 @@ function autocontinue(enabled: boolean) { }) } +// kilocode_change start - plugin helper for overflow shrinking tests +function contextPlugin(text: string) { + return Layer.mock(Plugin.Service)({ + trigger: (name: Name, _input: Input, output: Output) => { + if (name !== "experimental.session.compacting") return Effect.succeed(output) + return Effect.sync(() => { + ;(output as { context: string[] }).context.push(text) + return output + }) + }, + list: () => Effect.succeed([]), + init: () => Effect.void, + }) +} +// kilocode_change end + describe("session.compaction.isOverflow", () => { it.live( "returns true when token count exceeds usable context", @@ -586,6 +648,97 @@ describe("session.compaction.prune", () => { ), ) + // kilocode_change start - normal pruning should scale with model budget + it.live( + "prunes old tool output below the old fixed 40k token threshold on smaller models", + provideTmpdirInstance((dir) => + Effect.gen(function* () { + const 
compact = yield* SessionCompaction.Service + const ssn = yield* SessionNs.Service + const info = yield* ssn.create({}) + const a = yield* ssn.updateMessage({ + id: MessageID.ascending(), + role: "user", + sessionID: info.id, + agent: "build", + model: ref, + time: { created: Date.now() }, + }) + yield* ssn.updatePart({ + id: PartID.ascending(), + messageID: a.id, + sessionID: info.id, + type: "text", + text: "first", + }) + const b: MessageV2.Assistant = { + id: MessageID.ascending(), + role: "assistant", + sessionID: info.id, + mode: "build", + agent: "build", + path: { cwd: dir, root: dir }, + cost: 0, + tokens: { + output: 0, + input: 0, + reasoning: 0, + cache: { read: 0, write: 0 }, + }, + modelID: ref.modelID, + providerID: ref.providerID, + parentID: a.id, + time: { created: Date.now() }, + finish: "end_turn", + } + yield* ssn.updateMessage(b) + yield* ssn.updatePart({ + id: PartID.ascending(), + messageID: b.id, + sessionID: info.id, + type: "tool", + callID: crypto.randomUUID(), + tool: "bash", + state: { + status: "completed", + input: {}, + output: "x".repeat(60_000), + title: "done", + metadata: {}, + time: { start: Date.now(), end: Date.now() }, + }, + }) + for (const text of ["second", "third"]) { + const msg = yield* ssn.updateMessage({ + id: MessageID.ascending(), + role: "user", + sessionID: info.id, + agent: "build", + model: ref, + time: { created: Date.now() }, + }) + yield* ssn.updatePart({ + id: PartID.ascending(), + messageID: msg.id, + sessionID: info.id, + type: "text", + text, + }) + } + + yield* compact.prune({ sessionID: info.id }) + + const msgs = yield* ssn.messages({ sessionID: info.id }) + const part = msgs.flatMap((msg) => msg.parts).find((part) => part.type === "tool") + expect(part?.type).toBe("tool") + if (part?.type === "tool" && part.state.status === "completed") { + expect(part.state.time.compacted).toBeNumber() + } + }), + ), + ) + // kilocode_change end + it.live( "skips protected skill tool output", 
provideTmpdirInstance((dir) => @@ -887,6 +1040,16 @@ describe("session.compaction.process", () => { const session = await svc.create({}) await user(session.id, "root") const replay = await user(session.id, "image") + // kilocode_change start - overflow replay should not resend huge text verbatim + const huge = "z".repeat(400_000) + await svc.updatePart({ + id: PartID.ascending(), + messageID: replay.id, + sessionID: session.id, + type: "text", + text: huge, + }) + // kilocode_change end await svc.updatePart({ id: PartID.ascending(), messageID: replay.id, @@ -920,6 +1083,14 @@ describe("session.compaction.process", () => { expect( last?.parts.some((part) => part.type === "text" && part.text.includes("Attached image/png: cat.png")), ).toBe(true) + // kilocode_change start - replayed text is truncated, original replay parts are not resent verbatim + expect(last?.parts.some((part) => part.type === "text" && part.text === huge)).toBe(false) + expect( + last?.parts.some( + (part) => part.type === "text" && part.text.includes("truncated") && !part.text.includes("z".repeat(10_000)), + ), + ).toBe(true) + // kilocode_change end } finally { await rt.dispose() } @@ -1199,6 +1370,132 @@ describe("session.compaction.process", () => { }, }) }) + + // kilocode_change start - overflow compaction gets a stricter model-aware input budget + test("shrinks overflow compaction input without mutating stored messages", async () => { + await using tmp = await tmpdir() + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const session = await svc.create({}) + const root = await user(session.id, "root") + const reply = await assistant(session.id, root.id, tmp.path) + const output = "x".repeat(80_000) + await svc.updatePart({ + id: PartID.ascending(), + messageID: reply.id, + sessionID: session.id, + type: "tool", + callID: crypto.randomUUID(), + tool: "bash", + state: { + status: "completed", + input: {}, + output, + title: "done", + metadata: {}, + time: { start: 
Date.now(), end: Date.now() }, + }, + }) + const synthetic = await user(session.id, "synthetic") + await svc.updatePart({ + id: PartID.ascending(), + messageID: synthetic.id, + sessionID: session.id, + type: "text", + synthetic: true, + text: "y".repeat(20_000), + }) + const normal = await user(session.id, "normal") + const huge = "z".repeat(400_000) + await svc.updatePart({ + id: PartID.ascending(), + messageID: normal.id, + sessionID: session.id, + type: "text", + text: huge, + }) + await user(session.id, "replay") + const msg = await user(session.id, "current") + const { captured, proc } = captureLayer() + const rt = captureRuntime(proc) + try { + const msgs = await svc.messages({ sessionID: session.id }) + const result = await rt.runPromise( + SessionCompaction.Service.use((svc) => + svc.process({ + parentID: msg.id, + messages: msgs, + sessionID: session.id, + auto: true, + overflow: true, + }), + ), + ) + + expect(result).toBe("continue") + expect(captured.length).toBe(1) + const body = JSON.stringify(captured[0].messages) + expect(body).not.toContain("x".repeat(10_000)) + expect(body).not.toContain("y".repeat(10_000)) + expect(body).toContain("z".repeat(100)) + expect(body).not.toContain("z".repeat(10_000)) + expect(body).toContain("truncated") + + const stored = await svc.messages({ sessionID: session.id }) + const part = stored.flatMap((msg) => msg.parts).find((part) => part.type === "tool") + expect(part?.type).toBe("tool") + if (part?.type === "tool" && part.state.status === "completed") { + expect(part.state.output).toBe(output) + } + expect(stored.flatMap((msg) => msg.parts).some((part) => part.type === "text" && part.text === huge)).toBe(true) + } finally { + await rt.dispose() + } + }, + }) + }) + + test("shrinks older overflow messages before plugin context is added", async () => { + await using tmp = await tmpdir() + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const session = await svc.create({}) + for (let i = 0; i < 80; 
i++) { + await user(session.id, `msg-${String(i).padStart(3, "0")}`) + } + const msg = await user(session.id, "current") + const { captured, proc } = captureLayer() + const rt = captureRuntime(proc, contextPlugin("mcp-tool-schema-context-" + "z".repeat(20_000))) + try { + const msgs = await svc.messages({ sessionID: session.id }) + const result = await rt.runPromise( + SessionCompaction.Service.use((svc) => + svc.process({ + parentID: msg.id, + messages: msgs, + sessionID: session.id, + auto: true, + overflow: true, + }), + ), + ) + + expect(result).toBe("continue") + expect(captured.length).toBe(1) + const text = JSON.stringify(captured[0].messages) + expect(text).toContain("mcp-tool-schema-context") + expect(text).not.toContain("msg-000") + expect(text).not.toContain("msg-020") + expect(text).toContain("msg-078") + } finally { + await rt.dispose() + } + }, + }) + }) + // kilocode_change end }) describe("util.token.estimate", () => {