Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/model-aware-compaction.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@kilocode/cli": patch
---

Improve automatic compaction recovery for large sessions by scaling pruning budgets to the active model and shrinking overflow compaction input.
136 changes: 127 additions & 9 deletions packages/opencode/src/session/compaction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { BusEvent } from "@/bus/bus-event"
import { Bus } from "@/bus"
import * as Session from "./session"
import { SessionID, MessageID, PartID } from "./schema"
import { Provider } from "../provider"
import { Provider, ProviderTransform } from "../provider" // kilocode_change
import { MessageV2 } from "./message-v2"
import z from "zod"
import { Token } from "../util"
Expand Down Expand Up @@ -31,6 +31,105 @@ export const Event = {
export const PRUNE_MINIMUM = 20_000
export const PRUNE_PROTECT = 40_000
const PRUNE_PROTECTED_TOOLS = ["skill"]
// kilocode_change start - model-aware compaction budgets
const BUDGET_BUFFER = 20_000
const BUDGET_NORMAL_RATIO = 0.2
const BUDGET_OVERFLOW_RATIO = 0.05
const BUDGET_PROMPT_RATIO = 0.1
const BUDGET_NORMAL_MIN = 8_000
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: The minimum budgets still overshoot small-window models

budget() is meant to scale by model capacity, but these floors force normal >= 8_000 and overflow >= 2_000 even when usable is smaller than that. On 4k/8k-context models the compaction path can still keep more tool/text content than the model can fit, so overflow recovery can recurse instead of reliably making the summary request fit.

const BUDGET_NORMAL_MAX = 60_000
const BUDGET_OVERFLOW_MIN = 2_000
const BUDGET_OVERFLOW_MAX = 15_000
const BUDGET_OVERFLOW_TEXT_MIN = 500
const BUDGET_OVERFLOW_TEXT_MAX = 2_000
const BUDGET_OVERFLOW_TOOL_MIN = 500
const BUDGET_OVERFLOW_TOOL_MAX = 4_000

/** Restrict a value to the inclusive [min, max] range; min wins when min > max. */
function clamp(input: { value: number; min: number; max: number }) {
  const capped = Math.min(input.value, input.max)
  return capped < input.min ? input.min : capped
}

/**
 * Derive compaction token budgets from the active model's context window.
 *
 * Fix: the previous version applied the absolute floors (BUDGET_NORMAL_MIN,
 * BUDGET_OVERFLOW_MIN) unconditionally, so small-window models (4k/8k context)
 * could be handed a `normal`/`overflow` budget larger than the window itself,
 * letting overflow recovery recurse instead of making the summary request fit.
 * The floors are now capped by the usable window.
 */
function budget(input: { cfg: Config.Info; model: Provider.Model }) {
  const output = ProviderTransform.maxOutputTokens(input.model)
  // Prefer the explicit input-token limit; fall back to the total context window.
  const limit = input.model.limit.input || input.model.limit.context
  // Tokens reserved for the model's reply; overridable via compaction.reserved.
  const reserved = input.cfg.compaction?.reserved ?? (input.model.limit.input ? Math.min(BUDGET_BUFFER, output) : output)
  // Headroom for system prompt / tool schema overhead.
  const prompt = Math.floor(limit * BUDGET_PROMPT_RATIO)
  const usable = Math.max(0, limit - reserved - prompt)
  // Floors may never exceed the usable window (see WARNING in review).
  const normal = clamp({
    value: Math.floor(usable * BUDGET_NORMAL_RATIO),
    min: Math.min(BUDGET_NORMAL_MIN, usable),
    max: BUDGET_NORMAL_MAX,
  })
  const overflow = clamp({
    value: Math.floor(usable * BUDGET_OVERFLOW_RATIO),
    min: Math.min(BUDGET_OVERFLOW_MIN, usable),
    max: BUDGET_OVERFLOW_MAX,
  })
  return {
    usable,
    normal,
    overflow,
    // Per-part character caps used when shrinking overflow compaction input.
    tool: clamp({ value: overflow, min: BUDGET_OVERFLOW_TOOL_MIN, max: BUDGET_OVERFLOW_TOOL_MAX }),
    text: clamp({ value: Math.floor(overflow / 2), min: BUDGET_OVERFLOW_TEXT_MIN, max: BUDGET_OVERFLOW_TEXT_MAX }),
    // Larger usable context → keep more raw messages in the compaction input.
    messages: usable < 96_000 ? 20 : usable < 224_000 ? 40 : 80,
  }
}

/** Cap `text` at `chars` characters, appending a marker that records how much was cut and why. */
function truncate(input: { text: string; chars: number; label: string }) {
  const { text, chars, label } = input
  if (text.length <= chars) return text
  const removed = text.length - chars
  return `${text.slice(0, chars)}\n\n[... truncated ${removed} chars for ${label}]`
}

/**
 * Shrink a message list so the overflow compaction request fits the budget:
 * keeps only the newest `budget.messages` messages, truncates completed tool
 * outputs once the running token estimate exceeds `budget.overflow` (or any
 * single output exceeds `budget.tool` chars), and caps text parts
 * (synthetic parts use the tighter `budget.text` char cap).
 */
function shrink(input: { messages: MessageV2.WithParts[]; budget: ReturnType<typeof budget> }) {
  const cap = input.budget
  const recent = input.messages.length > cap.messages ? input.messages.slice(-cap.messages) : input.messages
  // Running token estimate across all completed tool outputs, in message order.
  let spent = 0
  return recent.map((msg) => ({
    ...msg,
    parts: msg.parts.map((part) => {
      if (part.type === "tool" && part.state.status === "completed") {
        spent += Token.estimate(part.state.output)
        const fits = spent <= cap.overflow && part.state.output.length <= cap.tool
        if (fits) return part
        const output = truncate({ text: part.state.output, chars: cap.tool, label: "overflow compaction" })
        return { ...part, state: { ...part.state, output } }
      }
      if (part.type !== "text") return part
      const chars = part.synthetic ? cap.text : cap.tool
      return { ...part, text: truncate({ text: part.text, chars, label: "overflow compaction" }) }
    }),
  }))
}

/**
 * Prepare a replayed part for re-enqueueing after overflow compaction:
 * drops prior compaction markers, replaces media attachments with a text
 * placeholder, and caps text parts at the `budget.tool` char limit.
 */
function sanitize(input: { part: MessageV2.Part; budget: ReturnType<typeof budget> }) {
  const { part } = input
  switch (part.type) {
    case "compaction":
      // Stale compaction markers must not survive into the replayed turn.
      return undefined
    case "file":
      if (MessageV2.isMedia(part.mime)) {
        return { type: "text" as const, text: `[Attached ${part.mime}: ${part.filename ?? "file"}]` }
      }
      return part
    case "text":
      return {
        ...part,
        text: truncate({ text: part.text, chars: input.budget.tool, label: "overflow replay" }),
      }
    default:
      return part
  }
}
// kilocode_change end
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not keep this whole section in a separate file and import it, to keep the merge simpler?


export interface Interface {
readonly isOverflow: (input: {
Expand Down Expand Up @@ -96,6 +195,14 @@ export const layer: Layer.Layer<
.pipe(Effect.catchIf(NotFoundError.isInstance, () => Effect.succeed(undefined)))
if (!msgs) return

// kilocode_change start - scale protected tool-output window with the active model
const last = msgs.findLast((msg) => msg.info.role === "user")
const model = last?.info.role === "user" ? yield* provider.getModel(last.info.model.providerID, last.info.model.modelID) : undefined
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: Pruning now silently stops when the session model is no longer available

provider.getModel(...) throws for deleted or renamed models. prompt.ts forks compaction.prune(...).pipe(Effect.ignore), so this turns background pruning into a silent no-op and old sessions keep their large tool outputs indefinitely. Falling back to PRUNE_PROTECT/PRUNE_MINIMUM when lookup fails would preserve the previous behavior.

const cap = model ? budget({ cfg, model }) : undefined
const protect = cap ? cap.normal : PRUNE_PROTECT
const minimum = cap ? Math.min(PRUNE_MINIMUM, Math.floor(protect * 0.75)) : PRUNE_MINIMUM
// kilocode_change end

let total = 0
let pruned = 0
const toPrune: MessageV2.ToolPart[] = []
Expand All @@ -114,7 +221,7 @@ export const layer: Layer.Layer<
if (part.state.time.compacted) break loop
const estimate = Token.estimate(part.state.output)
total += estimate
if (total > PRUNE_PROTECT) {
if (total > protect) { // kilocode_change
pruned += estimate
toPrune.push(part)
}
Expand All @@ -123,7 +230,7 @@ export const layer: Layer.Layer<
}

log.info("found", { pruned, total })
if (pruned > PRUNE_MINIMUM) {
if (pruned > minimum) { // kilocode_change
for (const part of toPrune) {
if (part.state.status === "completed") {
part.state.time.compacted = Date.now()
Expand Down Expand Up @@ -176,6 +283,18 @@ export const layer: Layer.Layer<
const model = agent.model
? yield* provider.getModel(agent.model.providerID, agent.model.modelID)
: yield* provider.getModel(userMessage.model.providerID, userMessage.model.modelID)
// kilocode_change start - overflow compaction must fit even with MCP/tool schema/plugin prompt overhead
const cap = yield* Effect.gen(function* () {
if (!input.overflow) return undefined
const cfg = yield* config.get()
const cap = budget({ cfg, model })
if (messages.length > cap.messages) {
log.info("overflow compaction: trimming old messages", { before: messages.length, after: cap.messages })
}
messages = shrink({ messages, budget: cap })
return cap
})
// kilocode_change end
// Allow plugins to inject context or replace compaction prompt.
const compacting = yield* plugin.trigger(
"experimental.session.compacting",
Expand Down Expand Up @@ -291,17 +410,16 @@ When constructing the summary, try to stick to this template:
system: original.system,
})
for (const part of replay.parts) {
if (part.type === "compaction") continue
const replayPart =
part.type === "file" && MessageV2.isMedia(part.mime)
? { type: "text" as const, text: `[Attached ${part.mime}: ${part.filename ?? "file"}]` }
: part
// kilocode_change start - shrink replayed overflow content before auto-continuing
const cleaned = cap ? sanitize({ part, budget: cap }) : part
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: Replay truncation uses the compaction model budget

cap is computed from model, which can resolve to the hidden compaction agent's model. But this replayed turn is re-enqueued with original.model and sent on the next real request using that model. If the compaction agent is configured with a larger context window than the user's model, this sanitization can still leave the replay too large and immediately overflow again.

if (!cleaned) continue
yield* session.updatePart({
...replayPart,
...cleaned,
id: PartID.ascending(),
messageID: replayMsg.id,
sessionID: input.sessionID,
})
// kilocode_change end
}
}

Expand Down
Loading