diff --git a/.changeset/model-aware-compaction.md b/.changeset/model-aware-compaction.md new file mode 100644 index 00000000000..8ce850a0441 --- /dev/null +++ b/.changeset/model-aware-compaction.md @@ -0,0 +1,5 @@ +--- +"@kilocode/cli": patch +--- + +Improve automatic compaction recovery for large sessions by scaling pruning budgets to the active model and shrinking overflow compaction input. diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index 212f5fdbab8..3c09a2a625e 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -2,7 +2,7 @@ import { BusEvent } from "@/bus/bus-event" import { Bus } from "@/bus" import * as Session from "./session" import { SessionID, MessageID, PartID } from "./schema" -import { Provider } from "../provider" +import { Provider, ProviderTransform } from "../provider" // kilocode_change import { MessageV2 } from "./message-v2" import z from "zod" import { Token } from "../util" @@ -31,6 +31,105 @@ export const Event = { export const PRUNE_MINIMUM = 20_000 export const PRUNE_PROTECT = 40_000 const PRUNE_PROTECTED_TOOLS = ["skill"] +// kilocode_change start - model-aware compaction budgets +const BUDGET_BUFFER = 20_000 +const BUDGET_NORMAL_RATIO = 0.2 +const BUDGET_OVERFLOW_RATIO = 0.05 +const BUDGET_PROMPT_RATIO = 0.1 +const BUDGET_NORMAL_MIN = 8_000 +const BUDGET_NORMAL_MAX = 60_000 +const BUDGET_OVERFLOW_MIN = 2_000 +const BUDGET_OVERFLOW_MAX = 15_000 +const BUDGET_OVERFLOW_TEXT_MIN = 500 +const BUDGET_OVERFLOW_TEXT_MAX = 2_000 +const BUDGET_OVERFLOW_TOOL_MIN = 500 +const BUDGET_OVERFLOW_TOOL_MAX = 4_000 + +function clamp(input: { value: number; min: number; max: number }) { + return Math.max(input.min, Math.min(input.max, input.value)) +} + +function budget(input: { cfg: Config.Info; model: Provider.Model }) { + const output = ProviderTransform.maxOutputTokens(input.model) + const limit = input.model.limit.input || 
input.model.limit.context + const reserved = input.cfg.compaction?.reserved ?? (input.model.limit.input ? Math.min(BUDGET_BUFFER, output) : output) + const prompt = Math.floor(limit * BUDGET_PROMPT_RATIO) + const usable = Math.max(0, limit - reserved - prompt) + const available = usable + const normal = clamp({ + value: Math.floor(available * BUDGET_NORMAL_RATIO), + min: BUDGET_NORMAL_MIN, + max: BUDGET_NORMAL_MAX, + }) + const overflow = clamp({ + value: Math.floor(available * BUDGET_OVERFLOW_RATIO), + min: BUDGET_OVERFLOW_MIN, + max: BUDGET_OVERFLOW_MAX, + }) + return { + usable, + normal, + overflow, + tool: clamp({ value: overflow, min: BUDGET_OVERFLOW_TOOL_MIN, max: BUDGET_OVERFLOW_TOOL_MAX }), + text: clamp({ value: Math.floor(overflow / 2), min: BUDGET_OVERFLOW_TEXT_MIN, max: BUDGET_OVERFLOW_TEXT_MAX }), + messages: usable < 96_000 ? 20 : usable < 224_000 ? 40 : 80, + } +} + +function truncate(input: { text: string; chars: number; label: string }) { + if (input.text.length <= input.chars) return input.text + return `${input.text.slice(0, input.chars)}\n\n[... truncated ${input.text.length - input.chars} chars for ${input.label}]` +} + +function shrink(input: { messages: MessageV2.WithParts[]; budget: ReturnType<typeof budget> }) { + const msgs = input.messages.length > input.budget.messages ? 
input.messages.slice(-input.budget.messages) : input.messages + const state = { total: 0 } + return msgs.map((msg) => ({ + ...msg, + parts: msg.parts.map((part) => { + if (part.type === "tool" && part.state.status === "completed") { + const estimate = Token.estimate(part.state.output) + state.total += estimate + if (state.total <= input.budget.overflow && part.state.output.length <= input.budget.tool) return part + return { + ...part, + state: { + ...part.state, + output: truncate({ text: part.state.output, chars: input.budget.tool, label: "overflow compaction" }), + }, + } + } + if (part.type === "text" && part.synthetic) { + return { + ...part, + text: truncate({ text: part.text, chars: input.budget.text, label: "overflow compaction" }), + } + } + if (part.type === "text") { + return { + ...part, + text: truncate({ text: part.text, chars: input.budget.tool, label: "overflow compaction" }), + } + } + return part + }), + })) +} + +function sanitize(input: { part: MessageV2.Part; budget: ReturnType<typeof budget> }) { + if (input.part.type === "compaction") return undefined + if (input.part.type === "file" && MessageV2.isMedia(input.part.mime)) { + return { type: "text" as const, text: `[Attached ${input.part.mime}: ${input.part.filename ?? "file"}]` } + } + if (input.part.type === "text") { + return { + ...input.part, + text: truncate({ text: input.part.text, chars: input.budget.tool, label: "overflow replay" }), + } + } + return input.part +} +// kilocode_change end export interface Interface { readonly isOverflow: (input: { @@ -96,6 +195,14 @@ export const layer: Layer.Layer< .pipe(Effect.catchIf(NotFoundError.isInstance, () => Effect.succeed(undefined))) if (!msgs) return + // kilocode_change start - scale protected tool-output window with the active model + const last = msgs.findLast((msg) => msg.info.role === "user") + const model = last?.info.role === "user" ? yield* provider.getModel(last.info.model.providerID, last.info.model.modelID) : undefined + const cap = model ? 
budget({ cfg, model }) : undefined + const protect = cap ? cap.normal : PRUNE_PROTECT + const minimum = cap ? Math.min(PRUNE_MINIMUM, Math.floor(protect * 0.75)) : PRUNE_MINIMUM + // kilocode_change end + let total = 0 let pruned = 0 const toPrune: MessageV2.ToolPart[] = [] @@ -114,7 +221,7 @@ export const layer: Layer.Layer< if (part.state.time.compacted) break loop const estimate = Token.estimate(part.state.output) total += estimate - if (total > PRUNE_PROTECT) { + if (total > protect) { // kilocode_change pruned += estimate toPrune.push(part) } @@ -123,7 +230,7 @@ export const layer: Layer.Layer< } log.info("found", { pruned, total }) - if (pruned > PRUNE_MINIMUM) { + if (pruned > minimum) { // kilocode_change for (const part of toPrune) { if (part.state.status === "completed") { part.state.time.compacted = Date.now() @@ -176,6 +283,18 @@ export const layer: Layer.Layer< const model = agent.model ? yield* provider.getModel(agent.model.providerID, agent.model.modelID) : yield* provider.getModel(userMessage.model.providerID, userMessage.model.modelID) + // kilocode_change start - overflow compaction must fit even with MCP/tool schema/plugin prompt overhead + const cap = yield* Effect.gen(function* () { + if (!input.overflow) return undefined + const cfg = yield* config.get() + const cap = budget({ cfg, model }) + if (messages.length > cap.messages) { + log.info("overflow compaction: trimming old messages", { before: messages.length, after: cap.messages }) + } + messages = shrink({ messages, budget: cap }) + return cap + }) + // kilocode_change end // Allow plugins to inject context or replace compaction prompt. const compacting = yield* plugin.trigger( "experimental.session.compacting", @@ -291,17 +410,16 @@ When constructing the summary, try to stick to this template: system: original.system, }) for (const part of replay.parts) { - if (part.type === "compaction") continue - const replayPart = - part.type === "file" && MessageV2.isMedia(part.mime) - ? 
{ type: "text" as const, text: `[Attached ${part.mime}: ${part.filename ?? "file"}]` } - : part + // kilocode_change start - shrink replayed overflow content before auto-continuing + const cleaned = cap ? sanitize({ part, budget: cap }) : part + if (!cleaned) continue yield* session.updatePart({ - ...replayPart, + ...cleaned, id: PartID.ascending(), messageID: replayMsg.id, sessionID: input.sessionID, }) + // kilocode_change end } } diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts index 423409fc3dc..50bfecd67bc 100644 --- a/packages/opencode/test/session/compaction.test.ts +++ b/packages/opencode/test/session/compaction.test.ts @@ -97,7 +97,8 @@ function createModel(opts: { } as Provider.Model } -const wide = () => ProviderTest.fake({ model: createModel({ context: 100_000, output: 32_000 }) }) +const base = createModel({ context: 100_000, output: 32_000 }) // kilocode_change +const wide = () => ProviderTest.fake({ model: base }) // kilocode_change async function user(sessionID: SessionID, text: string) { const msg = await svc.updateMessage({ @@ -167,6 +168,51 @@ function layer(result: "continue" | "compact") { ) } +// kilocode_change start - capture compaction processor input for budget tests +function captureLayer() { + const captured: LLM.StreamInput[] = [] + const proc = Layer.succeed( + SessionProcessorModule.SessionProcessor.Service, + SessionProcessorModule.SessionProcessor.Service.of({ + create: Effect.fn("CaptureSessionProcessor.create")((input) => { + const msg = input.assistantMessage + return Effect.succeed({ + get message() { + return msg + }, + updateToolCall: Effect.fn("CaptureSessionProcessor.updateToolCall")(() => Effect.succeed(undefined)), + completeToolCall: Effect.fn("CaptureSessionProcessor.completeToolCall")(() => Effect.void), + process: Effect.fn("CaptureSessionProcessor.process")((input: LLM.StreamInput) => { + captured.push(input) + return Effect.succeed("continue" as const) 
+ }), + } satisfies SessionProcessorModule.SessionProcessor.Handle) + }), + }), + ) + return { captured, proc } +} + +function captureRuntime( + proc: Layer.Layer, + plugin = Plugin.defaultLayer, + provider = wide(), +) { + const bus = Bus.layer + return ManagedRuntime.make( + Layer.mergeAll(SessionCompaction.layer, bus).pipe( + Layer.provide(provider.layer), + Layer.provide(SessionNs.defaultLayer), + Layer.provide(proc), + Layer.provide(Agent.defaultLayer), + Layer.provide(plugin), + Layer.provide(bus), + Layer.provide(Config.defaultLayer), + ), + ) +} +// kilocode_change end + function runtime(result: "continue" | "compact", plugin = Plugin.defaultLayer, provider = ProviderTest.fake()) { const bus = Bus.layer return ManagedRuntime.make( @@ -183,7 +229,7 @@ function runtime(result: "continue" | "compact", plugin = Plugin.defaultLayer, p } const deps = Layer.mergeAll( - ProviderTest.fake().layer, + wide().layer, // kilocode_change layer("continue"), Agent.defaultLayer, Plugin.defaultLayer, @@ -279,6 +325,22 @@ function autocontinue(enabled: boolean) { }) } +// kilocode_change start - plugin helper for overflow shrinking tests +function contextPlugin(text: string) { + return Layer.mock(Plugin.Service)({ + trigger: (name: Name, _input: Input, output: Output) => { + if (name !== "experimental.session.compacting") return Effect.succeed(output) + return Effect.sync(() => { + ;(output as { context: string[] }).context.push(text) + return output + }) + }, + list: () => Effect.succeed([]), + init: () => Effect.void, + }) +} +// kilocode_change end + describe("session.compaction.isOverflow", () => { it.live( "returns true when token count exceeds usable context", @@ -586,6 +648,97 @@ describe("session.compaction.prune", () => { ), ) + // kilocode_change start - normal pruning should scale with model budget + it.live( + "prunes old tool output below the old fixed 40k token threshold on smaller models", + provideTmpdirInstance((dir) => + Effect.gen(function* () { + const 
compact = yield* SessionCompaction.Service + const ssn = yield* SessionNs.Service + const info = yield* ssn.create({}) + const a = yield* ssn.updateMessage({ + id: MessageID.ascending(), + role: "user", + sessionID: info.id, + agent: "build", + model: ref, + time: { created: Date.now() }, + }) + yield* ssn.updatePart({ + id: PartID.ascending(), + messageID: a.id, + sessionID: info.id, + type: "text", + text: "first", + }) + const b: MessageV2.Assistant = { + id: MessageID.ascending(), + role: "assistant", + sessionID: info.id, + mode: "build", + agent: "build", + path: { cwd: dir, root: dir }, + cost: 0, + tokens: { + output: 0, + input: 0, + reasoning: 0, + cache: { read: 0, write: 0 }, + }, + modelID: ref.modelID, + providerID: ref.providerID, + parentID: a.id, + time: { created: Date.now() }, + finish: "end_turn", + } + yield* ssn.updateMessage(b) + yield* ssn.updatePart({ + id: PartID.ascending(), + messageID: b.id, + sessionID: info.id, + type: "tool", + callID: crypto.randomUUID(), + tool: "bash", + state: { + status: "completed", + input: {}, + output: "x".repeat(60_000), + title: "done", + metadata: {}, + time: { start: Date.now(), end: Date.now() }, + }, + }) + for (const text of ["second", "third"]) { + const msg = yield* ssn.updateMessage({ + id: MessageID.ascending(), + role: "user", + sessionID: info.id, + agent: "build", + model: ref, + time: { created: Date.now() }, + }) + yield* ssn.updatePart({ + id: PartID.ascending(), + messageID: msg.id, + sessionID: info.id, + type: "text", + text, + }) + } + + yield* compact.prune({ sessionID: info.id }) + + const msgs = yield* ssn.messages({ sessionID: info.id }) + const part = msgs.flatMap((msg) => msg.parts).find((part) => part.type === "tool") + expect(part?.type).toBe("tool") + if (part?.type === "tool" && part.state.status === "completed") { + expect(part.state.time.compacted).toBeNumber() + } + }), + ), + ) + // kilocode_change end + it.live( "skips protected skill tool output", 
provideTmpdirInstance((dir) => @@ -887,6 +1040,16 @@ describe("session.compaction.process", () => { const session = await svc.create({}) await user(session.id, "root") const replay = await user(session.id, "image") + // kilocode_change start - overflow replay should not resend huge text verbatim + const huge = "z".repeat(400_000) + await svc.updatePart({ + id: PartID.ascending(), + messageID: replay.id, + sessionID: session.id, + type: "text", + text: huge, + }) + // kilocode_change end await svc.updatePart({ id: PartID.ascending(), messageID: replay.id, @@ -920,6 +1083,14 @@ describe("session.compaction.process", () => { expect( last?.parts.some((part) => part.type === "text" && part.text.includes("Attached image/png: cat.png")), ).toBe(true) + // kilocode_change start - replayed text is truncated, original replay parts are not resent verbatim + expect(last?.parts.some((part) => part.type === "text" && part.text === huge)).toBe(false) + expect( + last?.parts.some( + (part) => part.type === "text" && part.text.includes("truncated") && !part.text.includes("z".repeat(10_000)), + ), + ).toBe(true) + // kilocode_change end } finally { await rt.dispose() } @@ -1199,6 +1370,132 @@ describe("session.compaction.process", () => { }, }) }) + + // kilocode_change start - overflow compaction gets a stricter model-aware input budget + test("shrinks overflow compaction input without mutating stored messages", async () => { + await using tmp = await tmpdir() + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const session = await svc.create({}) + const root = await user(session.id, "root") + const reply = await assistant(session.id, root.id, tmp.path) + const output = "x".repeat(80_000) + await svc.updatePart({ + id: PartID.ascending(), + messageID: reply.id, + sessionID: session.id, + type: "tool", + callID: crypto.randomUUID(), + tool: "bash", + state: { + status: "completed", + input: {}, + output, + title: "done", + metadata: {}, + time: { start: 
Date.now(), end: Date.now() }, + }, + }) + const synthetic = await user(session.id, "synthetic") + await svc.updatePart({ + id: PartID.ascending(), + messageID: synthetic.id, + sessionID: session.id, + type: "text", + synthetic: true, + text: "y".repeat(20_000), + }) + const normal = await user(session.id, "normal") + const huge = "z".repeat(400_000) + await svc.updatePart({ + id: PartID.ascending(), + messageID: normal.id, + sessionID: session.id, + type: "text", + text: huge, + }) + await user(session.id, "replay") + const msg = await user(session.id, "current") + const { captured, proc } = captureLayer() + const rt = captureRuntime(proc) + try { + const msgs = await svc.messages({ sessionID: session.id }) + const result = await rt.runPromise( + SessionCompaction.Service.use((svc) => + svc.process({ + parentID: msg.id, + messages: msgs, + sessionID: session.id, + auto: true, + overflow: true, + }), + ), + ) + + expect(result).toBe("continue") + expect(captured.length).toBe(1) + const body = JSON.stringify(captured[0].messages) + expect(body).not.toContain("x".repeat(10_000)) + expect(body).not.toContain("y".repeat(10_000)) + expect(body).toContain("z".repeat(100)) + expect(body).not.toContain("z".repeat(10_000)) + expect(body).toContain("truncated") + + const stored = await svc.messages({ sessionID: session.id }) + const part = stored.flatMap((msg) => msg.parts).find((part) => part.type === "tool") + expect(part?.type).toBe("tool") + if (part?.type === "tool" && part.state.status === "completed") { + expect(part.state.output).toBe(output) + } + expect(stored.flatMap((msg) => msg.parts).some((part) => part.type === "text" && part.text === huge)).toBe(true) + } finally { + await rt.dispose() + } + }, + }) + }) + + test("shrinks older overflow messages before plugin context is added", async () => { + await using tmp = await tmpdir() + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const session = await svc.create({}) + for (let i = 0; i < 80; 
i++) { + await user(session.id, `msg-${String(i).padStart(3, "0")}`) + } + const msg = await user(session.id, "current") + const { captured, proc } = captureLayer() + const rt = captureRuntime(proc, contextPlugin("mcp-tool-schema-context-" + "z".repeat(20_000))) + try { + const msgs = await svc.messages({ sessionID: session.id }) + const result = await rt.runPromise( + SessionCompaction.Service.use((svc) => + svc.process({ + parentID: msg.id, + messages: msgs, + sessionID: session.id, + auto: true, + overflow: true, + }), + ), + ) + + expect(result).toBe("continue") + expect(captured.length).toBe(1) + const text = JSON.stringify(captured[0].messages) + expect(text).toContain("mcp-tool-schema-context") + expect(text).not.toContain("msg-000") + expect(text).not.toContain("msg-020") + expect(text).toContain("msg-078") + } finally { + await rt.dispose() + } + }, + }) + }) + // kilocode_change end }) describe("util.token.estimate", () => {