Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/model-aware-compaction.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@kilocode/cli": patch
---

Improve automatic compaction recovery for large sessions by scaling pruning budgets to the active model and shrinking overflow compaction input.
136 changes: 127 additions & 9 deletions packages/opencode/src/session/compaction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { BusEvent } from "@/bus/bus-event"
import { Bus } from "@/bus"
import * as Session from "./session"
import { SessionID, MessageID, PartID } from "./schema"
import { Provider } from "../provider"
import { Provider, ProviderTransform } from "../provider" // kilocode_change
import { MessageV2 } from "./message-v2"
import z from "zod"
import { Token } from "../util"
Expand Down Expand Up @@ -31,6 +31,105 @@ export const Event = {
export const PRUNE_MINIMUM = 20_000
export const PRUNE_PROTECT = 40_000
const PRUNE_PROTECTED_TOOLS = ["skill"]
// kilocode_change start - model-aware compaction budgets
const BUDGET_BUFFER = 20_000
const BUDGET_NORMAL_RATIO = 0.2
const BUDGET_OVERFLOW_RATIO = 0.05
const BUDGET_PROMPT_RATIO = 0.1
const BUDGET_NORMAL_MIN = 8_000
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: The minimum budgets still overshoot small-window models

budget() is meant to scale by model capacity, but these floors force normal >= 8_000 and overflow >= 2_000 even when usable is smaller than that. On 4k/8k-context models the compaction path can still keep more tool/text content than the model can fit, so overflow recovery can recurse instead of reliably making the summary request fit.

const BUDGET_NORMAL_MAX = 60_000
const BUDGET_OVERFLOW_MIN = 2_000
const BUDGET_OVERFLOW_MAX = 15_000
const BUDGET_OVERFLOW_TEXT_MIN = 500
const BUDGET_OVERFLOW_TEXT_MAX = 2_000
const BUDGET_OVERFLOW_TOOL_MIN = 500
const BUDGET_OVERFLOW_TOOL_MAX = 4_000

/** Restrict a value to the inclusive [min, max] range; min wins when min > max. */
function clamp(input: { value: number; min: number; max: number }) {
  const capped = Math.min(input.value, input.max)
  return capped < input.min ? input.min : capped
}

/**
 * Derive compaction token budgets from the active model's context window.
 *
 * Fix: the previous version applied the absolute floors (BUDGET_NORMAL_MIN,
 * BUDGET_OVERFLOW_MIN) unconditionally, so small-window models (4k/8k context)
 * could be handed a `normal`/`overflow` budget larger than the window itself,
 * letting overflow recovery recurse instead of making the summary request fit.
 * The floors are now capped by the usable window.
 */
function budget(input: { cfg: Config.Info; model: Provider.Model }) {
  const output = ProviderTransform.maxOutputTokens(input.model)
  // Prefer the explicit input-token limit; fall back to the total context window.
  const limit = input.model.limit.input || input.model.limit.context
  // Tokens reserved for the model's reply; overridable via compaction.reserved.
  const reserved = input.cfg.compaction?.reserved ?? (input.model.limit.input ? Math.min(BUDGET_BUFFER, output) : output)
  // Headroom for system prompt / tool schema overhead.
  const prompt = Math.floor(limit * BUDGET_PROMPT_RATIO)
  const usable = Math.max(0, limit - reserved - prompt)
  // Floors may never exceed the usable window (see WARNING in review).
  const normal = clamp({
    value: Math.floor(usable * BUDGET_NORMAL_RATIO),
    min: Math.min(BUDGET_NORMAL_MIN, usable),
    max: BUDGET_NORMAL_MAX,
  })
  const overflow = clamp({
    value: Math.floor(usable * BUDGET_OVERFLOW_RATIO),
    min: Math.min(BUDGET_OVERFLOW_MIN, usable),
    max: BUDGET_OVERFLOW_MAX,
  })
  return {
    usable,
    normal,
    overflow,
    // Per-part character caps used when shrinking overflow compaction input.
    tool: clamp({ value: overflow, min: BUDGET_OVERFLOW_TOOL_MIN, max: BUDGET_OVERFLOW_TOOL_MAX }),
    text: clamp({ value: Math.floor(overflow / 2), min: BUDGET_OVERFLOW_TEXT_MIN, max: BUDGET_OVERFLOW_TEXT_MAX }),
    // Larger usable context → keep more raw messages in the compaction input.
    messages: usable < 96_000 ? 20 : usable < 224_000 ? 40 : 80,
  }
}

/** Cap `text` at `chars` characters, appending a marker that records how much was cut and why. */
function truncate(input: { text: string; chars: number; label: string }) {
  const { text, chars, label } = input
  if (text.length <= chars) return text
  const removed = text.length - chars
  return `${text.slice(0, chars)}\n\n[... truncated ${removed} chars for ${label}]`
}

/**
 * Shrink a message list so the overflow compaction request fits the budget:
 * keeps only the newest `budget.messages` messages, truncates completed tool
 * outputs once the running token estimate exceeds `budget.overflow` (or any
 * single output exceeds `budget.tool` chars), and caps text parts
 * (synthetic parts use the tighter `budget.text` char cap).
 */
function shrink(input: { messages: MessageV2.WithParts[]; budget: ReturnType<typeof budget> }) {
  const cap = input.budget
  const recent = input.messages.length > cap.messages ? input.messages.slice(-cap.messages) : input.messages
  // Running token estimate across all completed tool outputs, in message order.
  let spent = 0
  return recent.map((msg) => ({
    ...msg,
    parts: msg.parts.map((part) => {
      if (part.type === "tool" && part.state.status === "completed") {
        spent += Token.estimate(part.state.output)
        const fits = spent <= cap.overflow && part.state.output.length <= cap.tool
        if (fits) return part
        const output = truncate({ text: part.state.output, chars: cap.tool, label: "overflow compaction" })
        return { ...part, state: { ...part.state, output } }
      }
      if (part.type !== "text") return part
      const chars = part.synthetic ? cap.text : cap.tool
      return { ...part, text: truncate({ text: part.text, chars, label: "overflow compaction" }) }
    }),
  }))
}

/**
 * Prepare a replayed part for re-enqueueing after overflow compaction:
 * drops prior compaction markers, replaces media attachments with a text
 * placeholder, and caps text parts at the `budget.tool` char limit.
 */
function sanitize(input: { part: MessageV2.Part; budget: ReturnType<typeof budget> }) {
  const { part } = input
  switch (part.type) {
    case "compaction":
      // Stale compaction markers must not survive into the replayed turn.
      return undefined
    case "file":
      if (MessageV2.isMedia(part.mime)) {
        return { type: "text" as const, text: `[Attached ${part.mime}: ${part.filename ?? "file"}]` }
      }
      return part
    case "text":
      return {
        ...part,
        text: truncate({ text: part.text, chars: input.budget.tool, label: "overflow replay" }),
      }
    default:
      return part
  }
}
// kilocode_change end
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not keep this whole section in a separate file and import it, to keep the merge simpler?


export interface Interface {
readonly isOverflow: (input: {
Expand Down Expand Up @@ -96,6 +195,14 @@ export const layer: Layer.Layer<
.pipe(Effect.catchIf(NotFoundError.isInstance, () => Effect.succeed(undefined)))
if (!msgs) return

// kilocode_change start - scale protected tool-output window with the active model
const last = msgs.findLast((msg) => msg.info.role === "user")
const model = last?.info.role === "user" ? yield* provider.getModel(last.info.model.providerID, last.info.model.modelID) : undefined
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: Pruning now silently stops when the session model is no longer available

provider.getModel(...) throws for deleted or renamed models. prompt.ts forks compaction.prune(...).pipe(Effect.ignore), so this turns background pruning into a silent no-op and old sessions keep their large tool outputs indefinitely. Falling back to PRUNE_PROTECT/PRUNE_MINIMUM when lookup fails would preserve the previous behavior.

const cap = model ? budget({ cfg, model }) : undefined
const protect = cap ? cap.normal : PRUNE_PROTECT
const minimum = cap ? Math.min(PRUNE_MINIMUM, Math.floor(protect * 0.75)) : PRUNE_MINIMUM
// kilocode_change end

let total = 0
let pruned = 0
const toPrune: MessageV2.ToolPart[] = []
Expand All @@ -114,7 +221,7 @@ export const layer: Layer.Layer<
if (part.state.time.compacted) break loop
const estimate = Token.estimate(part.state.output)
total += estimate
if (total > PRUNE_PROTECT) {
if (total > protect) { // kilocode_change
pruned += estimate
toPrune.push(part)
}
Expand All @@ -123,7 +230,7 @@ export const layer: Layer.Layer<
}

log.info("found", { pruned, total })
if (pruned > PRUNE_MINIMUM) {
if (pruned > minimum) { // kilocode_change
for (const part of toPrune) {
if (part.state.status === "completed") {
part.state.time.compacted = Date.now()
Expand Down Expand Up @@ -176,6 +283,18 @@ export const layer: Layer.Layer<
const model = agent.model
? yield* provider.getModel(agent.model.providerID, agent.model.modelID)
: yield* provider.getModel(userMessage.model.providerID, userMessage.model.modelID)
// kilocode_change start - overflow compaction must fit even with MCP/tool schema/plugin prompt overhead
const cap = yield* Effect.gen(function* () {
if (!input.overflow) return undefined
const cfg = yield* config.get()
const cap = budget({ cfg, model })
if (messages.length > cap.messages) {
log.info("overflow compaction: trimming old messages", { before: messages.length, after: cap.messages })
}
messages = shrink({ messages, budget: cap })
return cap
})
// kilocode_change end
// Allow plugins to inject context or replace compaction prompt.
const compacting = yield* plugin.trigger(
"experimental.session.compacting",
Expand Down Expand Up @@ -291,17 +410,16 @@ When constructing the summary, try to stick to this template:
system: original.system,
})
for (const part of replay.parts) {
if (part.type === "compaction") continue
const replayPart =
part.type === "file" && MessageV2.isMedia(part.mime)
? { type: "text" as const, text: `[Attached ${part.mime}: ${part.filename ?? "file"}]` }
: part
// kilocode_change start - shrink replayed overflow content before auto-continuing
const cleaned = cap ? sanitize({ part, budget: cap }) : part
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: Replay truncation uses the compaction model budget

cap is computed from model, which can resolve to the hidden compaction agent's model. But this replayed turn is re-enqueued with original.model and sent on the next real request using that model. If the compaction agent is configured with a larger context window than the user's model, this sanitization can still leave the replay too large and immediately overflow again.

if (!cleaned) continue
yield* session.updatePart({
...replayPart,
...cleaned,
id: PartID.ascending(),
messageID: replayMsg.id,
sessionID: input.sessionID,
})
// kilocode_change end
}
}

Expand Down
Loading