Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/model-aware-compaction.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@kilocode/cli": patch
---

Improve automatic compaction recovery for large sessions by scaling pruning budgets to the active model and shrinking overflow compaction input.
103 changes: 100 additions & 3 deletions packages/opencode/src/session/compaction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { BusEvent } from "@/bus/bus-event"
import { Bus } from "@/bus"
import * as Session from "./session"
import { SessionID, MessageID, PartID } from "./schema"
import { Provider } from "../provider"
import { Provider, ProviderTransform } from "../provider" // kilocode_change
import { MessageV2 } from "./message-v2"
import z from "zod"
import { Token } from "../util"
Expand Down Expand Up @@ -31,6 +31,85 @@ export const Event = {
// Pruning thresholds (tokens). Pruning only proceeds when at least
// PRUNE_MINIMUM tokens would be reclaimed, and tool outputs within the most
// recent PRUNE_PROTECT tokens are never pruned (see the prune loop below).
export const PRUNE_MINIMUM = 20_000
export const PRUNE_PROTECT = 40_000
// Tool names exempt from pruning — presumably their outputs must survive
// compaction; confirm at the usage site (not visible in this chunk).
const PRUNE_PROTECTED_TOOLS = ["skill"]
// kilocode_change start - model-aware compaction budgets
// Default tokens reserved for the model's response when the config does not
// specify compaction.reserved (see budget()).
const BUDGET_BUFFER = 20_000
// Fraction of the usable window granted as the normal pruning budget.
const BUDGET_NORMAL_RATIO = 0.2
// Fraction of the usable window granted as the overflow-compaction budget.
const BUDGET_OVERFLOW_RATIO = 0.05
// Fraction of the context window assumed to be consumed by the prompt.
const BUDGET_PROMPT_RATIO = 0.1
// Floor (tokens) for the normal budget's clamp in budget().
const BUDGET_NORMAL_MIN = 8_000
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: The minimum budgets still overshoot small-window models

budget() is meant to scale by model capacity, but these floors force normal >= 8_000 and overflow >= 2_000 even when usable is smaller than that. On 4k/8k-context models the compaction path can still keep more tool/text content than the model can fit, so overflow recovery can recurse instead of reliably making the summary request fit.

// Remaining clamp bounds for the scaled budgets computed in budget():
// ceilings cap what large-context models may keep; floors guarantee a
// workable minimum. MIN/MAX pairs feed clamp() for each budget category.
const BUDGET_NORMAL_MAX = 60_000
const BUDGET_OVERFLOW_MIN = 2_000
const BUDGET_OVERFLOW_MAX = 15_000
// Per-part character caps used by shrink() for synthetic text parts...
const BUDGET_OVERFLOW_TEXT_MIN = 500
const BUDGET_OVERFLOW_TEXT_MAX = 2_000
// ...and for completed tool outputs.
const BUDGET_OVERFLOW_TOOL_MIN = 500
const BUDGET_OVERFLOW_TOOL_MAX = 4_000

/**
 * Restrict `value` to the inclusive range [`min`, `max`].
 *
 * Note: when `min > max`, `min` wins — matching the
 * `Math.max(min, Math.min(max, value))` order of application.
 */
function clamp(input: { value: number; min: number; max: number }) {
  const capped = input.value > input.max ? input.max : input.value
  return capped < input.min ? input.min : capped
}

/**
 * Derive compaction budgets (in tokens) from the active model's window.
 *
 * `usable` is the model's input limit minus the reserved response space and an
 * estimated prompt share; the normal and overflow budgets scale from it via
 * the BUDGET_*_RATIO constants, clamped between per-category floors and
 * ceilings.
 *
 * Fix: the static floors (e.g. BUDGET_NORMAL_MIN = 8_000) could exceed
 * `usable` on small-context (4k/8k) models, forcing the compaction path to
 * keep more content than the model can fit and making overflow recovery
 * recurse. Each floor is now capped at `usable` so budgets never overshoot
 * model capacity; behavior for large-window models is unchanged.
 */
function budget(input: { cfg: Config.Info; model: Provider.Model }) {
  const output = ProviderTransform.maxOutputTokens(input.model)
  // Prefer the explicit input limit; fall back to the full context window.
  const limit = input.model.limit.input || input.model.limit.context
  // Room reserved for the summary response; config override takes precedence.
  const reserved = input.cfg.compaction?.reserved ?? (input.model.limit.input ? Math.min(BUDGET_BUFFER, output) : output)
  const prompt = Math.floor(limit * BUDGET_PROMPT_RATIO)
  const usable = Math.max(0, limit - reserved - prompt)
  // Never let a static floor exceed what the model can actually hold.
  const floor = (min: number) => Math.min(min, usable)
  const normal = clamp({
    value: Math.floor(usable * BUDGET_NORMAL_RATIO),
    min: floor(BUDGET_NORMAL_MIN),
    max: BUDGET_NORMAL_MAX,
  })
  const overflow = clamp({
    value: Math.floor(usable * BUDGET_OVERFLOW_RATIO),
    min: floor(BUDGET_OVERFLOW_MIN),
    max: BUDGET_OVERFLOW_MAX,
  })
  return {
    usable,
    normal,
    overflow,
    tool: clamp({ value: overflow, min: floor(BUDGET_OVERFLOW_TOOL_MIN), max: BUDGET_OVERFLOW_TOOL_MAX }),
    text: clamp({ value: Math.floor(overflow / 2), min: floor(BUDGET_OVERFLOW_TEXT_MIN), max: BUDGET_OVERFLOW_TEXT_MAX }),
    // Message-count cap for overflow compaction: fewer on smaller windows.
    messages: usable < 96_000 ? 20 : usable < 224_000 ? 40 : 80,
  }
}

/**
 * Cap `text` at `chars` characters. When the text is over the cap, it is cut
 * and a marker is appended recording how many characters were dropped and why
 * (`label`); otherwise the text is returned unchanged.
 */
function truncate(input: { text: string; chars: number; label: string }) {
  const { text, chars, label } = input
  if (text.length <= chars) return text
  const dropped = text.length - chars
  const head = text.slice(0, chars)
  return `${head}\n\n[... truncated ${dropped} chars for ${label}]`
}

/**
 * Reduce the message set fed into overflow compaction so the summary request
 * fits the model window.
 *
 * Keeps only the newest `budget.messages` messages, then walks their parts:
 * completed tool outputs are truncated to `budget.tool` chars once the running
 * token estimate passes `budget.overflow` (or the individual output is itself
 * over `budget.tool` chars), and synthetic text parts are capped at
 * `budget.text` chars. All other parts pass through untouched.
 */
function shrink(input: { messages: MessageV2.WithParts[]; budget: ReturnType<typeof budget> }) {
  const { messages, budget: cap } = input
  const recent = messages.length > cap.messages ? messages.slice(-cap.messages) : messages
  let spent = 0
  return recent.map((message) => {
    const parts = message.parts.map((part) => {
      if (part.type === "tool" && part.state.status === "completed") {
        // Running estimate accumulates across all retained messages.
        spent += Token.estimate(part.state.output)
        const fits = spent <= cap.overflow && part.state.output.length <= cap.tool
        if (fits) return part
        const state = {
          ...part.state,
          output: truncate({ text: part.state.output, chars: cap.tool, label: "overflow compaction" }),
        }
        return { ...part, state }
      }
      if (part.type === "text" && part.synthetic) {
        const text = truncate({ text: part.text, chars: cap.text, label: "overflow compaction" })
        return { ...part, text }
      }
      return part
    })
    return { ...message, parts }
  })
}
// kilocode_change end
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not keep this whole section in a separate file, and import it to keep the merge simpler?


export interface Interface {
readonly isOverflow: (input: {
Expand Down Expand Up @@ -96,6 +175,14 @@ export const layer: Layer.Layer<
.pipe(Effect.catchIf(NotFoundError.isInstance, () => Effect.succeed(undefined)))
if (!msgs) return

// kilocode_change start - scale protected tool-output window with the active model
const last = msgs.findLast((msg) => msg.info.role === "user")
const model = last?.info.role === "user" ? yield* provider.getModel(last.info.model.providerID, last.info.model.modelID) : undefined
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: Pruning now silently stops when the session model is no longer available

provider.getModel(...) throws for deleted or renamed models. prompt.ts forks compaction.prune(...).pipe(Effect.ignore), so this turns background pruning into a silent no-op and old sessions keep their large tool outputs indefinitely. Falling back to PRUNE_PROTECT/PRUNE_MINIMUM when lookup fails would preserve the previous behavior.

const cap = model ? budget({ cfg, model }) : undefined
const protect = cap ? cap.normal : PRUNE_PROTECT
const minimum = cap ? Math.min(PRUNE_MINIMUM, Math.floor(protect * 0.75)) : PRUNE_MINIMUM
// kilocode_change end

let total = 0
let pruned = 0
const toPrune: MessageV2.ToolPart[] = []
Expand All @@ -114,7 +201,7 @@ export const layer: Layer.Layer<
if (part.state.time.compacted) break loop
const estimate = Token.estimate(part.state.output)
total += estimate
if (total > PRUNE_PROTECT) {
if (total > protect) { // kilocode_change
pruned += estimate
toPrune.push(part)
}
Expand All @@ -123,7 +210,7 @@ export const layer: Layer.Layer<
}

log.info("found", { pruned, total })
if (pruned > PRUNE_MINIMUM) {
if (pruned > minimum) { // kilocode_change
for (const part of toPrune) {
if (part.state.status === "completed") {
part.state.time.compacted = Date.now()
Expand Down Expand Up @@ -176,6 +263,16 @@ export const layer: Layer.Layer<
const model = agent.model
? yield* provider.getModel(agent.model.providerID, agent.model.modelID)
: yield* provider.getModel(userMessage.model.providerID, userMessage.model.modelID)
// kilocode_change start - overflow compaction must fit even with MCP/tool schema/plugin prompt overhead
if (input.overflow) {
const cfg = yield* config.get()
const cap = budget({ cfg, model })
if (messages.length > cap.messages) {
log.info("overflow compaction: trimming old messages", { before: messages.length, after: cap.messages })
}
messages = shrink({ messages, budget: cap })
}
// kilocode_change end
// Allow plugins to inject context or replace compaction prompt.
const compacting = yield* plugin.trigger(
"experimental.session.compacting",
Expand Down
Loading
Loading