-
Notifications
You must be signed in to change notification settings - Fork 2.5k
fix(cli): scale compaction pruning by model budget #9557
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| "@kilocode/cli": patch | ||
| --- | ||
|
|
||
| Improve automatic compaction recovery for large sessions by scaling pruning budgets to the active model and shrinking overflow compaction input. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,7 +2,7 @@ import { BusEvent } from "@/bus/bus-event" | |
| import { Bus } from "@/bus" | ||
| import * as Session from "./session" | ||
| import { SessionID, MessageID, PartID } from "./schema" | ||
| import { Provider } from "../provider" | ||
| import { Provider, ProviderTransform } from "../provider" // kilocode_change | ||
| import { MessageV2 } from "./message-v2" | ||
| import z from "zod" | ||
| import { Token } from "../util" | ||
|
|
@@ -31,6 +31,85 @@ export const Event = { | |
| export const PRUNE_MINIMUM = 20_000 | ||
| export const PRUNE_PROTECT = 40_000 | ||
| const PRUNE_PROTECTED_TOOLS = ["skill"] | ||
| // kilocode_change start - model-aware compaction budgets | ||
| const BUDGET_BUFFER = 20_000 | ||
| const BUDGET_NORMAL_RATIO = 0.2 | ||
| const BUDGET_OVERFLOW_RATIO = 0.05 | ||
| const BUDGET_PROMPT_RATIO = 0.1 | ||
| const BUDGET_NORMAL_MIN = 8_000 | ||
| const BUDGET_NORMAL_MAX = 60_000 | ||
| const BUDGET_OVERFLOW_MIN = 2_000 | ||
| const BUDGET_OVERFLOW_MAX = 15_000 | ||
| const BUDGET_OVERFLOW_TEXT_MIN = 500 | ||
| const BUDGET_OVERFLOW_TEXT_MAX = 2_000 | ||
| const BUDGET_OVERFLOW_TOOL_MIN = 500 | ||
| const BUDGET_OVERFLOW_TOOL_MAX = 4_000 | ||
|
|
||
| function clamp(input: { value: number; min: number; max: number }) { | ||
| return Math.max(input.min, Math.min(input.max, input.value)) | ||
| } | ||
|
|
||
| function budget(input: { cfg: Config.Info; model: Provider.Model }) { | ||
| const output = ProviderTransform.maxOutputTokens(input.model) | ||
| const limit = input.model.limit.input || input.model.limit.context | ||
| const reserved = input.cfg.compaction?.reserved ?? (input.model.limit.input ? Math.min(BUDGET_BUFFER, output) : output) | ||
| const prompt = Math.floor(limit * BUDGET_PROMPT_RATIO) | ||
| const usable = Math.max(0, limit - reserved - prompt) | ||
| const available = usable | ||
| const normal = clamp({ | ||
| value: Math.floor(available * BUDGET_NORMAL_RATIO), | ||
| min: BUDGET_NORMAL_MIN, | ||
| max: BUDGET_NORMAL_MAX, | ||
| }) | ||
| const overflow = clamp({ | ||
| value: Math.floor(available * BUDGET_OVERFLOW_RATIO), | ||
| min: BUDGET_OVERFLOW_MIN, | ||
| max: BUDGET_OVERFLOW_MAX, | ||
| }) | ||
| return { | ||
| usable, | ||
| normal, | ||
| overflow, | ||
| tool: clamp({ value: overflow, min: BUDGET_OVERFLOW_TOOL_MIN, max: BUDGET_OVERFLOW_TOOL_MAX }), | ||
| text: clamp({ value: Math.floor(overflow / 2), min: BUDGET_OVERFLOW_TEXT_MIN, max: BUDGET_OVERFLOW_TEXT_MAX }), | ||
| messages: usable < 96_000 ? 20 : usable < 224_000 ? 40 : 80, | ||
| } | ||
| } | ||
|
|
||
| function truncate(input: { text: string; chars: number; label: string }) { | ||
| if (input.text.length <= input.chars) return input.text | ||
| return `${input.text.slice(0, input.chars)}\n\n[... truncated ${input.text.length - input.chars} chars for ${input.label}]` | ||
| } | ||
|
|
||
| function shrink(input: { messages: MessageV2.WithParts[]; budget: ReturnType<typeof budget> }) { | ||
| const msgs = input.messages.length > input.budget.messages ? input.messages.slice(-input.budget.messages) : input.messages | ||
| let total = 0 | ||
| return msgs.map((msg) => ({ | ||
| ...msg, | ||
| parts: msg.parts.map((part) => { | ||
| if (part.type === "tool" && part.state.status === "completed") { | ||
| const estimate = Token.estimate(part.state.output) | ||
| total += estimate | ||
| if (total <= input.budget.overflow && part.state.output.length <= input.budget.tool) return part | ||
| return { | ||
| ...part, | ||
| state: { | ||
| ...part.state, | ||
| output: truncate({ text: part.state.output, chars: input.budget.tool, label: "overflow compaction" }), | ||
| }, | ||
| } | ||
| } | ||
| if (part.type === "text" && part.synthetic) { | ||
| return { | ||
| ...part, | ||
| text: truncate({ text: part.text, chars: input.budget.text, label: "overflow compaction" }), | ||
| } | ||
| } | ||
| return part | ||
| }), | ||
| })) | ||
| } | ||
| // kilocode_change end | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why not keep this whole section in a separate file, and import it to keep the merge simpler? |
||
|
|
||
| export interface Interface { | ||
| readonly isOverflow: (input: { | ||
|
|
@@ -96,6 +175,14 @@ export const layer: Layer.Layer< | |
| .pipe(Effect.catchIf(NotFoundError.isInstance, () => Effect.succeed(undefined))) | ||
| if (!msgs) return | ||
|
|
||
| // kilocode_change start - scale protected tool-output window with the active model | ||
| const last = msgs.findLast((msg) => msg.info.role === "user") | ||
| const model = last?.info.role === "user" ? yield* provider.getModel(last.info.model.providerID, last.info.model.modelID) : undefined | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. WARNING: Pruning now silently stops when the session model is no longer available
|
||
| const cap = model ? budget({ cfg, model }) : undefined | ||
| const protect = cap ? cap.normal : PRUNE_PROTECT | ||
| const minimum = cap ? Math.min(PRUNE_MINIMUM, Math.floor(protect * 0.75)) : PRUNE_MINIMUM | ||
| // kilocode_change end | ||
|
|
||
| let total = 0 | ||
| let pruned = 0 | ||
| const toPrune: MessageV2.ToolPart[] = [] | ||
|
|
@@ -114,7 +201,7 @@ export const layer: Layer.Layer< | |
| if (part.state.time.compacted) break loop | ||
| const estimate = Token.estimate(part.state.output) | ||
| total += estimate | ||
| if (total > PRUNE_PROTECT) { | ||
| if (total > protect) { // kilocode_change | ||
| pruned += estimate | ||
| toPrune.push(part) | ||
| } | ||
|
|
@@ -123,7 +210,7 @@ export const layer: Layer.Layer< | |
| } | ||
|
|
||
| log.info("found", { pruned, total }) | ||
| if (pruned > PRUNE_MINIMUM) { | ||
| if (pruned > minimum) { // kilocode_change | ||
| for (const part of toPrune) { | ||
| if (part.state.status === "completed") { | ||
| part.state.time.compacted = Date.now() | ||
|
|
@@ -176,6 +263,16 @@ export const layer: Layer.Layer< | |
| const model = agent.model | ||
| ? yield* provider.getModel(agent.model.providerID, agent.model.modelID) | ||
| : yield* provider.getModel(userMessage.model.providerID, userMessage.model.modelID) | ||
| // kilocode_change start - overflow compaction must fit even with MCP/tool schema/plugin prompt overhead | ||
| if (input.overflow) { | ||
| const cfg = yield* config.get() | ||
| const cap = budget({ cfg, model }) | ||
| if (messages.length > cap.messages) { | ||
| log.info("overflow compaction: trimming old messages", { before: messages.length, after: cap.messages }) | ||
| } | ||
| messages = shrink({ messages, budget: cap }) | ||
| } | ||
| // kilocode_change end | ||
| // Allow plugins to inject context or replace compaction prompt. | ||
| const compacting = yield* plugin.trigger( | ||
| "experimental.session.compacting", | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
WARNING: The minimum budgets still overshoot small-window models
budget()is meant to scale by model capacity, but these floors forcenormal >= 8_000andoverflow >= 2_000even whenusableis smaller than that. On 4k/8k-context models the compaction path can still keep more tool/text content than the model can fit, so overflow recovery can recurse instead of reliably making the summary request fit.