|
| 1 | +/** |
| 2 | + * Per-concept prompt-budget enforcement (issue #39). |
| 3 | + * |
| 4 | + * When the same concept is extracted from many overlapping sources, the |
| 5 | + * page-generation prompt would otherwise concatenate every full source |
| 6 | + * — linear in source count — and reliably blow past the LLM provider's |
| 7 | + * context window. This module clips each contributing source's slice to |
| 8 | + * a fair share of a configurable total budget and emits a single warning |
| 9 | + * when truncation kicks in. |
| 10 | + * |
| 11 | + * The fix is deliberately defensive (proportional truncation) rather than |
| 12 | + * smart (semantic ranking / summarisation). It prevents crashes while a |
| 13 | + * deeper retrieval-driven solution is designed. |
| 14 | + */ |
| 15 | + |
| 16 | +import * as output from "../utils/output.js"; |
| 17 | +import { |
| 18 | + DEFAULT_PROMPT_BUDGET_CHARS, |
| 19 | + PROMPT_BUDGET_ENV_VAR, |
| 20 | +} from "../utils/constants.js"; |
| 21 | + |
/**
 * Marker appended to a source slice when it was truncated to fit the budget.
 * It is concatenated after the clipped content, so its own length comes on
 * top of the per-source share rather than out of it.
 */
const TRUNCATION_MARKER = "\n\n[…truncated for prompt budget — see #39…]";
| 24 | + |
/**
 * A single source's contribution to the combined per-concept content.
 * One slice becomes one `--- SOURCE: <file> ---` section in the prompt.
 */
export interface SourceSlice {
  /** Source filename (e.g. "ml-paper.md") shown as a section header in the prompt. */
  file: string;
  /**
   * Raw extracted source content, before any budgeting. Its `length`
   * (UTF-16 code units) is what gets counted against the prompt budget.
   */
  content: string;
}
| 32 | + |
/**
 * Resolve the active prompt-budget character cap.
 *
 * Reads the environment variable named by `PROMPT_BUDGET_ENV_VAR`
 * (`LLMWIKI_PROMPT_BUDGET_CHARS`). The value is accepted only when, after
 * trimming surrounding whitespace, the *entire* string is a positive
 * decimal integer. Anything else — unset, empty, non-numeric, zero,
 * negative, fractional, or digits followed by stray characters such as
 * `"1e6"` or `"200,000"` — falls back to `DEFAULT_PROMPT_BUDGET_CHARS`, so
 * a typo can't accidentally truncate every prompt to nothing.
 *
 * @returns The budget, in characters, to apply to combined source content.
 */
export function resolvePromptBudgetChars(): number {
  const raw = process.env[PROMPT_BUDGET_ENV_VAR];
  if (!raw) return DEFAULT_PROMPT_BUDGET_CHARS;

  const candidate = raw.trim();
  // `parseInt` on its own stops at the first non-digit and returns the
  // prefix ("1e6" -> 1, "200,000" -> 200), which would silently shrink the
  // budget to almost nothing. Validate the whole string first.
  if (!/^\+?\d+$/.test(candidate)) return DEFAULT_PROMPT_BUDGET_CHARS;

  const parsed = Number.parseInt(candidate, 10);
  // Absurdly long digit strings overflow to Infinity; "0" is not a usable cap.
  if (!Number.isFinite(parsed) || parsed <= 0) return DEFAULT_PROMPT_BUDGET_CHARS;
  return parsed;
}
| 47 | + |
/**
 * Combine per-source slices into the single content blob the LLM prompt
 * receives, applying a fair-share budget when the raw total would exceed
 * the configured ceiling. When no truncation is needed the output is
 * exactly what `formatSlices` produces for the untouched slices, so
 * existing compile output is unchanged for typical workloads.
 *
 * Budgeting rules:
 * - Only `content` lengths are counted; section headers and truncation
 *   markers are added on top, so the final string can be somewhat longer
 *   than the budget.
 * - Each source gets `floor(budget / sourceCount)` characters, with a floor
 *   of one character. Sources already within their share are passed
 *   through unchanged; their unused share is not redistributed.
 * - A cut never lands between the two halves of a UTF-16 surrogate pair,
 *   so truncated content stays well-formed Unicode.
 *
 * @param concept - Human-readable concept title (used in the warning only).
 * @param slices - One entry per contributing source, in arrival order.
 * @returns The combined content string suitable for buildPagePrompt.
 */
export function buildBudgetedCombinedContent(
  concept: string,
  slices: SourceSlice[],
): string {
  const budget = resolvePromptBudgetChars();
  const totalRaw = slices.reduce((sum, s) => sum + s.content.length, 0);

  // Also covers the empty-list case (0 <= budget), so the division below
  // never sees a zero source count.
  if (totalRaw <= budget) {
    return formatSlices(slices);
  }

  const perSource = Math.max(1, Math.floor(budget / slices.length));
  warnTruncation(concept, totalRaw, slices.length, perSource, budget);

  const trimmed = slices.map((s) =>
    s.content.length > perSource
      ? { ...s, content: sliceWithoutSplittingSurrogates(s.content, perSource) + TRUNCATION_MARKER }
      : s,
  );
  return formatSlices(trimmed);
}

/**
 * Return the first `limit` UTF-16 code units of `text`, backing off by one
 * when the cut would separate a high surrogate from its low surrogate.
 */
function sliceWithoutSplittingSurrogates(text: string, limit: number): string {
  const lastKept = text.charCodeAt(limit - 1);
  const firstDropped = text.charCodeAt(limit);
  const splitsPair =
    lastKept >= 0xd800 && lastKept <= 0xdbff &&
    firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
  return text.slice(0, splitsPair ? limit - 1 : limit);
}
| 80 | + |
/**
 * Serialise slices into prompt text: each slice becomes a
 * `--- SOURCE: <file> ---` header, a blank line, then its content, and
 * consecutive sections are separated by a blank line.
 */
function formatSlices(slices: SourceSlice[]): string {
  const sections: string[] = [];
  for (const { file, content } of slices) {
    sections.push("--- SOURCE: " + file + " ---\n\n" + content);
  }
  return sections.join("\n\n");
}
| 87 | + |
/**
 * Report, via one `output.status` call, that a concept's combined source
 * content exceeded the prompt budget and is being clipped per source. The
 * message names the env var that raises the limit.
 */
function warnTruncation(
  concept: string,
  totalRaw: number,
  sourceCount: number,
  perSource: number,
  budget: number,
): void {
  const pretty = (value: number): string => value.toLocaleString();
  const message = [
    `Combined source content for "${concept}" (${pretty(totalRaw)} chars across `,
    `${sourceCount} sources) exceeds the ${pretty(budget)}-char prompt budget; `,
    `truncating each source to ~${pretty(perSource)} chars. `,
    `Raise via ${PROMPT_BUDGET_ENV_VAR} when running against larger-context models.`,
  ].join("");
  output.status("!", output.warn(message));
}
0 commit comments