From 63443a6ba0a3104ee88cd765a33d6042bc5f9829 Mon Sep 17 00:00:00 2001 From: Ben Vinegar Date: Sun, 24 May 2026 20:14:59 -0400 Subject: [PATCH] fix(ui): cap expanded context source reads --- CHANGELOG.md | 2 + src/core/fileSource.test.ts | 75 ++++++++++++++- src/core/fileSource.ts | 110 +++++++++++++++++++--- src/core/loaders.test.ts | 29 ++++++ src/ui/diff/expandCollapsedRows.test.ts | 17 ++++ src/ui/diff/expandCollapsedRows.ts | 10 +- src/ui/hooks/useReviewController.test.tsx | 27 ++++++ src/ui/hooks/useReviewController.ts | 17 +++- 8 files changed, 267 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 31ff2e4a..868bf5e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ All notable user-visible changes to Hunk are documented in this file. ### Fixed +- Capped inline context expansion source reads so huge files cannot freeze or exhaust memory when expanding unchanged lines. + ## [0.14.0-beta.1] - 2026-05-24 ### Fixed diff --git a/src/core/fileSource.test.ts b/src/core/fileSource.test.ts index ecdf3323..7c2a7fb3 100644 --- a/src/core/fileSource.test.ts +++ b/src/core/fileSource.test.ts @@ -2,7 +2,7 @@ import { afterEach, describe, expect, test } from "bun:test"; import { mkdtempSync, rmSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; -import { createFileSourceFetcher } from "./fileSource"; +import { createFileSourceFetcher, SourceTextTooLargeError } from "./fileSource"; const tempDirs: string[] = []; @@ -100,6 +100,22 @@ describe("createFileSourceFetcher", () => { expect(await fetcher.getFullText("old")).toBeNull(); }); + test("rejects fs source reads that exceed the configured byte cap", async () => { + const dir = createTempDir("hunk-source-fs-large-"); + const target = join(dir, "large.txt"); + writeFileSync(target, "0123456789\n"); + + const fetcher = createFileSourceFetcher( + { + old: { kind: "fs", absolutePath: target }, + new: { kind: "none" }, + }, + { maxSourceBytes: 5 }, + ); + + await expect(fetcher.getFullText("old")).rejects.toBeInstanceOf(SourceTextTooLargeError); + }); + test("reads git blob contents for both sides via `git show`", async () => { const repoRoot = createTempRepo("hunk-source-git-"); const filePath = "note.txt"; @@ -140,6 +156,63 @@ describe("createFileSourceFetcher", () => { expect(await fetcher.getFullText("new")).toBe("working tree\n"); }); + test("rejects git blob and index source reads that exceed the configured byte cap", async () => { + const repoRoot = createTempRepo("hunk-source-git-large-"); + const filePath = "note.txt"; + + writeFileSync(join(repoRoot, filePath), "committed source\n"); + git(repoRoot, "add", filePath); + git(repoRoot, "commit", "-m", "first"); + writeFileSync(join(repoRoot, filePath), "staged source\n"); + git(repoRoot, "add", filePath); + + const fetcher = createFileSourceFetcher( + { + old: { kind: "git-blob", repoRoot, ref: "HEAD", path: filePath }, + new: { kind: "git-index", repoRoot, path: filePath }, + }, + { maxSourceBytes: 5 }, + ); + + await expect(fetcher.getFullText("old")).rejects.toBeInstanceOf(SourceTextTooLargeError); + await expect(fetcher.getFullText("new")).rejects.toBeInstanceOf(SourceTextTooLargeError); + }); + + test("treats oversized git stderr as a generic source failure", async () => { + const originalSpawn = Bun.spawn; + const mutableBun = Bun as unknown as { spawn: typeof Bun.spawn }; + + mutableBun.spawn = (() => + originalSpawn( + [ + process.execPath, + "--eval", + "process.stdout.write('small source\\n'); process.stderr.write('x'.repeat(70000));", + ], + { + stdin: "ignore", + stdout: "pipe", + stderr: "pipe", + }, + )) as typeof Bun.spawn; + + try { + const fetcher = createFileSourceFetcher({ + old: { kind: "git-blob", repoRoot: process.cwd(), ref: "HEAD", path: "note.txt" }, + new: { kind: "none" }, + }); + + const loggedErrors = await captureConsoleErrors(async () => { + await expect(fetcher.getFullText("old")).resolves.toBeNull(); + }); + + expect(String(loggedErrors[0]?.[0])).toContain("failed to collect Git source"); + expect(String(loggedErrors[0]?.[1])).toContain("diagnostics exceeded"); + } finally { + mutableBun.spawn = originalSpawn; + } + }); + test("passes custom git executable through async git source reads", async () => { const originalSpawn = Bun.spawn; const mutableBun = Bun as unknown as { spawn: typeof Bun.spawn }; diff --git a/src/core/fileSource.ts b/src/core/fileSource.ts index b4d9701a..7beca90d 100644 --- a/src/core/fileSource.ts +++ b/src/core/fileSource.ts @@ -25,8 +25,19 @@ export interface FileSourceFetcher { getFullText(side: FileSourceSide): Promise; } +export const DEFAULT_SOURCE_TEXT_MAX_BYTES = 1_000_000; + +/** Raised when expanded-context source would require reading an unsafe amount of text. */ +export class SourceTextTooLargeError extends Error { + constructor(readonly maxBytes: number) { + super(`Source text exceeds ${maxBytes} bytes.`); + this.name = "SourceTextTooLargeError"; + } +} + export interface FileSourceFetcherOptions { gitExecutable?: string; + maxSourceBytes?: number; } interface ResolvedSpecs { @@ -65,15 +76,26 @@ function isExpectedMissingGitSource(stderr: string) { ].some((fragment) => normalized.includes(fragment)); } -async function readFsSpec(spec: Extract): Promise { +async function readFsSpec( + spec: Extract, + maxSourceBytes: number, +): Promise { try { const file = Bun.file(spec.absolutePath); if (!(await file.exists())) { return null; } + if (file.size > maxSourceBytes) { + throw new SourceTextTooLargeError(maxSourceBytes); + } + return await file.text(); } catch (error) { + if (error instanceof SourceTextTooLargeError) { + throw error; + } + logSourceDiagnostic(`failed to read source file ${spec.absolutePath}`, error); return null; } @@ -82,15 +104,63 @@ async function readFsSpec(spec: Extract): Promis function readGitBlobSpec( spec: Extract, gitExecutable = "git", + maxSourceBytes: number, ): Promise { - return readGitObjectSpec(spec.repoRoot, `${spec.ref}:${spec.path}`, gitExecutable); + return readGitObjectSpec( + spec.repoRoot, + `${spec.ref}:${spec.path}`, + gitExecutable, + maxSourceBytes, + ); } function readGitIndexSpec( spec: Extract, gitExecutable = "git", + maxSourceBytes: number, ): Promise { - return readGitObjectSpec(spec.repoRoot, `:${spec.path}`, gitExecutable); + return readGitObjectSpec(spec.repoRoot, `:${spec.path}`, gitExecutable, maxSourceBytes); +} + +async function readStreamTextWithLimit( + stream: ReadableStream | null, + maxBytes: number, + onTooLarge?: () => void, + createLimitError: (maxBytes: number) => Error = (maxBytes) => + new SourceTextTooLargeError(maxBytes), +) { + if (!stream) { + return ""; + } + + const reader = stream.getReader(); + const chunks: Uint8Array[] = []; + let totalBytes = 0; + + for (;;) { + const { done, value } = await reader.read(); + if (done) { + break; + } + + totalBytes += value.byteLength; + if (totalBytes > maxBytes) { + onTooLarge?.(); + await reader.cancel().catch(() => undefined); + throw createLimitError(maxBytes); + } + + chunks.push(value); + } + + const combined = new Uint8Array(totalBytes); + let offset = 0; + for (const chunk of chunks) { + combined.set(chunk, offset); + offset += chunk.byteLength; + } + + return new TextDecoder().decode(combined); } /** Read a blob-like Git object spec such as `HEAD:path` or `:path`. */ @@ -98,6 +168,7 @@ async function readGitObjectSpec( repoRoot: string, objectName: string, gitExecutable = "git", + maxSourceBytes: number, ): Promise { let proc: Bun.ReadableSubprocess; @@ -117,10 +188,21 @@ async function readGitObjectSpec( try { output = await Promise.all([ proc.exited, - new Response(proc.stdout).text(), - new Response(proc.stderr).text(), + readStreamTextWithLimit(proc.stdout, maxSourceBytes, () => proc.kill()), + readStreamTextWithLimit( + proc.stderr, + 64 * 1024, + undefined, + (maxBytes) => new Error(`Git source diagnostics exceeded ${maxBytes} bytes.`), + ), ]); } catch (error) { + if (error instanceof SourceTextTooLargeError) { + proc.kill(); + await proc.exited.catch(() => undefined); + throw error; + } + logSourceDiagnostic(`failed to collect Git source ${objectName}`, error); return null; } @@ -139,27 +221,33 @@ async function readGitObjectSpec( async function readSpec( spec: FileSourceSpec, - { gitExecutable = "git" }: FileSourceFetcherOptions = {}, + { + gitExecutable = "git", + maxSourceBytes = DEFAULT_SOURCE_TEXT_MAX_BYTES, + }: FileSourceFetcherOptions = {}, ): Promise { if (spec.kind === "none") { return null; } if (spec.kind === "fs") { - return readFsSpec(spec); + return readFsSpec(spec, maxSourceBytes); } if (spec.kind === "git-index") { - return readGitIndexSpec(spec, gitExecutable); + return readGitIndexSpec(spec, gitExecutable, maxSourceBytes); } - return readGitBlobSpec(spec, gitExecutable); + return readGitBlobSpec(spec, gitExecutable, maxSourceBytes); } /** Build a per-file source fetcher that caches each side's resolved text. */ export function createFileSourceFetcher( specs: ResolvedSpecs, - { gitExecutable = "git" }: Readonly = {}, + { + gitExecutable = "git", + maxSourceBytes = DEFAULT_SOURCE_TEXT_MAX_BYTES, + }: Readonly = {}, ): FileSourceFetcher { const cache = new Map(); @@ -169,7 +257,7 @@ export function createFileSourceFetcher( return cache.get(side) ?? null; } - const text = await readSpec(specs[side], { gitExecutable }); + const text = await readSpec(specs[side], { gitExecutable, maxSourceBytes }); cache.set(side, text); return text; }, diff --git a/src/core/loaders.test.ts b/src/core/loaders.test.ts index 2746552e..6b111ce5 100644 --- a/src/core/loaders.test.ts +++ b/src/core/loaders.test.ts @@ -2,6 +2,7 @@ import { afterEach, describe, expect, test } from "bun:test"; import { mkdirSync, mkdtempSync, realpathSync, rmSync, symlinkSync, writeFileSync } from "node:fs"; import { platform, tmpdir } from "node:os"; import { join } from "node:path"; +import { SourceTextTooLargeError } from "./fileSource"; import { loadAppBootstrap } from "./loaders"; import type { CliInput } from "./types"; @@ -338,6 +339,7 @@ describe("loadAppBootstrap", () => { deletions: 1, }); expect(bootstrap.changeset.files[0]?.metadata.hunks).toHaveLength(0); + expect(bootstrap.changeset.files[0]?.sourceFetcher).toBeUndefined(); }); test("keeps generated large untracked files as skipped placeholders", async () => { @@ -363,6 +365,7 @@ describe("loadAppBootstrap", () => { }); expect(bootstrap.changeset.files[0]?.statsTruncated).toBe(false); expect(bootstrap.changeset.files[0]?.metadata.hunks).toHaveLength(0); + expect(bootstrap.changeset.files[0]?.sourceFetcher).toBeUndefined(); }); test("caps skipped untracked-file stats when byte-size detection would require a full huge read", async () => { @@ -1561,6 +1564,32 @@ describe("loadAppBootstrap source fetcher attachment", () => { expect(await file?.sourceFetcher?.getFullText("old")).toBe("first\n"); }); + test("`hunk show ` refuses to expand source blobs above the source cap", async () => { + const dir = createTempRepo("hunk-source-show-large-"); + const lines = Array.from({ length: 130_000 }, (_, index) => `line ${index + 1}`); + writeFileSync(join(dir, "large.txt"), `${lines.join("\n")}\n`); + git(dir, "add", "large.txt"); + git(dir, "commit", "-m", "initial"); + + lines[65_000] = "line 65001 changed"; + writeFileSync(join(dir, "large.txt"), `${lines.join("\n")}\n`); + git(dir, "add", "large.txt"); + git(dir, "commit", "-m", "change middle line"); + + const bootstrap = await loadFromRepo(dir, { + kind: "show", + ref: "HEAD", + options: { mode: "auto" }, + }); + + const file = bootstrap.changeset.files[0]; + expect(file?.sourceFetcher).toBeDefined(); + expect(file?.metadata.hunks.length).toBeGreaterThan(0); + await expect(file?.sourceFetcher?.getFullText("new")).rejects.toBeInstanceOf( + SourceTextTooLargeError, + ); + }); + test("`hunk show ` pins expansion sources after the ref moves", async () => { const dir = createTempRepo("hunk-source-show-pinned-"); writeFileSync(join(dir, "value.txt"), "first\n"); diff --git a/src/ui/diff/expandCollapsedRows.test.ts b/src/ui/diff/expandCollapsedRows.test.ts index a285842a..a6d7c3ea 100644 --- a/src/ui/diff/expandCollapsedRows.test.ts +++ b/src/ui/diff/expandCollapsedRows.test.ts @@ -101,6 +101,23 @@ describe("expandCollapsedRows", () => { expect(collapsed.text.toLowerCase()).toContain("could not load"); }); + test("rewrites the label when source is too large to expand", () => { + const rows: DiffRow[] = [makeCollapsedRow("before", 0, [1, 3], [1, 3]), makeHunkHeader(0)]; + + const result = expandCollapsedRows(rows, { + layout: "split", + expandedKeys: new Set([gapKey("before", 0)]), + sourceStatus: { kind: "error", reason: "too-large" }, + side: "new", + }); + + const collapsed = result[0]; + if (!collapsed || collapsed.type !== "collapsed") { + throw new Error("expected first row to be collapsed"); + } + expect(collapsed.text.toLowerCase()).toContain("source too large"); + }); + test("inserts split-line context rows after the expanded collapsed row", () => { const rows: DiffRow[] = [makeCollapsedRow("before", 0, [1, 3], [1, 3]), makeHunkHeader(0)]; diff --git a/src/ui/diff/expandCollapsedRows.ts b/src/ui/diff/expandCollapsedRows.ts index bee9d73f..c5b34d66 100644 --- a/src/ui/diff/expandCollapsedRows.ts +++ b/src/ui/diff/expandCollapsedRows.ts @@ -13,7 +13,7 @@ export type ExpansionLayout = "split" | "stack"; export type FileSourceStatus = | { kind: "loading" } | { kind: "loaded"; text: string } - | { kind: "error" }; + | { kind: "error"; reason?: "too-large" }; export interface ExpandCollapsedRowsOptions { layout: ExpansionLayout; @@ -68,7 +68,11 @@ function loadingRowText(lineCount: number) { return `Loading ${lineCount} unchanged ${lineCount === 1 ? "line" : "lines"}…`; } -function errorRowText(lineCount: number) { +function errorRowText(lineCount: number, reason?: "too-large") { + if (reason === "too-large") { + return `Source too large to expand ${lineCount} unchanged ${lineCount === 1 ? "line" : "lines"}`; + } + return `Could not load ${lineCount} unchanged ${lineCount === 1 ? "line" : "lines"}`; } @@ -179,7 +183,7 @@ export function expandCollapsedRows( } if (sourceStatus?.kind === "error") { - result.push({ ...row, text: errorRowText(lineCount) }); + result.push({ ...row, text: errorRowText(lineCount, sourceStatus.reason) }); continue; } diff --git a/src/ui/hooks/useReviewController.test.tsx b/src/ui/hooks/useReviewController.test.tsx index 4b5452da..5b99efc9 100644 --- a/src/ui/hooks/useReviewController.test.tsx +++ b/src/ui/hooks/useReviewController.test.tsx @@ -1,6 +1,7 @@ import { describe, expect, test } from "bun:test"; import { testRender } from "@opentui/react/test-utils"; import { act, StrictMode, useEffect, useState } from "react"; +import { SourceTextTooLargeError } from "../../core/fileSource"; import type { DiffFile } from "../../core/types"; import { createTestDeferred, @@ -723,6 +724,32 @@ describe("useReviewController", () => { } }); + test("toggleGap marks over-limit source loads as too large", async () => { + const tooLargeFetcher = createTestSourceFetcher(() => { + throw new SourceTextTooLargeError(5); + }); + + const { controllerRef, setup } = await renderReviewController([ + createAlphaFile(tooLargeFetcher), + ]); + + try { + await flush(setup); + + await act(async () => { + expectValue(controllerRef.current).toggleGap("alpha", "before:0"); + }); + await flush(setup); + + const status = expectValue(controllerRef.current).sourceStatusByFileId["alpha"]; + expect(status).toEqual({ kind: "error", reason: "too-large" }); + } finally { + await act(async () => { + setup.renderer.destroy(); + }); + } + }); + test("toggleGap caches loaded text and does not re-fetch on the second open", async () => { let readCount = 0; const trackedFetcher = createTestSourceFetcher((side) => { diff --git a/src/ui/hooks/useReviewController.ts b/src/ui/hooks/useReviewController.ts index bc2887f5..0feab177 100644 --- a/src/ui/hooks/useReviewController.ts +++ b/src/ui/hooks/useReviewController.ts @@ -21,6 +21,7 @@ import { firstCommentTargetForHunk, resolveCommentTarget, } from "../../core/liveComments"; +import { SourceTextTooLargeError } from "../../core/fileSource"; import type { AgentAnnotation, DiffFile, UserNoteLineTarget } from "../../core/types"; import type { AppliedCommentBatchResult, @@ -490,11 +491,17 @@ export function useReviewController({ files }: { files: DiffFile[] }): ReviewCon return; } - console.error( - `hunk: failed to load ${side} source for ${file.path} (${file.id}).`, - error, - ); - setSettledStatus({ kind: "error" }); + const reason = error instanceof SourceTextTooLargeError ? "too-large" : undefined; + if (reason !== "too-large") { + console.error( + `hunk: failed to load ${side} source for ${file.path} (${file.id}).`, + error, + ); + } + setSettledStatus({ + kind: "error", + reason, + }); }); }, [allFiles],