Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ All notable user-visible changes to Hunk are documented in this file.

### Fixed

- Capped inline context expansion source reads so huge files cannot freeze or exhaust memory when expanding unchanged lines.

## [0.14.0-beta.1] - 2026-05-24

### Fixed
Expand Down
75 changes: 74 additions & 1 deletion src/core/fileSource.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { afterEach, describe, expect, test } from "bun:test";
import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { createFileSourceFetcher } from "./fileSource";
import { createFileSourceFetcher, SourceTextTooLargeError } from "./fileSource";

const tempDirs: string[] = [];

Expand Down Expand Up @@ -100,6 +100,22 @@ describe("createFileSourceFetcher", () => {
expect(await fetcher.getFullText("old")).toBeNull();
});

test("rejects fs source reads that exceed the configured byte cap", async () => {
const dir = createTempDir("hunk-source-fs-large-");
const target = join(dir, "large.txt");
writeFileSync(target, "0123456789\n");

const fetcher = createFileSourceFetcher(
{
old: { kind: "fs", absolutePath: target },
new: { kind: "none" },
},
{ maxSourceBytes: 5 },
);

await expect(fetcher.getFullText("old")).rejects.toBeInstanceOf(SourceTextTooLargeError);
});

test("reads git blob contents for both sides via `git show`", async () => {
const repoRoot = createTempRepo("hunk-source-git-");
const filePath = "note.txt";
Expand Down Expand Up @@ -140,6 +156,63 @@ describe("createFileSourceFetcher", () => {
expect(await fetcher.getFullText("new")).toBe("working tree\n");
});

test("rejects git blob and index source reads that exceed the configured byte cap", async () => {
const repoRoot = createTempRepo("hunk-source-git-large-");
const filePath = "note.txt";

writeFileSync(join(repoRoot, filePath), "committed source\n");
git(repoRoot, "add", filePath);
git(repoRoot, "commit", "-m", "first");
writeFileSync(join(repoRoot, filePath), "staged source\n");
git(repoRoot, "add", filePath);

const fetcher = createFileSourceFetcher(
{
old: { kind: "git-blob", repoRoot, ref: "HEAD", path: filePath },
new: { kind: "git-index", repoRoot, path: filePath },
},
{ maxSourceBytes: 5 },
);

await expect(fetcher.getFullText("old")).rejects.toBeInstanceOf(SourceTextTooLargeError);
await expect(fetcher.getFullText("new")).rejects.toBeInstanceOf(SourceTextTooLargeError);
});

test("treats oversized git stderr as a generic source failure", async () => {
const originalSpawn = Bun.spawn;
const mutableBun = Bun as unknown as { spawn: typeof Bun.spawn };

mutableBun.spawn = (() =>
originalSpawn(
[
process.execPath,
"--eval",
"process.stdout.write('small source\\n'); process.stderr.write('x'.repeat(70000));",
],
{
stdin: "ignore",
stdout: "pipe",
stderr: "pipe",
},
)) as typeof Bun.spawn;

try {
const fetcher = createFileSourceFetcher({
old: { kind: "git-blob", repoRoot: process.cwd(), ref: "HEAD", path: "note.txt" },
new: { kind: "none" },
});

const loggedErrors = await captureConsoleErrors(async () => {
await expect(fetcher.getFullText("old")).resolves.toBeNull();
});

expect(String(loggedErrors[0]?.[0])).toContain("failed to collect Git source");
expect(String(loggedErrors[0]?.[1])).toContain("diagnostics exceeded");
} finally {
mutableBun.spawn = originalSpawn;
}
});

test("passes custom git executable through async git source reads", async () => {
const originalSpawn = Bun.spawn;
const mutableBun = Bun as unknown as { spawn: typeof Bun.spawn };
Expand Down
110 changes: 99 additions & 11 deletions src/core/fileSource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,19 @@ export interface FileSourceFetcher {
getFullText(side: FileSourceSide): Promise<string | null>;
}

export const DEFAULT_SOURCE_TEXT_MAX_BYTES = 1_000_000;

/** Raised when expanded-context source would require reading an unsafe amount of text. */
export class SourceTextTooLargeError extends Error {
constructor(readonly maxBytes: number) {
super(`Source text exceeds ${maxBytes} bytes.`);
this.name = "SourceTextTooLargeError";
}
}

export interface FileSourceFetcherOptions {
gitExecutable?: string;
maxSourceBytes?: number;
}

interface ResolvedSpecs {
Expand Down Expand Up @@ -65,15 +76,26 @@ function isExpectedMissingGitSource(stderr: string) {
].some((fragment) => normalized.includes(fragment));
}

async function readFsSpec(spec: Extract<FileSourceSpec, { kind: "fs" }>): Promise<string | null> {
async function readFsSpec(
spec: Extract<FileSourceSpec, { kind: "fs" }>,
maxSourceBytes: number,
): Promise<string | null> {
try {
const file = Bun.file(spec.absolutePath);
if (!(await file.exists())) {
return null;
}

if (file.size > maxSourceBytes) {
throw new SourceTextTooLargeError(maxSourceBytes);
}

return await file.text();
} catch (error) {
if (error instanceof SourceTextTooLargeError) {
throw error;
}

logSourceDiagnostic(`failed to read source file ${spec.absolutePath}`, error);
return null;
}
Expand All @@ -82,22 +104,71 @@ async function readFsSpec(spec: Extract<FileSourceSpec, { kind: "fs" }>): Promis
function readGitBlobSpec(
spec: Extract<FileSourceSpec, { kind: "git-blob" }>,
gitExecutable = "git",
maxSourceBytes: number,
): Promise<string | null> {
return readGitObjectSpec(spec.repoRoot, `${spec.ref}:${spec.path}`, gitExecutable);
return readGitObjectSpec(
spec.repoRoot,
`${spec.ref}:${spec.path}`,
gitExecutable,
maxSourceBytes,
);
}

function readGitIndexSpec(
spec: Extract<FileSourceSpec, { kind: "git-index" }>,
gitExecutable = "git",
maxSourceBytes: number,
): Promise<string | null> {
return readGitObjectSpec(spec.repoRoot, `:${spec.path}`, gitExecutable);
return readGitObjectSpec(spec.repoRoot, `:${spec.path}`, gitExecutable, maxSourceBytes);
}

async function readStreamTextWithLimit(
stream: ReadableStream<Uint8Array> | null,
maxBytes: number,
onTooLarge?: () => void,
createLimitError: (maxBytes: number) => Error = (maxBytes) =>
new SourceTextTooLargeError(maxBytes),
) {
if (!stream) {
return "";
}

const reader = stream.getReader();
const chunks: Uint8Array[] = [];
let totalBytes = 0;

for (;;) {
const { done, value } = await reader.read();
if (done) {
break;
}

totalBytes += value.byteLength;
if (totalBytes > maxBytes) {
onTooLarge?.();
await reader.cancel().catch(() => undefined);
throw createLimitError(maxBytes);
}

chunks.push(value);
}

const combined = new Uint8Array(totalBytes);
let offset = 0;
for (const chunk of chunks) {
combined.set(chunk, offset);
offset += chunk.byteLength;
}

return new TextDecoder().decode(combined);
}

/** Read a blob-like Git object spec such as `HEAD:path` or `:path`. */
async function readGitObjectSpec(
repoRoot: string,
objectName: string,
gitExecutable = "git",
maxSourceBytes: number,
): Promise<string | null> {
let proc: Bun.ReadableSubprocess;

Expand All @@ -117,10 +188,21 @@ async function readGitObjectSpec(
try {
output = await Promise.all([
proc.exited,
new Response(proc.stdout).text(),
new Response(proc.stderr).text(),
readStreamTextWithLimit(proc.stdout, maxSourceBytes, () => proc.kill()),
readStreamTextWithLimit(
proc.stderr,
64 * 1024,
undefined,
(maxBytes) => new Error(`Git source diagnostics exceeded ${maxBytes} bytes.`),
),
]);
} catch (error) {
if (error instanceof SourceTextTooLargeError) {
proc.kill();
await proc.exited.catch(() => undefined);
throw error;
}

logSourceDiagnostic(`failed to collect Git source ${objectName}`, error);
return null;
}
Comment on lines 188 to 208

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 SourceTextTooLargeError from stderr reader surfaces as wrong UI message

Both stdout and stderr reads share the same SourceTextTooLargeError type, and the catch block re-throws any instance of it without distinguishing which stream originated it. If git show writes more than 64 KB to stderr (e.g., hook output, verbose trace logging), the error from the stderr readStreamTextWithLimit call will be caught and re-thrown, causing the caller to display "Source too large to expand" even though the actual source content may have been well within the 1 MB limit. The stderr overflow should instead be treated as a generic read failure (logged and return null) rather than mapped to SourceTextTooLargeError.

Prompt To Fix With AI
This is a comment left during a code review.
Path: src/core/fileSource.ts
Line: 186-201

Comment:
**`SourceTextTooLargeError` from stderr reader surfaces as wrong UI message**

Both stdout and stderr reads share the same `SourceTextTooLargeError` type, and the catch block re-throws any instance of it without distinguishing which stream originated it. If `git show` writes more than 64 KB to stderr (e.g., hook output, verbose trace logging), the error from the stderr `readStreamTextWithLimit` call will be caught and re-thrown, causing the caller to display "Source too large to expand" even though the actual source content may have been well within the 1 MB limit. The stderr overflow should instead be treated as a generic read failure (logged and return `null`) rather than mapped to `SourceTextTooLargeError`.

How can I resolve this? If you propose a fix, please make it concise.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch — fixed. Oversized stderr now creates a generic diagnostics-limit error instead of SourceTextTooLargeError, so the UI only shows “Source too large…” for over-limit source stdout. I also added a regression test for oversized git stderr returning a generic source failure.

This comment was generated by Pi using OpenAI GPT-5

Expand All @@ -139,27 +221,33 @@ async function readGitObjectSpec(

async function readSpec(
spec: FileSourceSpec,
{ gitExecutable = "git" }: FileSourceFetcherOptions = {},
{
gitExecutable = "git",
maxSourceBytes = DEFAULT_SOURCE_TEXT_MAX_BYTES,
}: FileSourceFetcherOptions = {},
): Promise<string | null> {
if (spec.kind === "none") {
return null;
}

if (spec.kind === "fs") {
return readFsSpec(spec);
return readFsSpec(spec, maxSourceBytes);
}

if (spec.kind === "git-index") {
return readGitIndexSpec(spec, gitExecutable);
return readGitIndexSpec(spec, gitExecutable, maxSourceBytes);
}

return readGitBlobSpec(spec, gitExecutable);
return readGitBlobSpec(spec, gitExecutable, maxSourceBytes);
}

/** Build a per-file source fetcher that caches each side's resolved text. */
export function createFileSourceFetcher(
specs: ResolvedSpecs,
{ gitExecutable = "git" }: Readonly<FileSourceFetcherOptions> = {},
{
gitExecutable = "git",
maxSourceBytes = DEFAULT_SOURCE_TEXT_MAX_BYTES,
}: Readonly<FileSourceFetcherOptions> = {},
): FileSourceFetcher {
const cache = new Map<FileSourceSide, string | null>();

Expand All @@ -169,7 +257,7 @@ export function createFileSourceFetcher(
return cache.get(side) ?? null;
}

const text = await readSpec(specs[side], { gitExecutable });
const text = await readSpec(specs[side], { gitExecutable, maxSourceBytes });
cache.set(side, text);
return text;
},
Expand Down
29 changes: 29 additions & 0 deletions src/core/loaders.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { afterEach, describe, expect, test } from "bun:test";
import { mkdirSync, mkdtempSync, realpathSync, rmSync, symlinkSync, writeFileSync } from "node:fs";
import { platform, tmpdir } from "node:os";
import { join } from "node:path";
import { SourceTextTooLargeError } from "./fileSource";
import { loadAppBootstrap } from "./loaders";
import type { CliInput } from "./types";

Expand Down Expand Up @@ -338,6 +339,7 @@ describe("loadAppBootstrap", () => {
deletions: 1,
});
expect(bootstrap.changeset.files[0]?.metadata.hunks).toHaveLength(0);
expect(bootstrap.changeset.files[0]?.sourceFetcher).toBeUndefined();
});

test("keeps generated large untracked files as skipped placeholders", async () => {
Expand All @@ -363,6 +365,7 @@ describe("loadAppBootstrap", () => {
});
expect(bootstrap.changeset.files[0]?.statsTruncated).toBe(false);
expect(bootstrap.changeset.files[0]?.metadata.hunks).toHaveLength(0);
expect(bootstrap.changeset.files[0]?.sourceFetcher).toBeUndefined();
});

test("caps skipped untracked-file stats when byte-size detection would require a full huge read", async () => {
Expand Down Expand Up @@ -1561,6 +1564,32 @@ describe("loadAppBootstrap source fetcher attachment", () => {
expect(await file?.sourceFetcher?.getFullText("old")).toBe("first\n");
});

test("`hunk show <ref>` refuses to expand source blobs above the source cap", async () => {
const dir = createTempRepo("hunk-source-show-large-");
const lines = Array.from({ length: 130_000 }, (_, index) => `line ${index + 1}`);
writeFileSync(join(dir, "large.txt"), `${lines.join("\n")}\n`);
git(dir, "add", "large.txt");
git(dir, "commit", "-m", "initial");

lines[65_000] = "line 65001 changed";
writeFileSync(join(dir, "large.txt"), `${lines.join("\n")}\n`);
git(dir, "add", "large.txt");
git(dir, "commit", "-m", "change middle line");

const bootstrap = await loadFromRepo(dir, {
kind: "show",
ref: "HEAD",
options: { mode: "auto" },
});

const file = bootstrap.changeset.files[0];
expect(file?.sourceFetcher).toBeDefined();
expect(file?.metadata.hunks.length).toBeGreaterThan(0);
await expect(file?.sourceFetcher?.getFullText("new")).rejects.toBeInstanceOf(
SourceTextTooLargeError,
);
});

test("`hunk show <ref>` pins expansion sources after the ref moves", async () => {
const dir = createTempRepo("hunk-source-show-pinned-");
writeFileSync(join(dir, "value.txt"), "first\n");
Expand Down
17 changes: 17 additions & 0 deletions src/ui/diff/expandCollapsedRows.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,23 @@ describe("expandCollapsedRows", () => {
expect(collapsed.text.toLowerCase()).toContain("could not load");
});

test("rewrites the label when source is too large to expand", () => {
const rows: DiffRow[] = [makeCollapsedRow("before", 0, [1, 3], [1, 3]), makeHunkHeader(0)];

const result = expandCollapsedRows(rows, {
layout: "split",
expandedKeys: new Set([gapKey("before", 0)]),
sourceStatus: { kind: "error", reason: "too-large" },
side: "new",
});

const collapsed = result[0];
if (!collapsed || collapsed.type !== "collapsed") {
throw new Error("expected first row to be collapsed");
}
expect(collapsed.text.toLowerCase()).toContain("source too large");
});

test("inserts split-line context rows after the expanded collapsed row", () => {
const rows: DiffRow[] = [makeCollapsedRow("before", 0, [1, 3], [1, 3]), makeHunkHeader(0)];

Expand Down
Loading
Loading