Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 23 additions & 2 deletions src/domain/graph/builder/pipeline.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ import {
formatDropExtensionSummary,
getActiveEngine,
getInstalledWasmExtensions,
parseFilesAuto,
parseFilesWasmForBackfill,
} from '../../parser.js';
import { setWorkspaces } from '../resolve.js';
import { PipelineContext } from './context.js';
Expand Down Expand Up @@ -793,7 +793,7 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
`Native orchestrator dropped ${totals['native-extractor-failure']} file(s) in natively-supported languages — likely a Rust extractor bug. Backfilling via WASM: ${formatDropExtensionSummary(byReason['native-extractor-failure'])}`,
);
}
const wasmResults = await parseFilesAuto(missingAbs, ctx.rootDir, { engine: 'wasm' });
const wasmResults = await parseFilesWasmForBackfill(missingAbs, ctx.rootDir);

const rows: unknown[][] = [];
const exportKeys: unknown[][] = [];
Expand Down Expand Up @@ -853,6 +853,27 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
updateStmt.run(...vals);
}
}

// Free WASM parse trees from the inline backfill path (#1058).
// `parseFilesWasmInline` sets `symbols._tree` (a live web-tree-sitter Tree
// backed by WASM linear memory) on every result, but these symbols are
// consumed locally for DB row construction and never added to
// `ctx.allSymbols`, so the finalize-stage `releaseWasmTrees` sweep never
// sees them. Without this, trees leak WASM memory until process exit —
// bounded per run but cumulative across in-process integration tests.
// Mirrors the cleanup discipline established for #931.
for (const [, symbols] of wasmResults) {
const tree = (symbols as { _tree?: { delete?: () => void } })._tree;
if (tree && typeof tree.delete === 'function') {
try {
tree.delete();
} catch {
/* ignore cleanup errors */
}
}
(symbols as { _tree?: unknown; _langId?: unknown })._tree = undefined;
(symbols as { _tree?: unknown; _langId?: unknown })._langId = undefined;
}
}

// ── Pipeline stages execution ───────────────────────────────────────────
Expand Down
67 changes: 66 additions & 1 deletion src/domain/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1067,6 +1067,71 @@ async function parseFilesWasm(
return result;
}

/**
 * Largest batch size (inclusive) that `parseFilesWasmForBackfill` parses on
 * the inline path, i.e. without spawning a worker. Batches with more files
 * than this are handed to the worker-pool parser instead.
 *
 * Sized for typical engine-parity drops: a handful of fixture files in one
 * or two languages (the recurring HCL case is 4 files). Above this, the
 * worker-pool's IPC + crash-isolation cost (#965) is amortized over enough
 * parse work to be worth paying; below it, the ~1–2s cold-start dominates.
 */
const INLINE_BACKFILL_THRESHOLD = 16;

/**
 * Parse a small batch of files with the WASM grammars directly on the
 * calling thread, bypassing the worker pool.
 *
 * Intended for the engine-parity backfill, where the native engine has
 * dropped only a few files (typically test fixtures). For such batches the
 * worker pool's per-call IPC + grammar-init overhead (1–2s on slow CI
 * runners) dwarfs the ~10ms of real parse work.
 *
 * Per-file behavior:
 *  - files whose lower-cased extension is not registered in `_extToLang`
 *    are skipped silently;
 *  - files that cannot be read are skipped with a warning;
 *  - files for which `wasmExtractSymbols` returns a falsy value are skipped.
 *
 * Every returned symbols object additionally carries `_tree` (the parse tree
 * from the extraction) and `_langId`, so `runAnalyses` can run AST/CFG/dataflow
 * visitors via the unified walker — the same shape WASM-engine results had
 * before the worker pool was introduced.
 *
 * @param filePaths Paths of the files to parse; each is read from disk as-is.
 * @param rootDir   Directory against which result keys are made relative.
 * @returns Map keyed by the `/`-separated path relative to `rootDir`.
 */
async function parseFilesWasmInline(
  filePaths: string[],
  rootDir: string,
): Promise<Map<string, ExtractorOutput>> {
  const parsed = new Map<string, ExtractorOutput>();

  if (filePaths.length > 0) {
    const parsers = await ensureParsersForFiles(filePaths);

    for (const absPath of filePaths) {
      const extension = path.extname(absPath).toLowerCase();
      if (!_extToLang.has(extension)) continue;

      let source: string;
      try {
        source = fs.readFileSync(absPath, 'utf-8');
      } catch (err: unknown) {
        // Unreadable file: report it and move on to the rest of the batch.
        warn(`Skipping ${path.relative(rootDir, absPath)}: ${(err as Error).message}`);
        continue;
      }

      const extraction = wasmExtractSymbols(parsers, absPath, source);
      if (!extraction) continue;

      // Attach the parse tree and language id as side-channel fields on the
      // symbols object itself (mutated in place, same object is stored).
      const symbols = Object.assign(extraction.symbols as ExtractorOutput, {
        _tree: extraction.tree,
        _langId: extraction.langId,
      });

      // Result keys always use forward slashes, regardless of platform.
      const key = path.relative(rootDir, absPath).split(path.sep).join('/');
      parsed.set(key, symbols);
    }
  }

  return parsed;
}

/**
 * WASM parse entry point for backfilling files the native engine dropped.
 *
 * Chooses the parse strategy by batch size:
 *  - up to and including `INLINE_BACKFILL_THRESHOLD` files → inline
 *    (main-thread) parse via `parseFilesWasmInline`;
 *  - more than that → `parseFilesWasm`, keeping the worker-pool isolation
 *    against tree-sitter WASM crashes (#965).
 *
 * The threshold matches typical engine-parity drop sizes (a few fixture
 * files in one or two languages).
 *
 * @param filePaths Paths of the files to parse.
 * @param rootDir   Root directory forwarded unchanged to the chosen parser.
 * @returns Whatever the chosen parser resolves to: a map of relative path
 *          to extracted symbols.
 */
export async function parseFilesWasmForBackfill(
  filePaths: string[],
  rootDir: string,
): Promise<Map<string, ExtractorOutput>> {
  const exceedsInlineBudget = filePaths.length > INLINE_BACKFILL_THRESHOLD;
  if (exceedsInlineBudget) {
    return parseFilesWasm(filePaths, rootDir);
  }
  return parseFilesWasmInline(filePaths, rootDir);
}

/**
* Parse multiple files in bulk and return a Map<relPath, symbols>.
*/
Expand Down Expand Up @@ -1117,7 +1182,7 @@ export async function parseFilesAuto(
);
if (dropped.length > 0) {
warn(`Native engine dropped ${dropped.length} file(s); falling back to WASM for parity`);
const wasmResults = await parseFilesWasm(dropped, rootDir);
const wasmResults = await parseFilesWasmForBackfill(dropped, rootDir);
for (const [relPath, symbols] of wasmResults) {
result.set(relPath, symbols);
}
Expand Down
Loading