Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/ast-analysis/engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,9 @@ async function ensureWasmTreesIfNeeded(
if (needsWasmTrees) {
try {
const { ensureWasmTrees } = await getParserModule();
await ensureWasmTrees(fileSymbols, rootDir);
await ensureWasmTrees(fileSymbols, rootDir, (relPath, symbols) =>
fileNeedsWasmTree(relPath, symbols, flags),
);
} catch (err: unknown) {
debug(`ensureWasmTrees failed: ${toErrorMessage(err)}`);
}
Expand Down
47 changes: 38 additions & 9 deletions src/ast-analysis/visitors/ast-store-visitor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,21 @@ function extractChildExpressionText(node: TreeSitterNode): string | null {
return truncate(node.text);
}

/**
* Count code points cheaply: skip the `[...s]` spread when `s.length` already
* decides the answer. Each code point is 1 or 2 UTF-16 units, so `.length < 2`
* implies `< 2` code points and `.length >= 3` already guarantees `>= 2` code
* points (worst case: one surrogate pair + one BMP char = 2 code points).
* Only `.length === 2` is genuinely ambiguous (could be a single surrogate
* pair = 1 code point, or two BMP chars = 2 code points) and needs the spread.
*/
function codePointCountAtLeast2(s: string): boolean {
const len = s.length;
if (len < 2) return false;
if (len >= 3) return true;
return [...s].length >= 2;
}

/**
* Extract string content from a string-literal node, mirroring the native
* engine's `build_string_node` (`helpers.rs`). Returns `null` when the
Expand All @@ -142,15 +157,27 @@ function extractStringContent(node: TreeSitterNode, cfg: AstStringConfig): strin

let s = raw;
s = trimLeadingChars(s, '@');
s = trimLeadingChars(s, cfg.stringPrefixes);
if (cfg.stringPrefixes) s = trimLeadingChars(s, cfg.stringPrefixes);
if (isRawString) s = trimLeadingChars(s, 'r#');
s = trimLeadingChars(s, cfg.quoteChars);
if (isRawString) s = trimTrailingChars(s, '#');
s = trimTrailingChars(s, cfg.quoteChars);

// Count code points, not UTF-16 code units — matches Rust `chars().count()`.
const codePointCount = [...s].length;
if (codePointCount < 2) return null;
return codePointCountAtLeast2(s) ? s : null;
}

// Per-astTypeMap cache for the set of node-types that map to kind 'new'.
// Computed once per unique astTypeMap reference (one per language) instead
// of once per file.
const _newTypesCache = new WeakMap<Record<string, string>, Set<string>>();
function newTypesFor(astTypeMap: Record<string, string>): Set<string> {
let s = _newTypesCache.get(astTypeMap);
if (s) return s;
s = new Set<string>();
for (const type in astTypeMap) {
if (astTypeMap[type] === 'new') s.add(type);
}
_newTypesCache.set(astTypeMap, s);
return s;
}

Expand All @@ -164,11 +191,12 @@ export function createAstStoreVisitor(
): Visitor {
const rows: AstStoreRow[] = [];
const matched = new Set<number>();
const newTypes = new Set<string>(
Object.entries(astTypeMap)
.filter(([, kind]) => kind === 'new')
.map(([type]) => type),
);
const newTypes = newTypesFor(astTypeMap);
// When nodeIdMap is empty, parentNodeId resolution is wasted work — the
// worker passes an empty map and the main thread re-resolves against its
// own DB-populated map in features/ast.ts::collectFileAstRows. Skip the
// findParentDef linear scan in that case.
const skipParentLookup = nodeIdMap.size === 0;

function findParentDef(line: number): Definition | null {
let best: Definition | null = null;
Expand All @@ -183,6 +211,7 @@ export function createAstStoreVisitor(
}

function resolveParentNodeId(line: number): number | null {
if (skipParentLookup) return null;
const parentDef = findParentDef(line);
if (!parentDef) return null;
return nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null;
Expand Down
7 changes: 7 additions & 0 deletions src/domain/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -316,16 +316,23 @@ export function getParser(parsers: Map<string, Parser | null>, filePath: string)
*
* Name is preserved for caller compatibility; the function now ensures
* *analysis data* rather than *trees*.
*
* `needsFn` (optional): when provided, only files for which it returns true are
* re-parsed. Without it the function falls back to "any WASM-parseable file
* without _tree", which was the source of #1036 — a single file missing one
* analysis triggered a full-build re-parse of every WASM-parseable file.
*/
export async function ensureWasmTrees(
fileSymbols: Map<string, any>,
rootDir: string,
needsFn?: (relPath: string, symbols: any) => boolean,
): Promise<void> {
// Collect files that still need analysis data and are parseable by WASM.
const pending: Array<{ relPath: string; absPath: string; symbols: any }> = [];
for (const [relPath, symbols] of fileSymbols) {
if (symbols._tree) continue; // legacy path — leave existing trees alone
if (!_extToLang.has(path.extname(relPath).toLowerCase())) continue;
if (needsFn && !needsFn(relPath, symbols)) continue;
pending.push({ relPath, absPath: path.join(rootDir, relPath), symbols });
}
if (pending.length === 0) return;
Expand Down
24 changes: 12 additions & 12 deletions src/domain/wasm-worker-entry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -708,18 +708,18 @@ async function handleParse(msg: WorkerParseRequest): Promise<SerializedExtractor
file?: string;
parentNodeId?: number | null;
}>;
if (astRows.length > 0) {
// Strip `file` and `parentNodeId` — main thread re-resolves parent IDs
// against its DB in features/ast.ts::collectFileAstRows, and `file` is
// known from the map key.
serializedAstNodes = astRows.map((n) => ({
line: n.line,
kind: n.kind,
name: n.name ?? '',
text: n.text ?? undefined,
receiver: n.receiver ?? undefined,
}));
}
// Always set an array (even empty) — leaving astNodes undefined makes
// engine.ts::fileNeedsWasmTree treat the file as un-walked and trigger
// a full ensureWasmTrees re-parse of every WASM-parseable file (#1036).
// Strip `file` and `parentNodeId` — main thread re-resolves both in
// features/ast.ts::collectFileAstRows.
serializedAstNodes = astRows.map((n) => ({
line: n.line,
kind: n.kind,
name: n.name ?? '',
text: n.text ?? undefined,
receiver: n.receiver ?? undefined,
}));
}

if (complexityVisitor) storeComplexityResults(results, defs, entry.id);
Expand Down
19 changes: 19 additions & 0 deletions tests/benchmarks/regression-guard.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,20 @@ const SKIP_VERSIONS = new Set(['3.8.0']);
*
* - 3.9.2:Full build — NativeDbProxy overhead causes native full build to
* regress from 5206ms to 9403ms (+81%). Fix tracked in PR #906.
*
* - 3.9.6:Build ms/file / 3.9.6:No-op rebuild — WASM full build regressed
* (#1036) when PR #1016 expanded AST_TYPE_MAPS from 3 to 23 languages,
* causing zero-AST-row files to return `astNodes: undefined` and trigger
* a full-corpus re-parse. Fixed by PR #1038. Benchmarks captured before
* the fix landed; will reclear in v3.9.7+ data.
*
* - 3.9.6:Query time — native query benchmark sample-noise blip (29.4 → 47ms)
* above the natural variance of the small target set. Not reproducible
* locally (~30ms steady-state); will be re-validated on v3.9.7+ data.
*
* - 3.9.6:resolution haskell precision/recall — separate Haskell resolver
* regression introduced in 3.9.6, unrelated to #1036 / PR #1038. Tracked
* in #1039.
*/
const KNOWN_REGRESSIONS = new Set([
'3.9.0:1-file rebuild',
Expand All @@ -87,6 +101,11 @@ const KNOWN_REGRESSIONS = new Set([
'3.9.0:fnDeps depth 5',
'3.9.1:1-file rebuild',
'3.9.2:Full build',
'3.9.6:Build ms/file',
'3.9.6:No-op rebuild',
'3.9.6:Query time',
'3.9.6:resolution haskell precision',
'3.9.6:resolution haskell recall',
]);

/**
Expand Down
Loading