Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/ast-analysis/engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,9 @@ async function ensureWasmTreesIfNeeded(
if (needsWasmTrees) {
try {
const { ensureWasmTrees } = await getParserModule();
await ensureWasmTrees(fileSymbols, rootDir);
await ensureWasmTrees(fileSymbols, rootDir, (relPath, symbols) =>
fileNeedsWasmTree(relPath, symbols, flags),
);
} catch (err: unknown) {
debug(`ensureWasmTrees failed: ${toErrorMessage(err)}`);
}
Expand Down
46 changes: 37 additions & 9 deletions src/ast-analysis/visitors/ast-store-visitor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,20 @@ function extractChildExpressionText(node: TreeSitterNode): string | null {
return truncate(node.text);
}

/**
* Count code points cheaply: skip the `[...s]` spread when `s.length` already
* decides the answer. Each code point is 1 or 2 UTF-16 units, so `.length < 2`
* implies `< 2` code points and `.length >= 4` implies `>= 2` code points
* (at most 2 surrogate pairs). Only `.length` of 2 or 3 needs the spread to
* disambiguate the surrogate-pair edge case.
*/
function codePointCountAtLeast2(s: string): boolean {
const len = s.length;
if (len < 2) return false;
if (len >= 4) return true;
return [...s].length >= 2;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 len === 3 spread is always redundant

A UTF-16 string of length 3 must contain at least 2 code points (worst case: one surrogate pair + one regular char = 2 code points; all other combinations give ≥ 3). The [...s] spread for len === 3 always evaluates to true, so you can short-circuit it with an early return, keeping the spread only for the ambiguous len === 2 case.

Suggested change
if (len >= 4) return true;
return [...s].length >= 2;
if (len >= 3) return true;
return [...s].length >= 2;

Fix in Claude Code

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in c0a089f — folded the redundant len===3 case into the fast path so the spread only runs for the genuinely ambiguous len===2 case. Updated the comment to spell out the worst-case reasoning (1 surrogate pair + 1 BMP char = 2 code points).

}

/**
* Extract string content from a string-literal node, mirroring the native
* engine's `build_string_node` (`helpers.rs`). Returns `null` when the
Expand All @@ -142,15 +156,27 @@ function extractStringContent(node: TreeSitterNode, cfg: AstStringConfig): strin

let s = raw;
s = trimLeadingChars(s, '@');
s = trimLeadingChars(s, cfg.stringPrefixes);
if (cfg.stringPrefixes) s = trimLeadingChars(s, cfg.stringPrefixes);
if (isRawString) s = trimLeadingChars(s, 'r#');
s = trimLeadingChars(s, cfg.quoteChars);
if (isRawString) s = trimTrailingChars(s, '#');
s = trimTrailingChars(s, cfg.quoteChars);

// Count code points, not UTF-16 code units — matches Rust `chars().count()`.
const codePointCount = [...s].length;
if (codePointCount < 2) return null;
return codePointCountAtLeast2(s) ? s : null;
}

// Per-astTypeMap cache for the set of node-types that map to kind 'new'.
// Computed once per unique astTypeMap reference (one per language) instead
// of once per file.
const _newTypesCache = new WeakMap<Record<string, string>, Set<string>>();
function newTypesFor(astTypeMap: Record<string, string>): Set<string> {
let s = _newTypesCache.get(astTypeMap);
if (s) return s;
s = new Set<string>();
for (const type in astTypeMap) {
if (astTypeMap[type] === 'new') s.add(type);
}
_newTypesCache.set(astTypeMap, s);
return s;
}

Expand All @@ -164,11 +190,12 @@ export function createAstStoreVisitor(
): Visitor {
const rows: AstStoreRow[] = [];
const matched = new Set<number>();
const newTypes = new Set<string>(
Object.entries(astTypeMap)
.filter(([, kind]) => kind === 'new')
.map(([type]) => type),
);
const newTypes = newTypesFor(astTypeMap);
// When nodeIdMap is empty, parentNodeId resolution is wasted work — the
// worker passes an empty map and the main thread re-resolves against its
// own DB-populated map in features/ast.ts::collectFileAstRows. Skip the
// findParentDef linear scan in that case.
const skipParentLookup = nodeIdMap.size === 0;

function findParentDef(line: number): Definition | null {
let best: Definition | null = null;
Expand All @@ -183,6 +210,7 @@ export function createAstStoreVisitor(
}

function resolveParentNodeId(line: number): number | null {
if (skipParentLookup) return null;
const parentDef = findParentDef(line);
if (!parentDef) return null;
return nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null;
Expand Down
7 changes: 7 additions & 0 deletions src/domain/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -316,16 +316,23 @@ export function getParser(parsers: Map<string, Parser | null>, filePath: string)
*
* Name is preserved for caller compatibility; the function now ensures
* *analysis data* rather than *trees*.
*
* `needsFn` (optional): when provided, only files for which it returns true are
* re-parsed. Without it the function falls back to "any WASM-parseable file
* without _tree", which was the source of #1036 — a single file missing one
* analysis triggered a full-build re-parse of every WASM-parseable file.
*/
export async function ensureWasmTrees(
fileSymbols: Map<string, any>,
rootDir: string,
needsFn?: (relPath: string, symbols: any) => boolean,
): Promise<void> {
// Collect files that still need analysis data and are parseable by WASM.
const pending: Array<{ relPath: string; absPath: string; symbols: any }> = [];
for (const [relPath, symbols] of fileSymbols) {
if (symbols._tree) continue; // legacy path — leave existing trees alone
if (!_extToLang.has(path.extname(relPath).toLowerCase())) continue;
if (needsFn && !needsFn(relPath, symbols)) continue;
pending.push({ relPath, absPath: path.join(rootDir, relPath), symbols });
}
if (pending.length === 0) return;
Expand Down
24 changes: 12 additions & 12 deletions src/domain/wasm-worker-entry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -708,18 +708,18 @@ async function handleParse(msg: WorkerParseRequest): Promise<SerializedExtractor
file?: string;
parentNodeId?: number | null;
}>;
if (astRows.length > 0) {
// Strip `file` and `parentNodeId` — main thread re-resolves parent IDs
// against its DB in features/ast.ts::collectFileAstRows, and `file` is
// known from the map key.
serializedAstNodes = astRows.map((n) => ({
line: n.line,
kind: n.kind,
name: n.name ?? '',
text: n.text ?? undefined,
receiver: n.receiver ?? undefined,
}));
}
// Always set an array (even empty) — leaving astNodes undefined makes
// engine.ts::fileNeedsWasmTree treat the file as un-walked and trigger
// a full ensureWasmTrees re-parse of every WASM-parseable file (#1036).
// Strip `file` and `parentNodeId` — main thread re-resolves both in
// features/ast.ts::collectFileAstRows.
serializedAstNodes = astRows.map((n) => ({
line: n.line,
kind: n.kind,
name: n.name ?? '',
text: n.text ?? undefined,
receiver: n.receiver ?? undefined,
}));
}

if (complexityVisitor) storeComplexityResults(results, defs, entry.id);
Expand Down
Loading