Skip to content

Commit f2c47f0

Browse files
authored
Merge branch 'main' into fix/incremental-report-trend-fallback
2 parents e86c257 + 51aaf94 commit f2c47f0

5 files changed

Lines changed: 446 additions & 79 deletions

File tree

crates/codegraph-core/src/extractors/r_lang.rs

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,10 +163,14 @@ fn handle_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
163163
handle_set_class(node, source, symbols);
164164
return;
165165
}
166-
"setGeneric" | "setMethod" => {
166+
"setGeneric" => {
167167
handle_set_generic(node, source, symbols);
168168
return;
169169
}
170+
"setMethod" => {
171+
handle_set_method(node, source, symbols);
172+
return;
173+
}
170174
_ => {}
171175
}
172176
}
@@ -332,6 +336,23 @@ fn handle_set_generic(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
332336
}
333337
}
334338

339+
// `setMethod("greet", "Person", function(x) ...)` registers an implementation
340+
// of the generic `greet` — it is not a new top-level definition. Emitting a
341+
// definition here produced two `function` nodes with the same name (one from
342+
// setGeneric, one from setMethod) and broke resolution. Emit a call edge to
343+
// the generic instead; the method body's calls are still picked up by the
344+
// recursive walk of the anonymous function argument.
345+
fn handle_set_method(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
346+
if let Some(name) = first_argument_value(node, source, false) {
347+
symbols.calls.push(Call {
348+
name,
349+
line: start_line(node),
350+
dynamic: None,
351+
receiver: None,
352+
});
353+
}
354+
}
355+
335356
#[cfg(test)]
336357
mod tests {
337358
use super::*;
@@ -439,6 +460,51 @@ mod tests {
439460
assert_eq!(d.kind, "function");
440461
}
441462

463+
#[test]
464+
fn set_method_does_not_duplicate_generic_definition() {
465+
// Idiomatic S4: a setGeneric followed by setMethod implementations.
466+
// Only the setGeneric should emit a definition — setMethod registers
467+
// an implementation of the generic, which we model as a call edge.
468+
let code = r#"
469+
setGeneric("greet", function(x) standardGeneric("greet"))
470+
setMethod("greet", "Person", function(x) paste("Hello", x@name))
471+
setMethod("greet", "Animal", function(x) paste("Hi", x@species))
472+
"#;
473+
let s = parse_r(code);
474+
let greet_defs: Vec<&Definition> =
475+
s.definitions.iter().filter(|d| d.name == "greet").collect();
476+
assert_eq!(
477+
greet_defs.len(),
478+
1,
479+
"expected exactly one `greet` definition, got {greet_defs:#?}",
480+
);
481+
assert_eq!(greet_defs[0].kind, "function");
482+
}
483+
484+
#[test]
485+
fn set_method_emits_call_to_generic() {
486+
// setMethod registers an implementation of the generic. The fix emits
487+
// a call edge to the generic so the dispatch relationship is visible
488+
// in the graph.
489+
let s = parse_r(
490+
"setMethod(\"greet\", \"Person\", function(x) paste(\"Hello\", x@name))\n",
491+
);
492+
let calls: Vec<&Call> = s.calls.iter().filter(|c| c.name == "greet").collect();
493+
assert_eq!(calls.len(), 1, "expected setMethod to emit one call to `greet`");
494+
}
495+
496+
#[test]
497+
fn set_method_body_calls_are_still_captured() {
498+
// The recursive walk visits the anonymous function passed to
499+
// setMethod, so calls inside the method body must still appear.
500+
let s = parse_r(
501+
"setMethod(\"greet\", \"Person\", function(x) { helper(x); validate(x) })\n",
502+
);
503+
let names: Vec<&str> = s.calls.iter().map(|c| c.name.as_str()).collect();
504+
assert!(names.contains(&"helper"), "method body call `helper` not captured");
505+
assert!(names.contains(&"validate"), "method body call `validate` not captured");
506+
}
507+
442508
#[test]
443509
fn function_with_double_arrow_assignment() {
444510
// `<<-` is super-assignment in R; the JS extractor accepts it too.

src/domain/graph/builder/pipeline.ts

Lines changed: 64 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,14 @@ async function tryNativeOrchestrator(
658658

659659
if (result.earlyExit) {
660660
info('No changes detected');
661+
// Even on no-op rebuilds, dropped-language files added since the last
662+
// full build are still missing from `nodes`/`file_hashes` (#1083). The
663+
// orchestrator's file_collector skipped them, so its earlyExit doesn't
664+
// imply DB consistency. Run the gap repair before returning.
665+
const gap = detectDroppedLanguageGap(ctx);
666+
if (gap.missingAbs.length > 0) {
667+
await backfillNativeDroppedFiles(ctx, gap);
668+
}
661669
closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
662670
return 'early-exit';
663671
}
@@ -753,37 +761,55 @@ async function tryNativeOrchestrator(
753761
// stale native binaries). WASM handles those — backfill via WASM so both
754762
// engines process the same file set (#967).
755763
//
756-
// Runs on full builds and on incrementals when the orchestrator reports
757-
// any file activity (removals or changes). The orchestrator's
758-
// `detect_removed_files` filter (#1070) skips files outside its narrower
759-
// file_collector, so on a current binary a no-op rebuild reports
760-
// `removedCount=0` and `changedCount=0`, making the backfill call pure
761-
// overhead (fs walk + 2 DB queries + 48-file WASM re-parse). Legacy
762-
// binaries lacking the filter still report `removedCount>0` and get the
763-
// gap-repair behavior #1068 introduced. Triggering on `changedCount>0`
764-
// narrows (but does not fully close) the gap where a brand-new
765-
// unsupported-extension file is added on an otherwise-quiet incremental
766-
// — see #1091 for the residual gap.
764+
// Detect the gap once (fs walk + 2 DB queries, ~20–30ms) and use it for
765+
// both gating and the backfill itself. On dirty incrementals/full builds
766+
// the orchestrator signals trigger backfill, so the walk happens once
767+
// (instead of redundantly inside backfill). On quiet incrementals we
768+
// still pay the walk so we can detect brand-new files in dropped-language
769+
// extensions — a gap that the orchestrator's `detect_removed_files`
770+
// filter (#1070) leaves open (#1083, #1091). The pre-check is cheap
771+
// because the expensive part (WASM re-parse of the missing set) is
772+
// gated below.
767773
const removedCount = result.removedCount ?? 0;
768774
const changedCount = result.changedCount ?? 0;
769-
if (result.isFullBuild || removedCount > 0 || changedCount > 0) {
770-
await backfillNativeDroppedFiles(ctx);
775+
const gap = detectDroppedLanguageGap(ctx);
776+
if (result.isFullBuild || removedCount > 0 || changedCount > 0 || gap.missingAbs.length > 0) {
777+
await backfillNativeDroppedFiles(ctx, gap);
771778
}
772779

773780
closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
774781
return formatNativeTimingResult(p, structurePatchMs, analysisTiming);
775782
}
776783

784+
/** Files the native orchestrator silently dropped — the working set for backfill. */
785+
interface DroppedLanguageGap {
786+
/** Relative paths (normalized) of files missing from `nodes` or `file_hashes`. */
787+
missingRel: string[];
788+
/** Absolute paths, aligned by index with `missingRel`. */
789+
missingAbs: string[];
790+
}
791+
777792
/**
778-
* Backfill files that the native orchestrator silently dropped during parse.
779-
* Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
793+
* Detect files the native orchestrator silently dropped.
794+
*
795+
* Walks the filesystem and compares against `nodes` + `file_hashes`. A file
796+
* is "missing" if it's absent from EITHER table — both must be present for
797+
* the fast-skip pre-flight (#1054) to work, and the two can diverge (e.g.
798+
* legacy DBs where `nodes` was populated but `file_hashes` was not).
799+
*
800+
* Restricted to files with an installed WASM grammar; extensions in
801+
* `LANGUAGE_REGISTRY` without a shipped grammar (e.g. groovy on minimal
802+
* installs) can't be parsed by either engine, so they're not a native
803+
* regression — excluding them keeps the warn count in
804+
* `backfillNativeDroppedFiles` meaningful.
805+
*
806+
* Cheap (no DB handoff, no parsing): used both to gate the backfill call
807+
* and as its working set. NativeDbProxy supports `.prepare().all()`, so
808+
* this works whether `ctx.db` is a proxy or a real better-sqlite3
809+
* connection — letting us skip the close-native / reopen-better-sqlite3
810+
* cost when there's nothing to backfill.
780811
*/
781-
async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
782-
// Compute the missing-file set FIRST, before any expensive DB handoff.
783-
// NativeDbProxy supports .prepare().all(), so the upfront query works
784-
// whether ctx.db is a proxy or a real better-sqlite3 connection. On
785-
// incremental no-op rebuilds nothing is missing, so we want to early-return
786-
// without paying the close-native / reopen-better-sqlite3 cost.
812+
function detectDroppedLanguageGap(ctx: PipelineContext): DroppedLanguageGap {
787813
const collected = collectFilesUtil(ctx.rootDir, [], ctx.config, new Set<string>());
788814
const expected = new Set(
789815
collected.files.map((f) => normalizePath(path.relative(ctx.rootDir, f))),
@@ -794,12 +820,6 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
794820
.all() as Array<{ file: string }>;
795821
const existingNodes = new Set(existingNodeRows.map((r) => r.file));
796822

797-
// Belt-and-suspenders: also check `file_hashes`. The fast-skip pre-flight
798-
// (#1054) rejects on `file_hashes` gaps, and the two tables can diverge
799-
// (e.g. a DB written by old code where `nodes` was populated but
800-
// `file_hashes` was not). Treating "in nodes but not in file_hashes" as
801-
// missing closes the gap so the backfill repairs the file_hashes row even
802-
// when the node row already exists.
803823
let existingHashes = new Set<string>();
804824
try {
805825
const existingHashRows = ctx.db
@@ -810,26 +830,36 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
810830
// file_hashes table may not exist on legacy DBs; treat as fully missing
811831
// so the backfill writes rows on the upsert path below.
812832
debug(
813-
`backfillNativeDroppedFiles: file_hashes read failed (table may not exist): ${toErrorMessage(e)}`,
833+
`detectDroppedLanguageGap: file_hashes read failed (table may not exist): ${toErrorMessage(e)}`,
814834
);
815835
}
816836

817-
// Restrict backfill to files with an installed WASM grammar. Extensions in
818-
// LANGUAGE_REGISTRY without a shipped grammar file (e.g. groovy, erlang on
819-
// minimal installs) can't be parsed by either engine, so they're not a
820-
// native regression — excluding them keeps the warn count meaningful.
821837
const installedExts = getInstalledWasmExtensions();
822838
const missingRel: string[] = [];
823839
const missingAbs: string[] = [];
824840
for (const rel of expected) {
825-
// A file is "missing" if it's absent from EITHER nodes OR file_hashes.
826-
// Both must be present for fast-skip to work correctly.
827841
if (existingNodes.has(rel) && existingHashes.has(rel)) continue;
828842
const ext = path.extname(rel).toLowerCase();
829843
if (!installedExts.has(ext)) continue;
830844
missingRel.push(rel);
831845
missingAbs.push(path.join(ctx.rootDir, rel));
832846
}
847+
return { missingRel, missingAbs };
848+
}
849+
850+
/**
851+
* Backfill files that the native orchestrator silently dropped during parse.
852+
* Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
853+
*
854+
* Accepts a pre-computed `gap` from `detectDroppedLanguageGap` so the caller
855+
* can use the same scan for both gating and the actual backfill — avoiding
856+
* a redundant fs walk when the orchestrator's signals already triggered.
857+
*/
858+
async function backfillNativeDroppedFiles(
859+
ctx: PipelineContext,
860+
gap: DroppedLanguageGap,
861+
): Promise<void> {
862+
const { missingRel, missingAbs } = gap;
833863
if (missingAbs.length === 0) return;
834864

835865
// Now that we know there's work to do, hand off to better-sqlite3 (needed

src/extractors/r.ts

Lines changed: 63 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,16 @@ function handleCall(node: TreeSitterNode, ctx: ExtractorOutput): void {
125125
return;
126126
}
127127

128-
if (funcName === 'setGeneric' || funcName === 'setMethod') {
128+
if (funcName === 'setGeneric') {
129129
handleSetGeneric(node, ctx);
130130
return;
131131
}
132132

133+
if (funcName === 'setMethod') {
134+
handleSetMethod(node, ctx);
135+
return;
136+
}
137+
133138
// Regular call
134139
if (funcNode.type === 'identifier') {
135140
ctx.calls.push({ name: funcName, line: node.startPosition.row + 1 });
@@ -212,63 +217,77 @@ function handleLibraryCall(node: TreeSitterNode, ctx: ExtractorOutput): void {
212217
}
213218

214219
function handleSourceCall(node: TreeSitterNode, ctx: ExtractorOutput): void {
215-
for (let i = 0; i < node.childCount; i++) {
216-
const child = node.child(i);
217-
if (!child || child.type !== 'arguments') continue;
218-
for (let j = 0; j < child.childCount; j++) {
219-
const arg = child.child(j);
220-
if (!arg) continue;
221-
if (arg.type === 'string') {
222-
const text = arg.text.replace(/^["']|["']$/g, '');
223-
ctx.imports.push({
224-
source: text,
225-
names: ['source'],
226-
line: node.startPosition.row + 1,
227-
});
228-
return;
229-
}
230-
}
231-
}
220+
// source() only accepts string literals — `source(varname)` is not an import.
221+
const path = firstStringArgument(node);
222+
if (path === null) return;
223+
ctx.imports.push({
224+
source: path,
225+
names: ['source'],
226+
line: node.startPosition.row + 1,
227+
});
232228
}
233229

234230
function handleSetClass(node: TreeSitterNode, ctx: ExtractorOutput): void {
235-
for (let i = 0; i < node.childCount; i++) {
236-
const child = node.child(i);
237-
if (!child || child.type !== 'arguments') continue;
238-
for (let j = 0; j < child.childCount; j++) {
239-
const arg = child.child(j);
240-
if (!arg) continue;
241-
if (arg.type === 'string') {
242-
const name = arg.text.replace(/^["']|["']$/g, '');
243-
ctx.definitions.push({
244-
name,
245-
kind: 'class',
246-
line: node.startPosition.row + 1,
247-
endLine: nodeEndLine(node),
248-
});
249-
return;
250-
}
251-
}
252-
}
231+
const name = firstStringArgument(node);
232+
if (name === null) return;
233+
ctx.definitions.push({
234+
name,
235+
kind: 'class',
236+
line: node.startPosition.row + 1,
237+
endLine: nodeEndLine(node),
238+
});
253239
}
254240

255241
function handleSetGeneric(node: TreeSitterNode, ctx: ExtractorOutput): void {
242+
const name = firstStringArgument(node);
243+
if (name === null) return;
244+
ctx.definitions.push({
245+
name,
246+
kind: 'function',
247+
line: node.startPosition.row + 1,
248+
endLine: nodeEndLine(node),
249+
});
250+
}
251+
252+
// setMethod("greet", "Person", function(x) ...) registers an implementation of
253+
// the generic `greet` — it is not a new top-level definition. Emitting a
254+
// definition here produced two `function` nodes with the same name (one from
255+
// setGeneric, one from setMethod) and broke resolution. Emit a call edge to
256+
// the generic instead; the method body's calls are still picked up by the
257+
// recursive walk of the anonymous function argument.
258+
function handleSetMethod(node: TreeSitterNode, ctx: ExtractorOutput): void {
259+
const name = firstStringArgument(node);
260+
if (name === null) return;
261+
ctx.calls.push({ name, line: node.startPosition.row + 1 });
262+
}
263+
264+
// tree-sitter-r wraps each positional argument in an `argument` node that
265+
// contains the actual `string` (or `identifier`) child, so the inner string
266+
// must be unwrapped — checking `child.type === 'string'` directly misses it.
267+
// Mirrors `first_argument_value` in the Rust extractor for parity.
268+
function firstStringArgument(node: TreeSitterNode): string | null {
256269
for (let i = 0; i < node.childCount; i++) {
257270
const child = node.child(i);
258271
if (!child || child.type !== 'arguments') continue;
259272
for (let j = 0; j < child.childCount; j++) {
260273
const arg = child.child(j);
261274
if (!arg) continue;
262275
if (arg.type === 'string') {
263-
const name = arg.text.replace(/^["']|["']$/g, '');
264-
ctx.definitions.push({
265-
name,
266-
kind: 'function',
267-
line: node.startPosition.row + 1,
268-
endLine: nodeEndLine(node),
269-
});
270-
return;
276+
return stripQuotes(arg.text);
277+
}
278+
if (arg.type === 'argument') {
279+
const valueNode = arg.childForFieldName('value');
280+
if (valueNode && valueNode.type === 'string') return stripQuotes(valueNode.text);
281+
for (let k = 0; k < arg.childCount; k++) {
282+
const inner = arg.child(k);
283+
if (inner && inner.type === 'string') return stripQuotes(inner.text);
284+
}
271285
}
272286
}
273287
}
288+
return null;
289+
}
290+
291+
function stripQuotes(text: string): string {
292+
return text.replace(/^["']|["']$/g, '');
274293
}

0 commit comments

Comments
 (0)