@@ -658,6 +658,14 @@ async function tryNativeOrchestrator(
658658
659659 if ( result . earlyExit ) {
660660 info ( 'No changes detected' ) ;
661+ // Even on no-op rebuilds, dropped-language files added since the last
662+ // full build are still missing from `nodes`/`file_hashes` (#1083). The
663+ // orchestrator's file_collector skipped them, so its earlyExit doesn't
664+ // imply DB consistency. Run the gap repair before returning.
665+ const gap = detectDroppedLanguageGap ( ctx ) ;
666+ if ( gap . missingAbs . length > 0 ) {
667+ await backfillNativeDroppedFiles ( ctx , gap ) ;
668+ }
661669 closeDbPair ( { db : ctx . db , nativeDb : ctx . nativeDb } ) ;
662670 return 'early-exit' ;
663671 }
@@ -753,37 +761,55 @@ async function tryNativeOrchestrator(
753761 // stale native binaries). WASM handles those — backfill via WASM so both
754762 // engines process the same file set (#967).
755763 //
756- // Runs on full builds and on incrementals when the orchestrator reports
757- // any file activity (removals or changes). The orchestrator's
758- // `detect_removed_files` filter (#1070) skips files outside its narrower
759- // file_collector, so on a current binary a no-op rebuild reports
760- // `removedCount=0` and `changedCount=0`, making the backfill call pure
761- // overhead (fs walk + 2 DB queries + 48-file WASM re-parse). Legacy
762- // binaries lacking the filter still report `removedCount>0` and get the
763- // gap-repair behavior #1068 introduced. Triggering on `changedCount>0`
764- // narrows (but does not fully close) the gap where a brand-new
765- // unsupported-extension file is added on an otherwise-quiet incremental
766- // — see #1091 for the residual gap.
764+ // Detect the gap once (fs walk + 2 DB queries, ~20–30ms) and use it for
765+ // both gating and the backfill itself. On dirty incrementals/full builds
766+ // the orchestrator signals trigger backfill, so the walk happens once
767+ // (instead of redundantly inside backfill). On quiet incrementals we
768+ // still pay the walk so we can detect brand-new files in dropped-language
769+ // extensions — a gap that the orchestrator's `detect_removed_files`
770+ // filter (#1070) leaves open (#1083, #1091). The pre-check is cheap
771+ // because the expensive part (WASM re-parse of the missing set) is
772+ // gated below.
767773 const removedCount = result . removedCount ?? 0 ;
768774 const changedCount = result . changedCount ?? 0 ;
769- if ( result . isFullBuild || removedCount > 0 || changedCount > 0 ) {
770- await backfillNativeDroppedFiles ( ctx ) ;
775+ const gap = detectDroppedLanguageGap ( ctx ) ;
776+ if ( result . isFullBuild || removedCount > 0 || changedCount > 0 || gap . missingAbs . length > 0 ) {
777+ await backfillNativeDroppedFiles ( ctx , gap ) ;
771778 }
772779
773780 closeDbPair ( { db : ctx . db , nativeDb : ctx . nativeDb } ) ;
774781 return formatNativeTimingResult ( p , structurePatchMs , analysisTiming ) ;
775782}
776783
784+ /** Files the native orchestrator silently dropped — returned by `detectDroppedLanguageGap` and used as the working set for `backfillNativeDroppedFiles`. */
785+ interface DroppedLanguageGap {
786+ /** Root-relative, normalized paths of files missing from `nodes` or `file_hashes` (either one); only extensions with an installed WASM grammar are listed. */
787+ missingRel : string [ ] ;
788+ /** Absolute paths (`ctx.rootDir` joined with the relative path), aligned by index with `missingRel`. */
789+ missingAbs : string [ ] ;
790+ }
791+
777792/**
778- * Backfill files that the native orchestrator silently dropped during parse.
779- * Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
793+ * Detect files the native orchestrator silently dropped.
794+ *
795+ * Walks the filesystem and compares against `nodes` + `file_hashes`. A file
796+ * is "missing" if it's absent from EITHER table — both must be present for
797+ * the fast-skip pre-flight (#1054) to work, and the two can diverge (e.g.
798+ * legacy DBs where `nodes` was populated but `file_hashes` was not).
799+ *
800+ * Restricted to files with an installed WASM grammar; extensions in
801+ * `LANGUAGE_REGISTRY` without a shipped grammar (e.g. groovy on minimal
802+ * installs) can't be parsed by either engine, so they're not a native
803+ * regression — excluding them keeps the warn count in
804+ * `backfillNativeDroppedFiles` meaningful.
805+ *
806+ * Cheap (no DB handoff, no parsing): used both to gate the backfill call
807+ * and as its working set. NativeDbProxy supports `.prepare().all()`, so
808+ * this works whether `ctx.db` is a proxy or a real better-sqlite3
809+ * connection — letting us skip the close-native / reopen-better-sqlite3
810+ * cost when there's nothing to backfill.
780811 */
781- async function backfillNativeDroppedFiles ( ctx : PipelineContext ) : Promise < void > {
782- // Compute the missing-file set FIRST, before any expensive DB handoff.
783- // NativeDbProxy supports .prepare().all(), so the upfront query works
784- // whether ctx.db is a proxy or a real better-sqlite3 connection. On
785- // incremental no-op rebuilds nothing is missing, so we want to early-return
786- // without paying the close-native / reopen-better-sqlite3 cost.
812+ function detectDroppedLanguageGap ( ctx : PipelineContext ) : DroppedLanguageGap {
787813 const collected = collectFilesUtil ( ctx . rootDir , [ ] , ctx . config , new Set < string > ( ) ) ;
788814 const expected = new Set (
789815 collected . files . map ( ( f ) => normalizePath ( path . relative ( ctx . rootDir , f ) ) ) ,
@@ -794,12 +820,6 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
794820 . all ( ) as Array < { file : string } > ;
795821 const existingNodes = new Set ( existingNodeRows . map ( ( r ) => r . file ) ) ;
796822
797- // Belt-and-suspenders: also check `file_hashes`. The fast-skip pre-flight
798- // (#1054) rejects on `file_hashes` gaps, and the two tables can diverge
799- // (e.g. a DB written by old code where `nodes` was populated but
800- // `file_hashes` was not). Treating "in nodes but not in file_hashes" as
801- // missing closes the gap so the backfill repairs the file_hashes row even
802- // when the node row already exists.
803823 let existingHashes = new Set < string > ( ) ;
804824 try {
805825 const existingHashRows = ctx . db
@@ -810,26 +830,36 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise<void> {
810830 // file_hashes table may not exist on legacy DBs; treat as fully missing
811831 // so the backfill (`backfillNativeDroppedFiles`) writes rows on its upsert path.
812832 debug (
813- `backfillNativeDroppedFiles : file_hashes read failed (table may not exist): ${ toErrorMessage ( e ) } ` ,
833+ `detectDroppedLanguageGap : file_hashes read failed (table may not exist): ${ toErrorMessage ( e ) } ` ,
814834 ) ;
815835 }
816836
817- // Restrict backfill to files with an installed WASM grammar. Extensions in
818- // LANGUAGE_REGISTRY without a shipped grammar file (e.g. groovy, erlang on
819- // minimal installs) can't be parsed by either engine, so they're not a
820- // native regression — excluding them keeps the warn count meaningful.
821837 const installedExts = getInstalledWasmExtensions ( ) ;
822838 const missingRel : string [ ] = [ ] ;
823839 const missingAbs : string [ ] = [ ] ;
824840 for ( const rel of expected ) {
825- // A file is "missing" if it's absent from EITHER nodes OR file_hashes.
826- // Both must be present for fast-skip to work correctly.
827841 if ( existingNodes . has ( rel ) && existingHashes . has ( rel ) ) continue ;
828842 const ext = path . extname ( rel ) . toLowerCase ( ) ;
829843 if ( ! installedExts . has ( ext ) ) continue ;
830844 missingRel . push ( rel ) ;
831845 missingAbs . push ( path . join ( ctx . rootDir , rel ) ) ;
832846 }
847+ return { missingRel, missingAbs } ;
848+ }
849+
850+ /**
851+ * Backfill files that the native orchestrator silently dropped during parse.
852+ * Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
853+ *
854+ * Accepts a pre-computed `gap` from `detectDroppedLanguageGap` so the caller
855+ * can use the same scan for both gating and the actual backfill — avoiding
856+ * a redundant fs walk when the orchestrator's signals already triggered.
857+ */
858+ async function backfillNativeDroppedFiles (
859+ ctx : PipelineContext ,
860+ gap : DroppedLanguageGap ,
861+ ) : Promise < void > {
862+ const { missingRel, missingAbs } = gap ;
833863 if ( missingAbs . length === 0 ) return ;
834864
835865 // Now that we know there's work to do, hand off to better-sqlite3 (needed
0 commit comments