@@ -15,6 +15,7 @@ import {
1515 initSchema ,
1616 MIGRATIONS ,
1717 openDb ,
18+ purgeFilesData ,
1819 releaseAdvisoryLock ,
1920 setBuildMeta ,
2021} from '../../../db/index.js' ;
@@ -38,6 +39,7 @@ import {
3839 formatDropExtensionSummary ,
3940 getActiveEngine ,
4041 getInstalledWasmExtensions ,
42+ NATIVE_SUPPORTED_EXTENSIONS ,
4143 parseFilesWasmForBackfill ,
4244} from '../../parser.js' ;
4345import { writeJournalHeader } from '../journal.js' ;
@@ -659,11 +661,12 @@ async function tryNativeOrchestrator(
659661 if ( result . earlyExit ) {
660662 info ( 'No changes detected' ) ;
661663 // Even on no-op rebuilds, dropped-language files added since the last
662- // full build are still missing from `nodes`/`file_hashes` (#1083). The
663- // orchestrator's file_collector skipped them, so its earlyExit doesn't
664- // imply DB consistency. Run the gap repair before returning.
664+ // full build are still missing from `nodes`/`file_hashes` (#1083), and
665+ // WASM-only files deleted from disk leave stale rows behind (#1073).
666+ // The orchestrator's file_collector skipped them, so its earlyExit
667+ // doesn't imply DB consistency. Run the gap repair before returning.
665668 const gap = detectDroppedLanguageGap ( ctx ) ;
666- if ( gap . missingAbs . length > 0 ) {
669+ if ( gap . missingAbs . length > 0 || gap . staleRel . length > 0 ) {
667670 await backfillNativeDroppedFiles ( ctx , gap ) ;
668671 }
669672 closeDbPair ( { db : ctx . db , nativeDb : ctx . nativeDb } ) ;
@@ -773,7 +776,13 @@ async function tryNativeOrchestrator(
773776 const removedCount = result . removedCount ?? 0 ;
774777 const changedCount = result . changedCount ?? 0 ;
775778 const gap = detectDroppedLanguageGap ( ctx ) ;
776- if ( result . isFullBuild || removedCount > 0 || changedCount > 0 || gap . missingAbs . length > 0 ) {
779+ if (
780+ result . isFullBuild ||
781+ removedCount > 0 ||
782+ changedCount > 0 ||
783+ gap . missingAbs . length > 0 ||
784+ gap . staleRel . length > 0
785+ ) {
777786 await backfillNativeDroppedFiles ( ctx , gap ) ;
778787 }
779788
@@ -787,6 +796,101 @@ interface DroppedLanguageGap {
787796 missingRel : string [ ] ;
788797 /** Absolute paths, aligned by index with `missingRel`. */
789798 missingAbs : string [ ] ;
799+ /**
800+ * Relative paths of WASM-only files present in DB but absent from disk (#1073).
801+ * Rust's `detect_removed_files` filter (#1070) skips these, so the JS-side
802+ * backfill must purge them. Always disjoint from `missingRel`.
803+ */
804+ staleRel : string [ ] ;
805+ }
806+
807+ /**
808+ * Inputs to {@link computeWasmOnlyStaleFiles}. Sets are passed in so the helper
809+ * is pure and unit-testable independently of `getInstalledWasmExtensions` and
810+ * the `NATIVE_SUPPORTED_EXTENSIONS` global state.
811+ */
812+ export interface WasmOnlyStaleFilesInput {
813+ /** Distinct `file` values from the `nodes` table. */
814+ existingNodes : ReadonlySet < string > ;
815+ /** Distinct `file` values from the `file_hashes` table. */
816+ existingHashes : ReadonlySet < string > ;
817+ /** Relative paths currently on disk (from `collectFilesUtil`). */
818+ expected : ReadonlySet < string > ;
819+ /** Lowercased extensions whose WASM grammar is installed. */
820+ installedExts : ReadonlySet < string > ;
821+ /** Extensions covered by the Rust addon — Rust owns deletion for these. */
822+ nativeSupported : ReadonlySet < string > ;
823+ }
824+
825+ /**
826+ * Compute the WASM-only files present in the DB but missing from disk (#1073).
827+ *
828+ * Returns relative paths that:
829+ * - appear in `existingNodes` or `existingHashes` (in DB),
830+ * - are absent from `expected` (not on disk),
831+ * - have an extension installed for WASM, AND
832+ * - have an extension NOT covered by `nativeSupported` — Rust's
833+ * `purge_changed_files` handles deletion for natively-supported extensions
834+ * via its own `detect_removed_files`, so the caller must not double-purge.
835+ *
836+ * Extensions are lowercased before lookup to match the registry and Rust's
837+ * `LanguageKind::from_extension` (which normalises case for the languages
838+ * where both cases are conventional, e.g. R's `.r` / `.R`).
839+ *
840+ * DB paths are forced to forward slashes before comparison with `expected`
841+ * (which is always normalised). The on-disk invariant is that DB rows are
842+ * written with forward slashes, but a stale row written by older code on
843+ * Windows could carry back-slashes — normalising here makes the comparison
844+ * platform-safe and prevents false-positive purges of live rows. We replace
845+ * `\\` explicitly (rather than calling `normalizePath`, which only touches
846+ * `path.sep`) so the defence works when running on POSIX against a DB that
847+ * was migrated from Windows.
848+ *
849+ * Exported for unit testing.
850+ */
851+ export function computeWasmOnlyStaleFiles ( input : WasmOnlyStaleFilesInput ) : string [ ] {
852+ const { existingNodes, existingHashes, expected, installedExts, nativeSupported } = input ;
853+ const stale : string [ ] = [ ] ;
854+ const seen = new Set < string > ( ) ;
855+ const consider = ( rawRel : string ) : void => {
856+ const rel = rawRel . replace ( / \\ / g, '/' ) ;
857+ if ( expected . has ( rel ) || seen . has ( rel ) ) return ;
858+ const ext = path . extname ( rel ) . toLowerCase ( ) ;
859+ if ( nativeSupported . has ( ext ) ) return ;
860+ if ( ! installedExts . has ( ext ) ) return ;
861+ seen . add ( rel ) ;
862+ // Push the ORIGINAL raw path (not the normalised form) so the eventual
863+ // `DELETE FROM nodes WHERE file = ?` predicate in `purgeFilesData`
864+ // matches the actual stored row. The dedup `seen` set keeps the
865+ // normalised form so a file written once with `\` and once with `/`
866+ // is still treated as one entry — but the value the SQL sees has to
867+ // be byte-identical to what's on disk in the DB.
868+ stale . push ( rawRel ) ;
869+ } ;
870+ for ( const rel of existingNodes ) consider ( rel ) ;
871+ for ( const rel of existingHashes ) consider ( rel ) ;
872+ return stale ;
873+ }
874+
875+ /**
876+ * Group relative paths by their lowercased extension. Shape matches the bucket
877+ * type that `formatDropExtensionSummary` consumes, so callers can render a
878+ * log-friendly per-extension summary without going through `classifyNativeDrops`
879+ * when the reason is already known (e.g. the stale-purge path where every path
880+ * is guaranteed `unsupported-by-native`).
881+ */
882+ function groupByExtension ( relPaths : Iterable < string > ) : Map < string , string [ ] > {
883+ const buckets = new Map < string , string [ ] > ( ) ;
884+ for ( const rel of relPaths ) {
885+ const ext = path . extname ( rel ) . toLowerCase ( ) ;
886+ let list = buckets . get ( ext ) ;
887+ if ( ! list ) {
888+ list = [ ] ;
889+ buckets . set ( ext , list ) ;
890+ }
891+ list . push ( rel ) ;
892+ }
893+ return buckets ;
790894}
791895
792896/**
@@ -803,6 +907,13 @@ interface DroppedLanguageGap {
803907 * regression — excluding them keeps the warn count in
804908 * `backfillNativeDroppedFiles` meaningful.
805909 *
910+ * Also detects WASM-only files deleted from disk (#1073). Rust's
911+ * `detect_removed_files` filter (#1070) skips files outside its supported
912+ * extensions, so deletions of WASM-only languages don't reach the native
913+ * purge path; the rest of the backfill only inserts rows, so without this
914+ * step stale `nodes`/`file_hashes` rows would linger across incremental
915+ * rebuilds until the next full rebuild.
916+ *
806917 * Cheap (no DB handoff, no parsing): used both to gate the backfill call
807918 * and as its working set. NativeDbProxy supports `.prepare().all()`, so
808919 * this works whether `ctx.db` is a proxy or a real better-sqlite3
@@ -844,13 +955,25 @@ function detectDroppedLanguageGap(ctx: PipelineContext): DroppedLanguageGap {
844955 missingRel . push ( rel ) ;
845956 missingAbs . push ( path . join ( ctx . rootDir , rel ) ) ;
846957 }
847- return { missingRel, missingAbs } ;
958+
959+ const staleRel = computeWasmOnlyStaleFiles ( {
960+ existingNodes,
961+ existingHashes,
962+ expected,
963+ installedExts,
964+ nativeSupported : NATIVE_SUPPORTED_EXTENSIONS ,
965+ } ) ;
966+
967+ return { missingRel, missingAbs, staleRel } ;
848968}
849969
850970/**
851971 * Backfill files that the native orchestrator silently dropped during parse.
852972 * Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
853973 *
974+ * Also purges stale rows for WASM-only files deleted from disk (#1073), which
975+ * Rust's `detect_removed_files` filter (#1070) skips.
976+ *
854977 * Accepts a pre-computed `gap` from `detectDroppedLanguageGap` so the caller
855978 * can use the same scan for both gating and the actual backfill — avoiding
856979 * a redundant fs walk when the orchestrator's signals already triggered.
@@ -859,8 +982,8 @@ async function backfillNativeDroppedFiles(
859982 ctx : PipelineContext ,
860983 gap : DroppedLanguageGap ,
861984) : Promise < void > {
862- const { missingRel, missingAbs } = gap ;
863- if ( missingAbs . length === 0 ) return ;
985+ const { missingRel, missingAbs, staleRel } = gap ;
986+ if ( missingAbs . length === 0 && staleRel . length === 0 ) return ;
864987
865988 // Now that we know there's work to do, hand off to better-sqlite3 (needed
866989 // for the INSERT path below).
@@ -870,6 +993,28 @@ async function backfillNativeDroppedFiles(
870993 ctx . nativeFirstProxy = false ;
871994 }
872995
996+ const dbConn = ctx . db as unknown as BetterSqlite3Database ;
997+
998+ // Purge WASM-only files that were deleted from disk (#1073). Rust's
999+ // detect_removed_files skips them and the insert path below never visits
1000+ // them, so without this their rows would persist across rebuilds until the
1001+ // next full rebuild reset the DB.
1002+ if ( staleRel . length > 0 ) {
1003+ // `computeWasmOnlyStaleFiles` guarantees every path here has an extension
1004+ // outside NATIVE_SUPPORTED_EXTENSIONS, so `classifyNativeDrops` would
1005+ // always bucket 100% into `unsupported-by-native`. Build the extension
1006+ // summary directly to avoid a redundant classification pass.
1007+ const staleByExt = groupByExtension ( staleRel ) ;
1008+ info (
1009+ `Detected ${ staleRel . length } deleted WASM-only file(s) the native orchestrator skipped; purging stale rows: ${ formatDropExtensionSummary ( staleByExt ) } ` ,
1010+ ) ;
1011+ purgeFilesData ( dbConn , staleRel ) ;
1012+ }
1013+
1014+ if ( missingAbs . length === 0 ) return ;
1015+
1016+ if ( missingAbs . length === 0 ) return ;
1017+
8731018 // Classify drops so users see per-extension reasons instead of just a count
8741019 // (#1011). `unsupported-by-native` is a legitimate parser limit (no Rust
8751020 // extractor); `native-extractor-failure` indicates a real native bug since
@@ -918,7 +1063,7 @@ async function backfillNativeDroppedFiles(
9181063 exportKeys . push ( [ exp . name , exp . kind , relPath , exp . line ] ) ;
9191064 }
9201065 }
921- const db = ctx . db as unknown as BetterSqlite3Database ;
1066+ const db = dbConn ;
9221067 batchInsertNodes ( db , rows ) ;
9231068
9241069 // Mark exported symbols in batches — mirrors insertDefinitionsAndExports.
0 commit comments