From e5510e9d8fb0d0dc85d883ab6596323632a43461 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 4 May 2026 17:39:53 -0600 Subject: [PATCH 1/3] fix(native): js-side fast-skip for incremental no-op rebuilds (#1054) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Rust orchestrator's internal early-exit fires reliably locally but not in CI, where every native no-op rebuild was paying the full ~2s pipeline cost (parse, ast, cfg, dataflow, edges all re-running). WASM exits in ~20ms via detectChanges before any work happens. Mirror that behavior at the JS layer: a read-only Tier-0/Tier-1 (mtime+size) check before invoking the native orchestrator. When every collected file matches file_hashes, skip the orchestrator entirely. Tier-2 hashing stays on the native side — any mismatch falls through and lets Rust's detect_changes remain the source of truth. Benchmark on this repo (744 files): native noopRebuildMs: 2125ms → 22ms (matches WASM's 23ms) Closes #1054 --- src/domain/graph/builder/pipeline.ts | 34 ++++++++++- .../graph/builder/stages/detect-changes.ts | 57 +++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts index db8eb797..954a5900 100644 --- a/src/domain/graph/builder/pipeline.ts +++ b/src/domain/graph/builder/pipeline.ts @@ -9,6 +9,7 @@ import path from 'node:path'; import { performance } from 'node:perf_hooks'; import { acquireAdvisoryLock, + closeDb, closeDbPair, getBuildMeta, initSchema, @@ -39,6 +40,7 @@ import { getInstalledWasmExtensions, parseFilesWasmForBackfill, } from '../../parser.js'; +import { writeJournalHeader } from '../journal.js'; import { setWorkspaces } from '../resolve.js'; import { PipelineContext } from './context.js'; import { batchInsertNodes, collectFiles as collectFilesUtil, loadPathAliases } from './helpers.js'; @@ -47,7 +49,7 @@ import { buildEdges } from './stages/build-edges.js'; 
import { buildStructure } from './stages/build-structure.js'; // Pipeline stages import { collectFiles } from './stages/collect-files.js'; -import { detectChanges } from './stages/detect-changes.js'; +import { detectChanges, detectNoChanges } from './stages/detect-changes.js'; import { finalize } from './stages/finalize.js'; import { insertNodes } from './stages/insert-nodes.js'; import { parseFiles } from './stages/parse-files.js'; @@ -1000,6 +1002,36 @@ export async function buildGraph( try { setupPipeline(ctx); + // ── JS-side fast-skip for native incremental (#1054) ────────────── + // The Rust orchestrator's internal early-exit fires reliably locally + // but not in CI, where every no-op rebuild was paying the full ~2s + // pipeline cost. A read-only mtime+size check here matches WASM's + // ~20ms early-exit and skips the orchestrator entirely when no + // source files have changed. Tier-2 hashing is left to the native + // side: any mismatch falls through and lets Rust's detect_changes + // remain the source of truth. + if ( + ctx.nativeAvailable && + ctx.engineName === 'native' && + ctx.incremental && + !ctx.forceFullRebuild && + !(ctx.opts as Record<string, unknown>).scope + ) { + try { + await collectFiles(ctx); + if (detectNoChanges(ctx.db, ctx.allFiles, ctx.rootDir)) { + info('No changes detected. Graph is up to date.'); + writeJournalHeader(ctx.rootDir, Date.now()); + closeDb(ctx.db); + return; + } + } catch (err) { + // Pre-flight is best-effort — any failure falls through to the + // orchestrator, which performs its own complete detection. + debug(`native fast-skip pre-flight failed: ${toErrorMessage(err)}`); + } + } + // ── Rust orchestrator fast path (#695) ──────────────────────────── // When available, run the entire build pipeline in Rust with zero // napi crossings (eliminates WAL dual-connection dance). 
Falls back diff --git a/src/domain/graph/builder/stages/detect-changes.ts b/src/domain/graph/builder/stages/detect-changes.ts index 4db72865..673728c2 100644 --- a/src/domain/graph/builder/stages/detect-changes.ts +++ b/src/domain/graph/builder/stages/detect-changes.ts @@ -512,6 +512,63 @@ function handleIncrementalBuild(ctx: PipelineContext): void { purgeAndAddReverseDeps(ctx, changePaths, reverseDeps); } +/** + * Read-only pre-flight check for the native orchestrator. + * + * Returns true iff every collected source file has matching mtime+size in + * `file_hashes` and no DB-tracked file has been removed. When true, the + * caller can short-circuit before invoking the native orchestrator — + * matching WASM's ~20 ms early-exit path and avoiding the ~2s flat + * per-call native rebuild overhead seen in CI (#1054). + * + * Intentionally Tier-0/Tier-1 only (journal + mtime/size). Tier-2 content + * hashing is left to the native side: when this returns false the caller + * falls through to the orchestrator, which performs its own complete + * detection and is the source of truth. + * + * Pure read of `db` and the filesystem — never mutates either. 
+ */ +export function detectNoChanges( + db: BetterSqlite3Database, + allFiles: string[], + rootDir: string, +): boolean { + let hasTable = false; + try { + db.prepare('SELECT 1 FROM file_hashes LIMIT 1').get(); + hasTable = true; + } catch { + /* table missing — first build */ + } + if (!hasTable) return false; + + const rows = db.prepare('SELECT file, hash, mtime, size FROM file_hashes').all() as FileHashRow[]; + if (rows.length === 0) return false; + const existing = new Map(rows.map((r) => [r.file, r])); + + const currentFiles = new Set(); + for (const file of allFiles) { + currentFiles.add(normalizePath(path.relative(rootDir, file))); + } + for (const existingFile of existing.keys()) { + if (!currentFiles.has(existingFile)) return false; + } + + for (const file of allFiles) { + const relPath = normalizePath(path.relative(rootDir, file)); + const record = existing.get(relPath); + if (!record) return false; + const stat = fileStat(file) as FileStat | undefined; + if (!stat) return false; + const storedMtime = record.mtime || 0; + const storedSize = record.size || 0; + if (storedSize <= 0) return false; + if (Math.floor(stat.mtimeMs) !== storedMtime || stat.size !== storedSize) return false; + } + + return true; +} + export async function detectChanges(ctx: PipelineContext): Promise<void> { const start = performance.now(); try { From 86994711d77dd34d36f8357669456bda0f45a242 Mon Sep 17 00:00:00 2001 From: Carlos Almeida Date: Mon, 4 May 2026 22:29:25 -0600 Subject: [PATCH 2/3] fix(detect-changes): guard fast-skip on empty CFG/dataflow tables (#1064) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Tier-0/Tier-1 fast-skip introduced by #1064 short-circuited builds based purely on mtime+size, missing the runPendingAnalysis guard that the existing JS early-exit path (detectChanges, line ~610) always runs. 
If CFG or dataflow analysis was enabled (or tables wiped) between builds and no source files changed, mtime/size would still match file_hashes, detectNoChanges would return true, and the pending analysis pass would silently never run — leaving cfg_blocks and dataflow empty indefinitely on no-op-rebuild repos. Add a conservative pending-analysis guard: when opts.cfg !== false and cfg_blocks is empty, return false; same for opts.dataflow / dataflow. The caller then falls through to the orchestrator (or JS pipeline), which populates the tables via the existing runPendingAnalysis path. Adds unit tests for detectNoChanges covering empty file_hashes, mtime+size match, deleted tracked file, mtime drift, and the new pending-analysis guards for both cfg and dataflow. Impact: 2 functions changed, 7 affected --- .../graph/builder/stages/detect-changes.ts | 31 ++++ tests/builder/detect-changes.test.ts | 137 +++++++++++++++++- 2 files changed, 167 insertions(+), 1 deletion(-) diff --git a/src/domain/graph/builder/stages/detect-changes.ts b/src/domain/graph/builder/stages/detect-changes.ts index 673728c2..44018d78 100644 --- a/src/domain/graph/builder/stages/detect-changes.ts +++ b/src/domain/graph/builder/stages/detect-changes.ts @@ -526,12 +526,19 @@ function handleIncrementalBuild(ctx: PipelineContext): void { * falls through to the orchestrator, which performs its own complete * detection and is the source of truth. * + * Conservatively returns false when CFG or dataflow analysis is enabled + * but the corresponding tables are empty — otherwise the fast-skip would + * silently suppress the pending-analysis pass that the JS path runs via + * `runPendingAnalysis`, and CFG/dataflow data would never populate on + * repos where source files don't change between builds. + * * Pure read of `db` and the filesystem — never mutates either. 
*/ export function detectNoChanges( db: BetterSqlite3Database, allFiles: string[], rootDir: string, + opts?: Record<string, unknown>, ): boolean { let hasTable = false; try { @@ -566,9 +573,33 @@ export function detectNoChanges( if (Math.floor(stat.mtimeMs) !== storedMtime || stat.size !== storedSize) return false; } + // Pending-analysis guard: if CFG/dataflow is enabled but the corresponding + // table is empty (analysis newly enabled, or tables wiped between builds), + // fall through so the orchestrator / JS pipeline can run runPendingAnalysis. + // Mirrors the check at the top of runPendingAnalysis (see line ~244). + if (opts) { + if (opts.cfg !== false && hasEmptyAnalysisTable(db, 'cfg_blocks')) return false; + if (opts.dataflow !== false && hasEmptyAnalysisTable(db, 'dataflow')) return false; + } + return true; } +/** + * Returns true if `table` exists and has zero rows, matching the empty-table + * semantics of `runPendingAnalysis`. A missing table is treated as empty + * (the conservative outcome), so the caller falls through to the orchestrator + * which will create the schema and populate it. + */ +function hasEmptyAnalysisTable(db: BetterSqlite3Database, table: string): boolean { + try { + const row = db.prepare(`SELECT COUNT(*) as c FROM ${table}`).get() as { c: number } | undefined; + return (row?.c ?? 
0) === 0; + } catch { + return true; + } +} + export async function detectChanges(ctx: PipelineContext): Promise<void> { const start = performance.now(); try { diff --git a/tests/builder/detect-changes.test.ts b/tests/builder/detect-changes.test.ts index 0d13fd72..7d798ef1 100644 --- a/tests/builder/detect-changes.test.ts +++ b/tests/builder/detect-changes.test.ts @@ -7,7 +7,10 @@ import path from 'node:path'; import { afterAll, beforeAll, describe, expect, it } from 'vitest'; import { closeDb, initSchema, openDb } from '../../src/db/index.js'; import { PipelineContext } from '../../src/domain/graph/builder/context.js'; -import { detectChanges } from '../../src/domain/graph/builder/stages/detect-changes.js'; +import { + detectChanges, + detectNoChanges, +} from '../../src/domain/graph/builder/stages/detect-changes.js'; import { writeJournalHeader } from '../../src/domain/graph/journal.js'; let tmpDir: string; @@ -142,3 +145,135 @@ describe('detectChanges stage', () => { fs.rmSync(dir, { recursive: true, force: true }); }); }); + +describe('detectNoChanges fast-skip', () => { + function seedFile(dir: string, name: string, content: string): string { + const filePath = path.join(dir, name); + fs.writeFileSync(filePath, content); + return filePath; + } + + function seedHashRow( + db: ReturnType<typeof openDb>, + relPath: string, + filePath: string, + ): { mtime: number; size: number } { + const stat = fs.statSync(filePath); + const mtime = Math.floor(stat.mtimeMs); + db.prepare('INSERT INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)').run( + relPath, + 'deadbeef', + mtime, + stat.size, + ); + return { mtime, size: stat.size }; + } + + it('returns false when file_hashes is empty (first build)', () => { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-noChange-empty-')); + const dbDir = path.join(dir, '.codegraph'); + fs.mkdirSync(dbDir, { recursive: true }); + const db = openDb(path.join(dbDir, 'graph.db')); + initSchema(db); + const file = seedFile(dir, 'a.js', 
'export const a = 1;'); + + expect(detectNoChanges(db, [file], dir)).toBe(false); + + closeDb(db); + fs.rmSync(dir, { recursive: true, force: true }); + }); + + it('returns true when mtime+size match seeded file_hashes', () => { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-noChange-match-')); + const dbDir = path.join(dir, '.codegraph'); + fs.mkdirSync(dbDir, { recursive: true }); + const db = openDb(path.join(dbDir, 'graph.db')); + initSchema(db); + const file = seedFile(dir, 'a.js', 'export const a = 1;'); + seedHashRow(db, 'a.js', file); + + expect(detectNoChanges(db, [file], dir)).toBe(true); + + closeDb(db); + fs.rmSync(dir, { recursive: true, force: true }); + }); + + it('returns false when a tracked file has been deleted', () => { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-noChange-deleted-')); + const dbDir = path.join(dir, '.codegraph'); + fs.mkdirSync(dbDir, { recursive: true }); + const db = openDb(path.join(dbDir, 'graph.db')); + initSchema(db); + const file = seedFile(dir, 'a.js', 'export const a = 1;'); + seedHashRow(db, 'a.js', file); + seedHashRow(db, 'gone.js', file); // tracked but no longer on disk + + expect(detectNoChanges(db, [file], dir)).toBe(false); + + closeDb(db); + fs.rmSync(dir, { recursive: true, force: true }); + }); + + it('returns false when mtime differs from seeded value', () => { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-noChange-mtime-')); + const dbDir = path.join(dir, '.codegraph'); + fs.mkdirSync(dbDir, { recursive: true }); + const db = openDb(path.join(dbDir, 'graph.db')); + initSchema(db); + const file = seedFile(dir, 'a.js', 'export const a = 1;'); + const stat = fs.statSync(file); + db.prepare('INSERT INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)').run( + 'a.js', + 'deadbeef', + Math.floor(stat.mtimeMs) + 1000, // skewed mtime + stat.size, + ); + + expect(detectNoChanges(db, [file], dir)).toBe(false); + + closeDb(db); + fs.rmSync(dir, { 
recursive: true, force: true }); + }); + + it('returns false when CFG analysis is enabled but cfg_blocks is empty (#1064)', () => { + // Pending-analysis guard: even though mtime+size match, if cfg_blocks + // is empty (analysis newly enabled), the caller must fall through so + // runPendingAnalysis can populate the table. + const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-noChange-pendingCfg-')); + const dbDir = path.join(dir, '.codegraph'); + fs.mkdirSync(dbDir, { recursive: true }); + const db = openDb(path.join(dbDir, 'graph.db')); + initSchema(db); + const file = seedFile(dir, 'a.js', 'export const a = 1;'); + seedHashRow(db, 'a.js', file); + // cfg_blocks table is created empty by initSchema — that's the trigger. + + // Without opts: legacy behaviour — fast-skip returns true. + expect(detectNoChanges(db, [file], dir)).toBe(true); + // With cfg enabled (cfg !== false) and cfg_blocks empty: must return false. + expect(detectNoChanges(db, [file], dir, { cfg: true, dataflow: false })).toBe(false); + // When cfg explicitly disabled (and dataflow disabled too so its guard + // doesn't fire), the empty cfg table is irrelevant. + expect(detectNoChanges(db, [file], dir, { cfg: false, dataflow: false })).toBe(true); + + closeDb(db); + fs.rmSync(dir, { recursive: true, force: true }); + }); + + it('returns false when dataflow is enabled but dataflow table is empty (#1064)', () => { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-noChange-pendingDf-')); + const dbDir = path.join(dir, '.codegraph'); + fs.mkdirSync(dbDir, { recursive: true }); + const db = openDb(path.join(dbDir, 'graph.db')); + initSchema(db); + const file = seedFile(dir, 'a.js', 'export const a = 1;'); + seedHashRow(db, 'a.js', file); + + // Disable cfg so only the dataflow guard is exercised. 
+ expect(detectNoChanges(db, [file], dir, { cfg: false, dataflow: true })).toBe(false); + expect(detectNoChanges(db, [file], dir, { cfg: false, dataflow: false })).toBe(true); + + closeDb(db); + fs.rmSync(dir, { recursive: true, force: true }); + }); +}); From 540a4200420ec66e3d4c6c971aaa852b124f5567 Mon Sep 17 00:00:00 2001 From: Carlos Almeida Date: Mon, 4 May 2026 22:29:44 -0600 Subject: [PATCH 3/3] fix(builder): avoid redundant collectFiles on fast-skip fallthrough (#1064) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the JS-side fast-skip pre-flight ran but detectNoChanges returned false, control fell through to tryNativeOrchestrator and, on native fallback, to runPipelineStages — which called collectFiles again at line 901, doubling the filesystem walk on the non-skip path and counteracting the PR's goal of eliminating overhead. Guard the collectFiles stage so it returns early when ctx.allFiles and ctx.discoveredDirs are already populated (and not in scoped mode). On pre-flight failure, the buildGraph catch block now resets these fields so the guard correctly falls through and re-collects under runPipelineStages's own engine state. Impact: 2 functions changed, 8 affected --- src/domain/graph/builder/pipeline.ts | 8 +++++++- src/domain/graph/builder/stages/collect-files.ts | 9 +++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts index 954a5900..8c00d94a 100644 --- a/src/domain/graph/builder/pipeline.ts +++ b/src/domain/graph/builder/pipeline.ts @@ -1019,7 +1019,9 @@ export async function buildGraph( ) { try { await collectFiles(ctx); - if (detectNoChanges(ctx.db, ctx.allFiles, ctx.rootDir)) { + if ( + detectNoChanges(ctx.db, ctx.allFiles, ctx.rootDir, ctx.opts as Record<string, unknown>) + ) { info('No changes detected. 
Graph is up to date.'); writeJournalHeader(ctx.rootDir, Date.now()); closeDb(ctx.db); @@ -1028,6 +1030,10 @@ export async function buildGraph( } catch (err) { // Pre-flight is best-effort — any failure falls through to the // orchestrator, which performs its own complete detection. + // Reset ctx.allFiles so runPipelineStages re-collects under its own + // engine state if we ended up partially populated before throwing. + ctx.allFiles = undefined as unknown as string[]; + ctx.discoveredDirs = undefined as unknown as Set<string>; debug(`native fast-skip pre-flight failed: ${toErrorMessage(err)}`); } } diff --git a/src/domain/graph/builder/stages/collect-files.ts b/src/domain/graph/builder/stages/collect-files.ts index 840af3f6..7be50c05 100644 --- a/src/domain/graph/builder/stages/collect-files.ts +++ b/src/domain/graph/builder/stages/collect-files.ts @@ -100,6 +100,15 @@ function tryFastCollect( export async function collectFiles(ctx: PipelineContext): Promise<void> { const { rootDir, config, opts } = ctx; + // Skip when the JS-side fast-skip pre-flight (#1054) already populated the + // file list and changes were detected, causing fallthrough to the native + // orchestrator and then to runPipelineStages. Avoids redoing the filesystem + // walk on the non-skip path (~8ms on 473 files). On pre-flight failure the + // caller resets ctx.allFiles so this guard correctly falls through. + if (!opts.scope && ctx.allFiles?.length && ctx.discoveredDirs?.size) { + return; + } + if (opts.scope) { // Scoped rebuild: rebuild only specified files. //