diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts index db8eb797..8c00d94a 100644 --- a/src/domain/graph/builder/pipeline.ts +++ b/src/domain/graph/builder/pipeline.ts @@ -9,6 +9,7 @@ import path from 'node:path'; import { performance } from 'node:perf_hooks'; import { acquireAdvisoryLock, + closeDb, closeDbPair, getBuildMeta, initSchema, @@ -39,6 +40,7 @@ import { getInstalledWasmExtensions, parseFilesWasmForBackfill, } from '../../parser.js'; +import { writeJournalHeader } from '../journal.js'; import { setWorkspaces } from '../resolve.js'; import { PipelineContext } from './context.js'; import { batchInsertNodes, collectFiles as collectFilesUtil, loadPathAliases } from './helpers.js'; @@ -47,7 +49,7 @@ import { buildEdges } from './stages/build-edges.js'; import { buildStructure } from './stages/build-structure.js'; // Pipeline stages import { collectFiles } from './stages/collect-files.js'; -import { detectChanges } from './stages/detect-changes.js'; +import { detectChanges, detectNoChanges } from './stages/detect-changes.js'; import { finalize } from './stages/finalize.js'; import { insertNodes } from './stages/insert-nodes.js'; import { parseFiles } from './stages/parse-files.js'; @@ -1000,6 +1002,42 @@ export async function buildGraph( try { setupPipeline(ctx); + // ── JS-side fast-skip for native incremental (#1054) ────────────── + // The Rust orchestrator's internal early-exit fires reliably locally + // but not in CI, where every no-op rebuild was paying the full ~2s + // pipeline cost. A read-only mtime+size check here matches WASM's + // ~20ms early-exit and skips the orchestrator entirely when no + // source files have changed. Tier-2 hashing is left to the native + // side: any mismatch falls through and lets Rust's detect_changes + // remain the source of truth. 
+ if ( + ctx.nativeAvailable && + ctx.engineName === 'native' && + ctx.incremental && + !ctx.forceFullRebuild && + !(ctx.opts as Record).scope + ) { + try { + await collectFiles(ctx); + if ( + detectNoChanges(ctx.db, ctx.allFiles, ctx.rootDir, ctx.opts as Record) + ) { + info('No changes detected. Graph is up to date.'); + writeJournalHeader(ctx.rootDir, Date.now()); + closeDb(ctx.db); + return; + } + } catch (err) { + // Pre-flight is best-effort — any failure falls through to the + // orchestrator, which performs its own complete detection. + // Reset ctx.allFiles so runPipelineStages re-collects under its own + // engine state if we ended up partially populated before throwing. + ctx.allFiles = undefined as unknown as string[]; + ctx.discoveredDirs = undefined as unknown as Set; + debug(`native fast-skip pre-flight failed: ${toErrorMessage(err)}`); + } + } + // ── Rust orchestrator fast path (#695) ──────────────────────────── // When available, run the entire build pipeline in Rust with zero // napi crossings (eliminates WAL dual-connection dance). Falls back diff --git a/src/domain/graph/builder/stages/collect-files.ts b/src/domain/graph/builder/stages/collect-files.ts index 840af3f6..7be50c05 100644 --- a/src/domain/graph/builder/stages/collect-files.ts +++ b/src/domain/graph/builder/stages/collect-files.ts @@ -100,6 +100,15 @@ function tryFastCollect( export async function collectFiles(ctx: PipelineContext): Promise { const { rootDir, config, opts } = ctx; + // Skip when the JS-side fast-skip pre-flight (#1054) already populated the + // file list and changes were detected, causing fallthrough to the native + // orchestrator and then to runPipelineStages. Avoids redoing the filesystem + // walk on the non-skip path (~8ms on 473 files). On pre-flight failure the + // caller resets ctx.allFiles so this guard correctly falls through. 
+ if (!opts.scope && ctx.allFiles?.length && ctx.discoveredDirs?.size) { + return; + } + if (opts.scope) { // Scoped rebuild: rebuild only specified files. // diff --git a/src/domain/graph/builder/stages/detect-changes.ts b/src/domain/graph/builder/stages/detect-changes.ts index 4db72865..44018d78 100644 --- a/src/domain/graph/builder/stages/detect-changes.ts +++ b/src/domain/graph/builder/stages/detect-changes.ts @@ -512,6 +512,94 @@ function handleIncrementalBuild(ctx: PipelineContext): void { purgeAndAddReverseDeps(ctx, changePaths, reverseDeps); } +/** + * Read-only pre-flight check for the native orchestrator. + * + * Returns true iff every collected source file has matching mtime+size in + * `file_hashes` and no DB-tracked file has been removed. When true, the + * caller can short-circuit before invoking the native orchestrator — + * matching WASM's ~20 ms early-exit path and avoiding the ~2s flat + * per-call native rebuild overhead seen in CI (#1054). + * + * Intentionally Tier-0/Tier-1 only (journal + mtime/size). Tier-2 content + * hashing is left to the native side: when this returns false the caller + * falls through to the orchestrator, which performs its own complete + * detection and is the source of truth. + * + * Conservatively returns false when CFG or dataflow analysis is enabled + * but the corresponding tables are empty — otherwise the fast-skip would + * silently suppress the pending-analysis pass that the JS path runs via + * `runPendingAnalysis`, and CFG/dataflow data would never populate on + * repos where source files don't change between builds. + * + * Pure read of `db` and the filesystem — never mutates either. 
/**
 * Read-only pre-flight check for the native orchestrator.
 *
 * Returns `true` only when every one of these holds:
 *  - `file_hashes` exists and contains at least one row;
 *  - the set of root-relative paths derived from `allFiles` is exactly the
 *    set of paths tracked in `file_hashes` (nothing added, nothing removed);
 *  - for every file, the floored `mtimeMs` and the `size` reported by
 *    `fileStat` equal the stored values, and the stored size is positive;
 *  - when `opts` is supplied, no enabled analysis (`cfg` / `dataflow` not
 *    explicitly `false`) has an empty or missing table.
 *
 * Only mtime and size are compared: no content hashing and no journal lookup
 * happens here. A `false` result just means "cannot prove nothing changed",
 * so the caller is expected to fall through to full change detection.
 *
 * The function issues only SELECT statements and stat calls; it never writes
 * to `db` or to the filesystem.
 *
 * @param db       Open graph database handle.
 * @param allFiles Paths of the collected source files (made relative to
 *                 `rootDir` before comparing with `file_hashes.file`).
 * @param rootDir  Root used to relativize `allFiles`.
 * @param opts     Build options. When omitted, the pending-analysis guard is
 *                 skipped entirely.
 * @returns `true` when the graph can be considered up to date.
 */
export function detectNoChanges(
  db: BetterSqlite3Database,
  allFiles: string[],
  rootDir: string,
  opts?: Record<string, unknown>,
): boolean {
  // Probe first: a missing table means this is the first build.
  try {
    db.prepare('SELECT 1 FROM file_hashes LIMIT 1').get();
  } catch {
    return false;
  }

  const rows = db.prepare('SELECT file, hash, mtime, size FROM file_hashes').all() as FileHashRow[];
  if (rows.length === 0) return false;
  const tracked = new Map<string, FileHashRow>(rows.map((row) => [row.file, row]));

  // Normalize every collected path exactly once and reuse it for both the
  // removed-file check and the per-file comparison below.
  const collected = allFiles.map((file) => ({
    file,
    relPath: normalizePath(path.relative(rootDir, file)),
  }));
  const currentPaths = new Set<string>(collected.map((entry) => entry.relPath));

  // A tracked file that is no longer collected counts as a removal.
  for (const trackedPath of tracked.keys()) {
    if (!currentPaths.has(trackedPath)) return false;
  }

  for (const { file, relPath } of collected) {
    const record = tracked.get(relPath);
    if (!record) return false; // newly added file
    const stat = fileStat(file) as FileStat | undefined;
    if (!stat) return false;
    const storedMtime = record.mtime ?? 0;
    const storedSize = record.size ?? 0;
    // A stored size of 0 (or less) is never trusted, so a row without a
    // usable size — including a tracked zero-byte file — always yields false.
    if (storedSize <= 0) return false;
    if (Math.floor(stat.mtimeMs) !== storedMtime || stat.size !== storedSize) return false;
  }

  // Pending-analysis guard: if CFG/dataflow is enabled but its table is empty
  // (analysis newly enabled, or tables wiped between builds), report "changed"
  // so the caller runs the pipeline that can populate it.
  if (opts) {
    if (opts.cfg !== false && hasEmptyAnalysisTable(db, 'cfg_blocks')) return false;
    if (opts.dataflow !== false && hasEmptyAnalysisTable(db, 'dataflow')) return false;
  }

  return true;
}

/** Table names accepted by `hasEmptyAnalysisTable`: plain SQL identifiers only. */
const ANALYSIS_TABLE_NAME = /^[A-Za-z_][A-Za-z0-9_]*$/;

/**
 * Returns `true` when `table` has no rows.
 *
 * A missing table (the query throws) and a name that is not a plain SQL
 * identifier are both reported as empty — the conservative answer, because
 * "empty" makes `detectNoChanges` return `false`.
 *
 * Uses `SELECT 1 … LIMIT 1` rather than `COUNT(*)` so the check stops at the
 * first row instead of counting the whole table.
 *
 * @param db    Open graph database handle.
 * @param table Name of the analysis table to probe.
 */
function hasEmptyAnalysisTable(db: BetterSqlite3Database, table: string): boolean {
  // The name is interpolated into SQL (identifiers cannot be bound as
  // parameters), so refuse anything that is not a bare identifier.
  if (!ANALYSIS_TABLE_NAME.test(table)) return true;
  try {
    return db.prepare(`SELECT 1 FROM ${table} LIMIT 1`).get() === undefined;
  } catch {
    return true;
  }
}
describe('detectNoChanges fast-skip', () => {
  type GraphDb = ReturnType<typeof openDb>;

  /**
   * Creates a throwaway project directory with an initialized graph DB, runs
   * `run`, and always closes the DB and removes the directory afterwards —
   * including when an assertion inside `run` throws.
   */
  function withGraphFixture(label: string, run: (dir: string, db: GraphDb) => void): void {
    const dir = fs.mkdtempSync(path.join(os.tmpdir(), `codegraph-noChange-${label}-`));
    let db: GraphDb | undefined;
    try {
      const dbDir = path.join(dir, '.codegraph');
      fs.mkdirSync(dbDir, { recursive: true });
      db = openDb(path.join(dbDir, 'graph.db'));
      initSchema(db);
      run(dir, db);
    } finally {
      if (db) closeDb(db);
      fs.rmSync(dir, { recursive: true, force: true });
    }
  }

  /** Writes `content` to `dir/name` and returns the resulting path. */
  function seedFile(dir: string, name: string, content: string): string {
    const filePath = path.join(dir, name);
    fs.writeFileSync(filePath, content);
    return filePath;
  }

  /**
   * Inserts a `file_hashes` row for `relPath` using the on-disk size of
   * `filePath` and its floored mtime shifted by `mtimeSkewMs` (0 = exact match).
   */
  function seedHashRow(
    db: GraphDb,
    relPath: string,
    filePath: string,
    mtimeSkewMs = 0,
  ): { mtime: number; size: number } {
    const stat = fs.statSync(filePath);
    const mtime = Math.floor(stat.mtimeMs) + mtimeSkewMs;
    db.prepare('INSERT INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)').run(
      relPath,
      'deadbeef',
      mtime,
      stat.size,
    );
    return { mtime, size: stat.size };
  }

  it('returns false when file_hashes is empty (first build)', () => {
    withGraphFixture('empty', (dir, db) => {
      const file = seedFile(dir, 'a.js', 'export const a = 1;');

      expect(detectNoChanges(db, [file], dir)).toBe(false);
    });
  });

  it('returns true when mtime+size match seeded file_hashes', () => {
    withGraphFixture('match', (dir, db) => {
      const file = seedFile(dir, 'a.js', 'export const a = 1;');
      seedHashRow(db, 'a.js', file);

      expect(detectNoChanges(db, [file], dir)).toBe(true);
    });
  });

  it('returns false when a tracked file has been deleted', () => {
    withGraphFixture('deleted', (dir, db) => {
      const file = seedFile(dir, 'a.js', 'export const a = 1;');
      seedHashRow(db, 'a.js', file);
      seedHashRow(db, 'gone.js', file); // tracked but no longer on disk

      expect(detectNoChanges(db, [file], dir)).toBe(false);
    });
  });

  it('returns false when mtime differs from seeded value', () => {
    withGraphFixture('mtime', (dir, db) => {
      const file = seedFile(dir, 'a.js', 'export const a = 1;');
      seedHashRow(db, 'a.js', file, 1000); // skewed mtime

      expect(detectNoChanges(db, [file], dir)).toBe(false);
    });
  });

  it('returns false when CFG analysis is enabled but cfg_blocks is empty (#1064)', () => {
    // Pending-analysis guard: even though mtime+size match, an empty
    // cfg_blocks table (analysis newly enabled) must make the check fail so
    // the caller falls through and the table can be populated.
    withGraphFixture('pendingCfg', (dir, db) => {
      const file = seedFile(dir, 'a.js', 'export const a = 1;');
      seedHashRow(db, 'a.js', file);
      // cfg_blocks is left without rows — that is the trigger.

      // Without opts the guard is skipped entirely.
      expect(detectNoChanges(db, [file], dir)).toBe(true);
      // cfg enabled (cfg !== false) and cfg_blocks empty: must return false.
      expect(detectNoChanges(db, [file], dir, { cfg: true, dataflow: false })).toBe(false);
      // Both analyses explicitly disabled: the empty tables are irrelevant.
      expect(detectNoChanges(db, [file], dir, { cfg: false, dataflow: false })).toBe(true);
    });
  });

  it('returns false when dataflow is enabled but dataflow table is empty (#1064)', () => {
    withGraphFixture('pendingDf', (dir, db) => {
      const file = seedFile(dir, 'a.js', 'export const a = 1;');
      seedHashRow(db, 'a.js', file);

      // cfg is disabled so only the dataflow guard is exercised.
      expect(detectNoChanges(db, [file], dir, { cfg: false, dataflow: true })).toBe(false);
      expect(detectNoChanges(db, [file], dir, { cfg: false, dataflow: false })).toBe(true);
    });
  });
});