optave · carlos-alm · May 5, 2026 · May 4, 2026 · May 5, 2026 · May 5, 2026
diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts
@@ -9,6 +9,7 @@ import path from 'node:path';
 import { performance } from 'node:perf_hooks';
 import {
   acquireAdvisoryLock,
+  closeDb,
   closeDbPair,
   getBuildMeta,
   initSchema,
@@ -39,6 +40,7 @@ import {
   getInstalledWasmExtensions,
   parseFilesWasmForBackfill,
 } from '../../parser.js';
+import { writeJournalHeader } from '../journal.js';
 import { setWorkspaces } from '../resolve.js';
 import { PipelineContext } from './context.js';
 import { batchInsertNodes, collectFiles as collectFilesUtil, loadPathAliases } from './helpers.js';
@@ -47,7 +49,7 @@ import { buildEdges } from './stages/build-edges.js';
 import { buildStructure } from './stages/build-structure.js';
 // Pipeline stages
 import { collectFiles } from './stages/collect-files.js';
-import { detectChanges } from './stages/detect-changes.js';
+import { detectChanges, detectNoChanges } from './stages/detect-changes.js';
 import { finalize } from './stages/finalize.js';
 import { insertNodes } from './stages/insert-nodes.js';
 import { parseFiles } from './stages/parse-files.js';
@@ -1000,6 +1002,42 @@ export async function buildGraph(
   try {
     setupPipeline(ctx);
 
+    // ── JS-side fast-skip for native incremental (#1054) ──────────────
+    // The Rust orchestrator's internal early-exit fires reliably locally
+    // but not in CI, where every no-op rebuild was paying the full ~2s
+    // pipeline cost. A read-only mtime+size check here matches WASM's
+    // ~20ms early-exit and skips the orchestrator entirely when no
+    // source files have changed. Tier-2 hashing is left to the native
+    // side: any mismatch falls through and lets Rust's detect_changes
+    // remain the source of truth.
+    if (
+      ctx.nativeAvailable &&
+      ctx.engineName === 'native' &&
+      ctx.incremental &&
+      !ctx.forceFullRebuild &&
+      !(ctx.opts as Record<string, unknown>).scope
+    ) {
+      try {
+        await collectFiles(ctx);
+        if (
+          detectNoChanges(ctx.db, ctx.allFiles, ctx.rootDir, ctx.opts as Record<string, unknown>)
+        ) {
+          info('No changes detected. Graph is up to date.');
+          writeJournalHeader(ctx.rootDir, Date.now());
+          closeDb(ctx.db);
+          return;
+        }
+      } catch (err) {
+        // Pre-flight is best-effort — any failure falls through to the
+        // orchestrator, which performs its own complete detection.
+        // Reset ctx.allFiles so runPipelineStages re-collects under its own
+        // engine state if we ended up partially populated before throwing.
+        ctx.allFiles = undefined as unknown as string[];
+        ctx.discoveredDirs = undefined as unknown as Set<string>;
+        debug(`native fast-skip pre-flight failed: ${toErrorMessage(err)}`);
+      }
+    }
+
     // ── Rust orchestrator fast path (#695) ────────────────────────────
     // When available, run the entire build pipeline in Rust with zero
     // napi crossings (eliminates WAL dual-connection dance). Falls back

diff --git a/src/domain/graph/builder/stages/collect-files.ts b/src/domain/graph/builder/stages/collect-files.ts
@@ -100,6 +100,15 @@ function tryFastCollect(
 export async function collectFiles(ctx: PipelineContext): Promise<void> {
   const { rootDir, config, opts } = ctx;
 
+  // Skip when the JS-side fast-skip pre-flight (#1054) already populated the
+  // file list and changes were detected, causing fallthrough to the native
+  // orchestrator and then to runPipelineStages. Avoids redoing the filesystem
+  // walk on the non-skip path (~8ms on 473 files). On pre-flight failure the
+  // caller resets ctx.allFiles so this guard correctly falls through.
+  if (!opts.scope && ctx.allFiles?.length && ctx.discoveredDirs?.size) {
+    return;
+  }
+
   if (opts.scope) {
     // Scoped rebuild: rebuild only specified files.
     //

diff --git a/src/domain/graph/builder/stages/detect-changes.ts b/src/domain/graph/builder/stages/detect-changes.ts
@@ -512,6 +512,94 @@ function handleIncrementalBuild(ctx: PipelineContext): void {
   purgeAndAddReverseDeps(ctx, changePaths, reverseDeps);
 }
 
+/**
+ * Read-only pre-flight check for the native orchestrator.
+ *
+ * Returns true only when all of the following hold:
+ *  - `file_hashes` can be queried and contains at least one row;
+ *  - every path stored in `file_hashes` is present in `allFiles`
+ *    (i.e. no DB-tracked file has been removed);
+ *  - every file in `allFiles` has a `file_hashes` row whose stored mtime and
+ *    size equal the values currently reported by `fileStat`;
+ *  - when `opts` is supplied, no enabled analysis (CFG / dataflow) has an
+ *    empty backing table.
+ *
+ * When true, the caller can short-circuit before invoking the native
+ * orchestrator — matching WASM's ~20 ms early-exit path and avoiding the ~2s
+ * flat per-call native rebuild overhead seen in CI (#1054).
+ *
+ * Intentionally an mtime/size comparison only. This function does not consult
+ * the change journal and never compares content hashes (the `hash` column is
+ * fetched but not used); content hashing is left to the native side. When this
+ * returns false the caller falls through to the orchestrator, which performs
+ * its own complete detection and is the source of truth.
+ *
+ * Conservatively returns false when CFG or dataflow analysis is enabled
+ * but the corresponding tables are empty — otherwise the fast-skip would
+ * silently suppress the pending-analysis pass that the JS path runs via
+ * `runPendingAnalysis`, and CFG/dataflow data would never populate on
+ * repos where source files don't change between builds. This guard is only
+ * evaluated when `opts` is passed; with `opts` omitted it is skipped entirely.
+ *
+ * Only issues SELECT statements against `db` and stat calls against the
+ * filesystem — never mutates either.
+ *
+ * @param db - Open database handle holding the `file_hashes` table.
+ * @param allFiles - Paths of the currently collected source files; each is
+ *   made relative to `rootDir` before being matched against `file_hashes.file`.
+ * @param rootDir - Project root used to relativize `allFiles`.
+ * @param opts - Optional build options; only `cfg` and `dataflow` are read,
+ *   and each is treated as enabled unless it is exactly `false`.
+ * @returns true if nothing appears to have changed, false otherwise.
+ */
+export function detectNoChanges(
+  db: BetterSqlite3Database,
+  allFiles: string[],
+  rootDir: string,
+  opts?: Record<string, unknown>,
+): boolean {
+  // Probe for the table first: any error from this query is taken to mean
+  // there is no usable `file_hashes` table, so there is nothing to compare to.
+  let hasTable = false;
+  try {
+    db.prepare('SELECT 1 FROM file_hashes LIMIT 1').get();
+    hasTable = true;
+  } catch {
+    /* probe failed — treated as table missing (first build) */
+  }
+  if (!hasTable) return false;
+
+  // An empty table carries no baseline either, so it can never prove "no changes".
+  const rows = db.prepare('SELECT file, hash, mtime, size FROM file_hashes').all() as FileHashRow[];
+  if (rows.length === 0) return false;
+  const existing = new Map<string, FileHashRow>(rows.map((r) => [r.file, r]));
+
+  // Removal check: every DB-tracked path must still be among the collected
+  // files. Keys are rootDir-relative paths passed through normalizePath
+  // (assumed to be the same form in which `file_hashes.file` is stored).
+  const currentFiles = new Set<string>();
+  for (const file of allFiles) {
+    currentFiles.add(normalizePath(path.relative(rootDir, file)));
+  }
+  for (const existingFile of existing.keys()) {
+    if (!currentFiles.has(existingFile)) return false;
+  }
+
+  // Addition / modification check: bail out on the first file that is
+  // untracked, cannot be stat'ed, or whose mtime/size differ from the record.
+  for (const file of allFiles) {
+    const relPath = normalizePath(path.relative(rootDir, file));
+    const record = existing.get(relPath);
+    if (!record) return false;
+    // A falsy stat result is treated as "changed" (presumably the file could
+    // not be stat'ed — confirm against fileStat's contract).
+    const stat = fileStat(file) as FileStat | undefined;
+    if (!stat) return false;
+    // Null/undefined stored values collapse to 0 here.
+    const storedMtime = record.mtime || 0;
+    const storedSize = record.size || 0;
+    // A non-positive stored size is never trusted.
+    // NOTE(review): a tracked file recorded with size 0 therefore always
+    // defeats the fast-skip — presumably intentional; confirm.
+    if (storedSize <= 0) return false;
+    // mtime is compared at whole-millisecond granularity.
+    if (Math.floor(stat.mtimeMs) !== storedMtime || stat.size !== storedSize) return false;
+  }
+
+  // Pending-analysis guard: if CFG/dataflow is enabled but the corresponding
+  // table is empty (analysis newly enabled, or tables wiped between builds),
+  // fall through so the orchestrator / JS pipeline can run runPendingAnalysis.
+  // Intended to mirror the empty-table check performed by runPendingAnalysis.
+  if (opts) {
+    if (opts.cfg !== false && hasEmptyAnalysisTable(db, 'cfg_blocks')) return false;
+    if (opts.dataflow !== false && hasEmptyAnalysisTable(db, 'dataflow')) return false;
+  }
+
+  return true;
+}
+
+/**
+ * Reports whether an analysis table should be considered empty.
+ *
+ * Returns true when `SELECT COUNT(*)` on `table` yields zero (or no row at
+ * all), and also when the query throws for any reason — for example because
+ * the table does not exist. Treating a failed query as "empty" is the
+ * conservative outcome: the caller then declines the fast-skip and falls
+ * through to the orchestrator. Intended to match the empty-table semantics of
+ * `runPendingAnalysis`.
+ *
+ * `table` is interpolated directly into the SQL text, so it must be a
+ * trusted, hard-coded identifier — never externally supplied input.
+ *
+ * @param db - Database handle to query.
+ * @param table - Name of the table whose rows are counted.
+ * @returns true if the table has no rows or could not be queried.
+ */
+function hasEmptyAnalysisTable(db: BetterSqlite3Database, table: string): boolean {
+  try {
+    const row = db.prepare(`SELECT COUNT(*) as c FROM ${table}`).get() as { c: number } | undefined;
+    // A missing result row is counted as zero.
+    return (row?.c ?? 0) === 0;
+  } catch {
+    // Any query failure (e.g. missing table) counts as empty.
+    return true;
+  }
+}
+
 export async function detectChanges(ctx: PipelineContext): Promise<void> {
   const start = performance.now();
   try {

diff --git a/tests/builder/detect-changes.test.ts b/tests/builder/detect-changes.test.ts
@@ -7,7 +7,10 @@ import path from 'node:path';
 import { afterAll, beforeAll, describe, expect, it } from 'vitest';
 import { closeDb, initSchema, openDb } from '../../src/db/index.js';
 import { PipelineContext } from '../../src/domain/graph/builder/context.js';
-import { detectChanges } from '../../src/domain/graph/builder/stages/detect-changes.js';
+import {
+  detectChanges,
+  detectNoChanges,
+} from '../../src/domain/graph/builder/stages/detect-changes.js';
 import { writeJournalHeader } from '../../src/domain/graph/journal.js';
 
 let tmpDir: string;
@@ -142,3 +145,135 @@ describe('detectChanges stage', () => {
     fs.rmSync(dir, { recursive: true, force: true });
   });
 });
+
+// Unit tests for the detectNoChanges pre-flight. Each case creates its own
+// temp project directory and database (via openDb + initSchema), optionally
+// seeds `file_hashes`, and asserts the boolean result.
+// NOTE(review): cleanup (closeDb + rmSync) runs after the assertions without
+// try/finally, so a failing expectation leaves the temp dir and DB handle behind.
+describe('detectNoChanges fast-skip', () => {
+  /** Writes `content` to `name` inside `dir` and returns the joined path. */
+  function seedFile(dir: string, name: string, content: string): string {
+    const filePath = path.join(dir, name);
+    fs.writeFileSync(filePath, content);
+    return filePath;
+  }
+
+  /**
+   * Inserts a `file_hashes` row for `relPath` using the current on-disk stat
+   * of `filePath` (mtime floored to whole milliseconds, placeholder hash
+   * 'deadbeef') and returns the mtime/size that were stored.
+   */
+  function seedHashRow(
+    db: ReturnType<typeof openDb>,
+    relPath: string,
+    filePath: string,
+  ): { mtime: number; size: number } {
+    const stat = fs.statSync(filePath);
+    const mtime = Math.floor(stat.mtimeMs);
+    db.prepare('INSERT INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)').run(
+      relPath,
+      'deadbeef',
+      mtime,
+      stat.size,
+    );
+    return { mtime, size: stat.size };
+  }
+
+  it('returns false when file_hashes is empty (first build)', () => {
+    const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-noChange-empty-'));
+    const dbDir = path.join(dir, '.codegraph');
+    fs.mkdirSync(dbDir, { recursive: true });
+    const db = openDb(path.join(dbDir, 'graph.db'));
+    initSchema(db);
+    // The source file exists on disk, but no file_hashes row is seeded for it.
+    const file = seedFile(dir, 'a.js', 'export const a = 1;');
+
+    expect(detectNoChanges(db, [file], dir)).toBe(false);
+
+    closeDb(db);
+    fs.rmSync(dir, { recursive: true, force: true });
+  });
+
+  it('returns true when mtime+size match seeded file_hashes', () => {
+    const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-noChange-match-'));
+    const dbDir = path.join(dir, '.codegraph');
+    fs.mkdirSync(dbDir, { recursive: true });
+    const db = openDb(path.join(dbDir, 'graph.db'));
+    initSchema(db);
+    const file = seedFile(dir, 'a.js', 'export const a = 1;');
+    // The row is seeded from the file's real stat, so mtime and size match.
+    seedHashRow(db, 'a.js', file);
+
+    // opts is omitted here, so the pending-analysis guard is not evaluated.
+    expect(detectNoChanges(db, [file], dir)).toBe(true);
+
+    closeDb(db);
+    fs.rmSync(dir, { recursive: true, force: true });
+  });
+
+  it('returns false when a tracked file has been deleted', () => {
+    const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-noChange-deleted-'));
+    const dbDir = path.join(dir, '.codegraph');
+    fs.mkdirSync(dbDir, { recursive: true });
+    const db = openDb(path.join(dbDir, 'graph.db'));
+    initSchema(db);
+    const file = seedFile(dir, 'a.js', 'export const a = 1;');
+    seedHashRow(db, 'a.js', file);
+    seedHashRow(db, 'gone.js', file); // tracked row with no file on disk or in allFiles; stat borrowed from a.js
+
+    expect(detectNoChanges(db, [file], dir)).toBe(false);
+
+    closeDb(db);
+    fs.rmSync(dir, { recursive: true, force: true });
+  });
+
+  it('returns false when mtime differs from seeded value', () => {
+    const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-noChange-mtime-'));
+    const dbDir = path.join(dir, '.codegraph');
+    fs.mkdirSync(dbDir, { recursive: true });
+    const db = openDb(path.join(dbDir, 'graph.db'));
+    initSchema(db);
+    const file = seedFile(dir, 'a.js', 'export const a = 1;');
+    const stat = fs.statSync(file);
+    // Seed the row by hand so the stored mtime can deliberately disagree.
+    db.prepare('INSERT INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)').run(
+      'a.js',
+      'deadbeef',
+      Math.floor(stat.mtimeMs) + 1000, // stored mtime is 1000 ms ahead of the file's real mtime
+      stat.size,
+    );
+
+    expect(detectNoChanges(db, [file], dir)).toBe(false);
+
+    closeDb(db);
+    fs.rmSync(dir, { recursive: true, force: true });
+  });
+
+  it('returns false when CFG analysis is enabled but cfg_blocks is empty (#1064)', () => {
+    // Pending-analysis guard: even though mtime+size match, if cfg_blocks
+    // is empty (analysis newly enabled), the caller must fall through so
+    // runPendingAnalysis can populate the table.
+    const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-noChange-pendingCfg-'));
+    const dbDir = path.join(dir, '.codegraph');
+    fs.mkdirSync(dbDir, { recursive: true });
+    const db = openDb(path.join(dbDir, 'graph.db'));
+    initSchema(db);
+    const file = seedFile(dir, 'a.js', 'export const a = 1;');
+    seedHashRow(db, 'a.js', file);
+    // No cfg_blocks rows are inserted — an empty (or missing) table is the trigger.
+
+    // Without opts the pending-analysis guard is skipped, so fast-skip returns true.
+    expect(detectNoChanges(db, [file], dir)).toBe(true);
+    // With cfg enabled (cfg !== false) and cfg_blocks empty: must return false.
+    expect(detectNoChanges(db, [file], dir, { cfg: true, dataflow: false })).toBe(false);
+    // When cfg explicitly disabled (and dataflow disabled too so its guard
+    // doesn't fire), the empty cfg table is irrelevant.
+    expect(detectNoChanges(db, [file], dir, { cfg: false, dataflow: false })).toBe(true);
+
+    closeDb(db);
+    fs.rmSync(dir, { recursive: true, force: true });
+  });
+
+  it('returns false when dataflow is enabled but dataflow table is empty (#1064)', () => {
+    const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-noChange-pendingDf-'));
+    const dbDir = path.join(dir, '.codegraph');
+    fs.mkdirSync(dbDir, { recursive: true });
+    const db = openDb(path.join(dbDir, 'graph.db'));
+    initSchema(db);
+    const file = seedFile(dir, 'a.js', 'export const a = 1;');
+    seedHashRow(db, 'a.js', file);
+
+    // Disable cfg so only the dataflow guard is exercised.
+    expect(detectNoChanges(db, [file], dir, { cfg: false, dataflow: true })).toBe(false);
+    expect(detectNoChanges(db, [file], dir, { cfg: false, dataflow: false })).toBe(true);
+
+    closeDb(db);
+    fs.rmSync(dir, { recursive: true, force: true });
+  });
+});