From 5ab5bd2ea24d9cf17262d4a576cfe877d4dfe1f5 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 7 May 2026 18:56:06 -0600 Subject: [PATCH 1/2] fix(native): read mtime via BigInt nanoseconds to match Rust truncation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #1075 Rust writes file_hashes.mtime via Duration::as_millis() (truncates), JS read via Math.floor(stat.mtimeMs) (rounds because mtimeMs is f64). At large epoch values the f64 representation rounds up — JS reads N+1 for what Rust stored as N — busting the fast-skip path on every native→JS handoff and producing spurious re-parses on no-op and 1-file rebuilds. Switch fileStat to fs.statSync(path, { bigint: true }) and compute mtime as Number(s.mtimeNs / 1_000_000n). Integer math truncates identically to Duration::as_millis() so writer and reader land on the same value. Updates all consumers (detect-changes, insert-nodes, pipeline backfill) and the FileStat interface to drop the now-redundant Math.floor wrapper. Test seeds use the same BigInt path so they don't regress on f64 round-up. docs check acknowledged: internal change-detection fix; no language, feature, command, architecture, or roadmap surface affected. --- src/domain/graph/builder/helpers.ts | 14 +++++-- src/domain/graph/builder/pipeline.ts | 2 +- .../graph/builder/stages/detect-changes.ts | 12 +++--- .../graph/builder/stages/insert-nodes.ts | 8 ++-- tests/builder/detect-changes.test.ts | 38 ++++++++++++++----- 5 files changed, 49 insertions(+), 25 deletions(-) diff --git a/src/domain/graph/builder/helpers.ts b/src/domain/graph/builder/helpers.ts index 7ab16b63..1be562ae 100644 --- a/src/domain/graph/builder/helpers.ts +++ b/src/domain/graph/builder/helpers.ts @@ -222,12 +222,18 @@ export function fileHash(content: string): string { } /** - * Stat a file, returning { mtimeMs, size } or null on error. + * Stat a file, returning integer-truncated mtime in ms (and size). + * + * Reads via BigInt nanoseconds and truncates with integer math so the value + * matches Rust's `Duration::as_millis() as i64` exactly. `Math.floor(stat.mtimeMs)` + * cannot be substituted: at large epoch values the f64 `mtimeMs` rounds, so a + * Rust-written `file_hashes.mtime` of N can read back as N+1 in JS and bust the + * fast-skip path on every native→JS handoff. */ -export function fileStat(filePath: string): { mtimeMs: number; size: number } | null { +export function fileStat(filePath: string): { mtime: number; size: number } | null { try { - const s = fs.statSync(filePath); - return { mtimeMs: s.mtimeMs, size: s.size }; + const s = fs.statSync(filePath, { bigint: true }); + return { mtime: Number(s.mtimeNs / 1_000_000n), size: Number(s.size) }; } catch { return null; } diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts index 55096e06..33df7345 100644 --- a/src/domain/graph/builder/pipeline.ts +++ b/src/domain/graph/builder/pipeline.ts @@ -938,7 +938,7 @@ async function backfillNativeDroppedFiles(ctx: PipelineContext): Promise { } if (code === null) continue; const stat = fileStat(absPath); - const mtime = stat ? Math.floor(stat.mtimeMs) : 0; + const mtime = stat ? stat.mtime : 0; const size = stat ? stat.size : 0; upsertHash.run(relPath, fileHash(code), mtime, size); } diff --git a/src/domain/graph/builder/stages/detect-changes.ts b/src/domain/graph/builder/stages/detect-changes.ts index 6abf224d..cc51155d 100644 --- a/src/domain/graph/builder/stages/detect-changes.ts +++ b/src/domain/graph/builder/stages/detect-changes.ts @@ -27,7 +27,7 @@ interface FileHashRow { } interface FileStat { - mtimeMs: number; + mtime: number; size: number; } @@ -182,7 +182,7 @@ function mtimeAndHashTiers( if (!stat) continue; const storedMtime = record.mtime || 0; const storedSize = record.size || 0; - if (storedSize > 0 && Math.floor(stat.mtimeMs) === storedMtime && stat.size === storedSize) { + if (storedSize > 0 && stat.mtime === storedMtime && stat.size === storedSize) { skipped.push(relPath); continue; } @@ -596,9 +596,9 @@ export function detectNoChanges( log(`false: stored size <= 0 for ${relPath} (stored=${record.size})`); return false; } - if (Math.floor(stat.mtimeMs) !== storedMtime || stat.size !== storedSize) { + if (stat.mtime !== storedMtime || stat.size !== storedSize) { log( - `false: mtime/size diff for ${relPath}: stat=${Math.floor(stat.mtimeMs)}/${stat.size} stored=${storedMtime}/${storedSize} (mtimeMs=${stat.mtimeMs})`, + `false: mtime/size diff for ${relPath}: stat=${stat.mtime}/${stat.size} stored=${storedMtime}/${storedSize}`, ); return false; } @@ -663,7 +663,7 @@ export async function detectChanges(ctx: PipelineContext): Promise { relPath: c.relPath, content: c.content, hash: c.hash, - stat: c.stat ? { mtime: Math.floor(c.stat.mtimeMs), size: c.stat.size } : undefined, + stat: c.stat ? { mtime: c.stat.mtime, size: c.stat.size } : undefined, _reverseDepOnly: c._reverseDepOnly, })); ctx.metadataUpdates = increResult.changed @@ -674,7 +674,7 @@ export async function detectChanges(ctx: PipelineContext): Promise { .map((c) => ({ relPath: c.relPath, hash: c.hash, - stat: { mtime: Math.floor(c.stat.mtimeMs), size: c.stat.size }, + stat: { mtime: c.stat.mtime, size: c.stat.size }, })); if (!ctx.isFullBuild && ctx.parseChanges.length === 0 && ctx.removed.length === 0) { const ranAnalysis = await runPendingAnalysis(ctx); diff --git a/src/domain/graph/builder/stages/insert-nodes.ts b/src/domain/graph/builder/stages/insert-nodes.ts index 339b1123..064dea95 100644 --- a/src/domain/graph/builder/stages/insert-nodes.ts +++ b/src/domain/graph/builder/stages/insert-nodes.ts @@ -128,7 +128,7 @@ export function buildFileHashes( size = precomputed.stat.size; } else { const rawStat = fileStat(path.join(rootDir, relPath)); - mtime = rawStat ? Math.floor(rawStat.mtimeMs) : 0; + mtime = rawStat ? rawStat.mtime : 0; size = rawStat ? rawStat.size : 0; } fileHashes.push({ file: relPath, hash: precomputed.hash, mtime, size }); @@ -143,7 +143,7 @@ export function buildFileHashes( } if (code !== null) { const stat = fileStat(absPath); - const mtime = stat ? Math.floor(stat.mtimeMs) : 0; + const mtime = stat ? stat.mtime : 0; const size = stat ? stat.size : 0; fileHashes.push({ file: relPath, hash: fileHash(code), mtime, size }); } @@ -365,7 +365,7 @@ function updateFileHashes( size = precomputed.stat.size; } else { const rawStat = fileStat(path.join(rootDir, relPath)); - mtime = rawStat ? Math.floor(rawStat.mtimeMs) : 0; + mtime = rawStat ? rawStat.mtime : 0; size = rawStat ? rawStat.size : 0; } upsertHash.run(relPath, precomputed.hash, mtime, size); @@ -380,7 +380,7 @@ function updateFileHashes( } if (code !== null) { const stat = fileStat(absPath); - const mtime = stat ? Math.floor(stat.mtimeMs) : 0; + const mtime = stat ? stat.mtime : 0; const size = stat ? stat.size : 0; upsertHash.run(relPath, fileHash(code), mtime, size); } diff --git a/tests/builder/detect-changes.test.ts b/tests/builder/detect-changes.test.ts index 7d798ef1..ca9c1ebc 100644 --- a/tests/builder/detect-changes.test.ts +++ b/tests/builder/detect-changes.test.ts @@ -7,6 +7,7 @@ import path from 'node:path'; import { afterAll, beforeAll, describe, expect, it } from 'vitest'; import { closeDb, initSchema, openDb } from '../../src/db/index.js'; import { PipelineContext } from '../../src/domain/graph/builder/context.js'; +import { fileStat } from '../../src/domain/graph/builder/helpers.js'; import { detectChanges, detectNoChanges, @@ -62,12 +63,12 @@ describe('detectChanges stage', () => { const content = fs.readFileSync(path.join(dir, 'a.js'), 'utf-8'); const { createHash } = await import('node:crypto'); const hash = createHash('md5').update(content).digest('hex'); - const stat = fs.statSync(path.join(dir, 'a.js')); + const stat = fs.statSync(path.join(dir, 'a.js'), { bigint: true }); db.prepare('INSERT INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)').run( 'a.js', hash, - Math.floor(stat.mtimeMs), - stat.size, + Number(stat.mtimeNs / 1_000_000n), + Number(stat.size), ); // Write journal header so journal check doesn't confuse things @@ -158,15 +159,16 @@ describe('detectNoChanges fast-skip', () => { relPath: string, filePath: string, ): { mtime: number; size: number } { - const stat = fs.statSync(filePath); - const mtime = Math.floor(stat.mtimeMs); + const stat = fs.statSync(filePath, { bigint: true }); + const mtime = Number(stat.mtimeNs / 1_000_000n); + const size = Number(stat.size); db.prepare('INSERT INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)').run( relPath, 'deadbeef', mtime, - stat.size, + size, ); - return { mtime, size: stat.size }; + return { mtime, size }; } it('returns false when file_hashes is empty (first build)', () => { @@ -221,12 +223,12 @@ describe('detectNoChanges fast-skip', () => { const db = openDb(path.join(dbDir, 'graph.db')); initSchema(db); const file = seedFile(dir, 'a.js', 'export const a = 1;'); - const stat = fs.statSync(file); + const stat = fs.statSync(file, { bigint: true }); db.prepare('INSERT INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)').run( 'a.js', 'deadbeef', - Math.floor(stat.mtimeMs) + 1000, // skewed mtime - stat.size, + Number(stat.mtimeNs / 1_000_000n) + 1000, // skewed mtime + Number(stat.size), ); expect(detectNoChanges(db, [file], dir)).toBe(false); @@ -276,4 +278,20 @@ describe('detectNoChanges fast-skip', () => { closeDb(db); fs.rmSync(dir, { recursive: true, force: true }); }); + + // Pins down the BigInt-nanosecond truncation the helper uses to match Rust's + // `Duration::as_millis() as i64`. Reverting to `Math.floor(stat.mtimeMs)` + // re-introduces #1075: at large epoch values the f64 `mtimeMs` rounds, so a + // Rust-written `file_hashes.mtime` reads back 1ms ahead in JS and busts the + // fast-skip path on every native→JS handoff. + it('fileStat mtime matches Rust Duration::as_millis() truncation (#1075)', () => { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-fileStat-trunc-')); + const file = seedFile(dir, 'a.js', 'export const a = 1;'); + + const big = fs.statSync(file, { bigint: true }); + const expected = Number(big.mtimeNs / 1_000_000n); + expect(fileStat(file)?.mtime).toBe(expected); + + fs.rmSync(dir, { recursive: true, force: true }); + }); }); From 1a44aa48ee1c39895699d800fb3af5aa9b014677 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 7 May 2026 19:35:50 -0600 Subject: [PATCH 2/2] test(builder): make #1075 mtime regression test deterministic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Greptile noted that the previous regression test was probabilistic: the f64 ULP rounding bug only triggers on ~0.026% of fresh-file mtimes (the ~256 ns window where Number(mtimeNs)/1e6 rounds across a ms boundary), so a future revert to Math.floor(stat.mtimeMs) would usually pass. Replace with a vi.spyOn stub that injects a hand-picked BigInt mtimeNs known to diverge between the BigInt path (N) and the f64 path (N+1). The test now fails deterministically against the broken implementation — verified locally by reverting fileStat() and watching the assertion flip 1748400000000 → 1748400000001. The fresh-file disk variant is kept alongside as a sanity check that the helper still works against real filesystem stats. --- tests/builder/detect-changes.test.ts | 64 ++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 14 deletions(-) diff --git a/tests/builder/detect-changes.test.ts b/tests/builder/detect-changes.test.ts index ca9c1ebc..6d0bfba5 100644 --- a/tests/builder/detect-changes.test.ts +++ b/tests/builder/detect-changes.test.ts @@ -4,7 +4,7 @@ import fs from 'node:fs'; import os from 'node:os'; import path from 'node:path'; -import { afterAll, beforeAll, describe, expect, it } from 'vitest'; +import { afterAll, afterEach, beforeAll, describe, expect, it, vi } from 'vitest'; import { closeDb, initSchema, openDb } from '../../src/db/index.js'; import { PipelineContext } from '../../src/domain/graph/builder/context.js'; import { fileStat } from '../../src/domain/graph/builder/helpers.js'; @@ -280,18 +280,54 @@ describe('detectNoChanges fast-skip', () => { }); // Pins down the BigInt-nanosecond truncation the helper uses to match Rust's - // `Duration::as_millis() as i64`. Reverting to `Math.floor(stat.mtimeMs)` - // re-introduces #1075: at large epoch values the f64 `mtimeMs` rounds, so a - // Rust-written `file_hashes.mtime` reads back 1ms ahead in JS and busts the - // fast-skip path on every native→JS handoff. - it('fileStat mtime matches Rust Duration::as_millis() truncation (#1075)', () => { - const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-fileStat-trunc-')); - const file = seedFile(dir, 'a.js', 'export const a = 1;'); - - const big = fs.statSync(file, { bigint: true }); - const expected = Number(big.mtimeNs / 1_000_000n); - expect(fileStat(file)?.mtime).toBe(expected); - - fs.rmSync(dir, { recursive: true, force: true }); + // `Duration::as_millis() as i64`. We can't trigger the f64 ULP rounding bug + // with a freshly-created file (the failure window is ~256 ns out of every ms, + // ~0.026% of values), so instead we stub `fs.statSync` to return a hand-picked + // BigInt `mtimeNs` whose f64-mtimeMs path diverges from the BigInt path: + // ns = 1748400000000999808n (≈ 2025-05-28 epoch ns) + // BigInt: Number(ns / 1_000_000n) === 1748400000000 + // f64 (broken): Math.floor(Number(ns) / 1e6) === 1748400000001 + // Reverting `fileStat` to `Math.floor(stat.mtimeMs)` would flip the result to + // N+1 and fail the assertion deterministically — re-introducing #1075 (the + // Rust-written `file_hashes.mtime` of N reading back as N+1 in JS, busting + // the fast-skip path on every native→JS handoff). + describe('fileStat #1075 mtime truncation', () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('matches Rust Duration::as_millis() truncation at f64-rounding boundary', () => { + // Hand-picked epoch ns where Number(ns)/1e6 rounds up across a ms boundary. + const badMtimeNs = 1748400000000999808n; + const truncatedMs = 1748400000000; + const roundedMs = 1748400000001; + + // Sanity: confirm the chosen value actually triggers the divergence; if a + // future Node.js release changes f64 rounding, this baseline assertion + // catches it before we trust the spy-based test below. + expect(Number(badMtimeNs / 1_000_000n)).toBe(truncatedMs); + expect(Math.floor(Number(badMtimeNs) / 1e6)).toBe(roundedMs); + + const stubStats = { + mtimeNs: badMtimeNs, + mtimeMs: Number(badMtimeNs) / 1e6, + size: 42n, + } as unknown as fs.BigIntStats; + vi.spyOn(fs, 'statSync').mockReturnValue(stubStats); + + // BigInt path must win: N, not N+1. + expect(fileStat('/fake/path.js')?.mtime).toBe(truncatedMs); + }); + + it('returns the BigInt-truncated mtime for a real file on disk', () => { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-fileStat-trunc-')); + const file = seedFile(dir, 'a.js', 'export const a = 1;'); + + const big = fs.statSync(file, { bigint: true }); + const expected = Number(big.mtimeNs / 1_000_000n); + expect(fileStat(file)?.mtime).toBe(expected); + + fs.rmSync(dir, { recursive: true, force: true }); + }); }); });