From b17f1bdc0b2bc1629bdb5a1b92c58b79d19af07c Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Wed, 29 Apr 2026 22:20:18 -0600
Subject: [PATCH 1/3] perf(native): batch-load file/symbol node IDs in edges
 phase (#1013)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces per-import and per-file `conn.query_row` calls in
`build_import_edges` and `build_and_insert_call_edges` with one-shot
HashMap pre-loads. Each `query_row` ran a fresh sqlite3_prepare/step/
finalize cycle; on a ~470-file repo this paid ~2.3k cycles in the
import-edge stage alone, dominating the edges phase.

Also chunks the import-edge insert into multi-row VALUES batches
(199 rows × 5 params), mirroring `edges_db::do_insert_edges`, to
remove the per-row prepared-statement bind/step/reset overhead.

Self-build benchmark (codegraph on itself, 743 files):

  edges:  124.5 ms native vs 193 ms wasm  (was 310 ms / 179 ms in 3.9.5)
  roles:   73.7 ms native vs  70 ms wasm  (was 269 ms /  62 ms in 3.9.5)

Roles converged as a side effect — its prior tail was driven by
shared SQLite cache pressure during the same build session.

Closes #1013

(docs check acknowledged: internal perf fix, no API/language/feature surface changes)
---
 crates/codegraph-core/src/build_pipeline.rs |  30 +++--
 crates/codegraph-core/src/import_edges.rs   | 135 +++++++++++++++-----
 2 files changed, 123 insertions(+), 42 deletions(-)
diff --git a/crates/codegraph-core/src/build_pipeline.rs b/crates/codegraph-core/src/build_pipeline.rs
index 92ef2b1f..364b02c8 100644
--- a/crates/codegraph-core/src/build_pipeline.rs
+++ b/crates/codegraph-core/src/build_pipeline.rs
@@ -1081,6 +1081,25 @@ fn build_and_insert_call_edges(
     .map(String::from)
     .collect();
 
+    // Pre-load every file node ID into a HashMap with one query, replacing
+    // the per-file `query_row` cycle that paid a fresh sqlite3_prepare for
+    // each entry in `file_symbols` (#1013).
+    let file_node_ids: HashMap<String, u32> = {
+        let mut map = HashMap::new();
+        if let Ok(mut stmt) =
+            conn.prepare("SELECT file, id FROM nodes WHERE kind = 'file' AND line = 0")
+        {
+            if let Ok(rows) = stmt.query_map([], |row| {
+                Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)? as u32))
+            }) {
+                for r in rows.flatten() {
+                    map.insert(r.0, r.1);
+                }
+            }
+        }
+        map
+    };
+
     // Build FileEdgeInput entries for the native edge builder
     let mut file_entries: Vec<FileEdgeInput> = Vec::new();
     for (rel_path, symbols) in file_symbols {
@@ -1088,14 +1107,9 @@ fn build_and_insert_call_edges(
             continue;
         }
 
-        // Look up file node ID
-        let file_node_id: u32 = match conn.query_row(
-            "SELECT id FROM nodes WHERE name = ? AND kind = 'file' AND file = ? AND line = 0",
-            [rel_path, rel_path],
-            |row| row.get::<_, i64>(0),
-        ) {
-            Ok(id) => id as u32,
-            Err(_) => continue,
+        let file_node_id: u32 = match file_node_ids.get(rel_path) {
+            Some(&id) => id,
+            None => continue,
         };
 
         // Build imported names from resolved imports
diff --git a/crates/codegraph-core/src/import_edges.rs b/crates/codegraph-core/src/import_edges.rs
index f0f517c2..251727a2 100644
--- a/crates/codegraph-core/src/import_edges.rs
+++ b/crates/codegraph-core/src/import_edges.rs
@@ -153,24 +153,48 @@ pub fn detect_barrel_only_files(ctx: &ImportEdgeContext) -> HashSet<String> {
     barrel_only
 }
 
-/// Look up a file node ID from the database.
-fn get_file_node_id(conn: &Connection, rel_path: &str) -> Option<i64> {
-    conn.query_row(
-        "SELECT id FROM nodes WHERE name = ? AND kind = 'file' AND file = ? AND line = 0",
-        [rel_path, rel_path],
-        |row| row.get(0),
-    )
-    .ok()
+/// Load every file node ID into a HashMap in one query — replaces per-import
+/// `conn.query_row` lookups that paid the SQLite prepare/execute cycle on each
+/// call (#1013).
+fn load_file_node_ids(conn: &Connection) -> HashMap<String, i64> {
+    let mut map = HashMap::new();
+    if let Ok(mut stmt) =
+        conn.prepare("SELECT file, id FROM nodes WHERE kind = 'file' AND line = 0")
+    {
+        if let Ok(rows) = stmt.query_map([], |row| {
+            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
+        }) {
+            for r in rows.flatten() {
+                map.insert(r.0, r.1);
+            }
+        }
+    }
+    map
 }
 
-/// Look up the first symbol node ID by name and file (for type-only import resolution).
-fn get_symbol_node_id(conn: &Connection, name: &str, file: &str) -> Option<i64> {
-    conn.query_row(
-        "SELECT id FROM nodes WHERE name = ? AND file = ? AND kind != 'file' LIMIT 1",
-        [name, file],
-        |row| row.get(0),
-    )
-    .ok()
+/// Load every (name, file) -> id mapping for non-file nodes in one query.
+/// Mirrors the JS `nodesByNameAndFile` lookup map; preserves the first-row
+/// semantics of the legacy `LIMIT 1` query by keeping the first ID seen per
+/// key. Skipped entirely when no type-only imports exist (saves one full
+/// scan of `nodes` on the common case).
+fn load_symbol_node_ids(conn: &Connection) -> HashMap<(String, String), i64> {
+    let mut map = HashMap::new();
+    if let Ok(mut stmt) =
+        conn.prepare("SELECT name, file, id FROM nodes WHERE kind != 'file'")
+    {
+        if let Ok(rows) = stmt.query_map([], |row| {
+            Ok((
+                row.get::<_, String>(0)?,
+                row.get::<_, String>(1)?,
+                row.get::<_, i64>(2)?,
+            ))
+        }) {
+            for r in rows.flatten() {
+                map.entry((r.0, r.1)).or_insert(r.2);
+            }
+        }
+    }
+    map
 }
 
 /// Build import edges from parsed file symbols.
@@ -185,10 +209,23 @@ fn get_symbol_node_id(conn: &Connection, name: &str, file: &str) -> Option<i64>
 pub fn build_import_edges(conn: &Connection, ctx: &ImportEdgeContext) -> Vec<EdgeRow> {
     let mut edges = Vec::new();
 
+    // Pre-load all file node IDs once. Previously this was N x query_row,
+    // each of which ran a fresh sqlite3_prepare/step/finalize cycle (#1013).
+    let file_node_ids = load_file_node_ids(conn);
+    let needs_symbol_map = ctx
+        .file_symbols
+        .values()
+        .any(|s| s.imports.iter().any(|i| i.type_only.unwrap_or(false)));
+    let symbol_node_ids = if needs_symbol_map {
+        load_symbol_node_ids(conn)
+    } else {
+        HashMap::new()
+    };
+
     for (rel_path, symbols) in &ctx.file_symbols {
         let is_barrel_only = ctx.barrel_only_files.contains(rel_path);
-        let file_node_id = match get_file_node_id(conn, rel_path) {
-            Some(id) => id,
+        let file_node_id = match file_node_ids.get(rel_path) {
+            Some(&id) => id,
             None => continue,
         };
 
@@ -203,8 +240,8 @@ pub fn build_import_edges(conn: &Connection, ctx: &ImportEdgeContext) -> Vec<Edg
             }
 
             let resolved_path = ctx.get_resolved(abs_str, &imp.source);
-            let target_id = match get_file_node_id(conn, &resolved_path) {
-                Some(id) => id,
+            let target_id = match file_node_ids.get(&resolved_path) {
+                Some(&id) => id,
                 None => continue,
             };
 
@@ -238,7 +275,9 @@ pub fn build_import_edges(conn: &Connection, ctx: &ImportEdgeContext) -> Vec<Edg
                             target_file = actual;
                         }
                     }
-                    if let Some(sym_id) = get_symbol_node_id(conn, clean_name, &target_file) {
+                    if let Some(&sym_id) =
+                        symbol_node_ids.get(&(clean_name.to_string(), target_file))
+                    {
                         edges.push(EdgeRow {
                             source_id: file_node_id,
                             target_id: sym_id,
@@ -262,7 +301,7 @@ pub fn build_import_edges(conn: &Connection, ctx: &ImportEdgeContext) -> Vec<Edg
                         if actual_source != resolved_path
                             && resolved_sources.insert(actual_source.clone())
                         {
-                            if let Some(actual_id) = get_file_node_id(conn, &actual_source) {
+                            if let Some(&actual_id) = file_node_ids.get(&actual_source) {
                                 let through_kind = match edge_kind {
                                     "imports-type" => "imports-type",
                                     "dynamic-imports" => "dynamic-imports",
@@ -286,7 +325,15 @@ pub fn build_import_edges(conn: &Connection, ctx: &ImportEdgeContext) -> Vec<Edg
     edges
 }
 
-/// Batch insert edges into the database.
+/// 199 rows × 5 params = 995 bind parameters, safely under the legacy
+/// `SQLITE_MAX_VARIABLE_NUMBER` default of 999. Mirrors `edges_db::CHUNK`.
+const INSERT_CHUNK: usize = 199;
+
+/// Batch insert edges into the database using multi-row VALUES chunks.
+///
+/// Replaces the previous one-prepared-statement-per-row pattern that paid a
+/// per-edge bind/step/reset cycle. With the chunked path each chunk runs a
+/// single VM execution against a cached statement (#1013).
 pub fn insert_edges(conn: &Connection, edges: &[EdgeRow]) {
     if edges.is_empty() {
         return;
@@ -295,18 +342,38 @@ pub fn insert_edges(conn: &Connection, edges: &[EdgeRow]) {
         Ok(tx) => tx,
         Err(_) => return,
     };
-    if let Ok(mut stmt) = tx.prepare(
-        "INSERT OR IGNORE INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, ?, ?)",
-    ) {
-        for e in edges {
-            let _ = stmt.execute(rusqlite::params![
-                e.source_id,
-                e.target_id,
-                e.kind,
-                e.confidence,
-                e.dynamic,
-            ]);
+
+    for chunk in edges.chunks(INSERT_CHUNK) {
+        let placeholders: Vec<String> = (0..chunk.len())
+            .map(|i| {
+                let base = i * 5;
+                format!(
+                    "(?{},?{},?{},?{},?{})",
+                    base + 1,
+                    base + 2,
+                    base + 3,
+                    base + 4,
+                    base + 5
+                )
+            })
+            .collect();
+        let sql = format!(
+            "INSERT OR IGNORE INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES {}",
+            placeholders.join(",")
+        );
+        let mut stmt = match tx.prepare_cached(&sql) {
+            Ok(s) => s,
+            Err(_) => continue,
+        };
+        for (i, edge) in chunk.iter().enumerate() {
+            let base = i * 5;
+            let _ = stmt.raw_bind_parameter(base + 1, edge.source_id);
+            let _ = stmt.raw_bind_parameter(base + 2, edge.target_id);
+            let _ = stmt.raw_bind_parameter(base + 3, edge.kind.as_str());
+            let _ = stmt.raw_bind_parameter(base + 4, edge.confidence);
+            let _ = stmt.raw_bind_parameter(base + 5, edge.dynamic);
         }
+        let _ = stmt.raw_execute();
     }
     let _ = tx.commit();
 }

From 1081b64d82d49c143f638b30ea24c507a268dadd Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Wed, 29 Apr 2026 23:01:34 -0600
Subject: [PATCH 2/3] fix(native): bound symbol-node lookup and restore
 name=file guard (#1028)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address two P2 review concerns from greptile on #1013:

1. **Bounded symbol-node lookup.** `load_symbol_node_ids` previously did
   an unbounded `SELECT name, file, id FROM nodes WHERE kind != 'file'`,
   loading every non-file symbol into memory. On 100k+-symbol monorepos
   this could push hundreds of MB. Now we walk type-only imports up
   front to collect the distinct `(name, file)` pairs we'll actually
   need, then issue a chunked `(name, file) IN (...)` query (332 pairs
   per chunk × 2 binds = 664, safely under `SQLITE_MAX_VARIABLE_NUMBER`).
   The full-scan path is gone; only the symbols referenced by type-only
   imports are hit, preserving the one-round-trip win without the
   memory blow-up.

2. **Restore `name = file` guard on file-node lookups.** The original
   per-row query bound `rel_path` to both `name = ?` and `file = ?`,
   matching only nodes where the two columns agreed. The bulk query
   keyed on `file` alone, so an unrelated file-kind row sharing the
   same `file` value (different `name`) could silently overwrite the
   map entry. Add `AND name = file` to both `load_file_node_ids` and
   the parallel pre-load in `build_and_insert_call_edges` to keep the
   legacy semantics explicit.
---
 crates/codegraph-core/src/build_pipeline.rs |  12 +-
 crates/codegraph-core/src/import_edges.rs   | 129 +++++++++++++++-----
 2 files changed, 109 insertions(+), 32 deletions(-)

diff --git a/crates/codegraph-core/src/build_pipeline.rs b/crates/codegraph-core/src/build_pipeline.rs
index 364b02c8..72644d4d 100644
--- a/crates/codegraph-core/src/build_pipeline.rs
+++ b/crates/codegraph-core/src/build_pipeline.rs
@@ -1084,11 +1084,17 @@ fn build_and_insert_call_edges(
     // Pre-load every file node ID into a HashMap with one query, replacing
     // the per-file `query_row` cycle that paid a fresh sqlite3_prepare for
     // each entry in `file_symbols` (#1013).
+    //
+    // The `name = file` predicate matches the legacy per-row lookup
+    // (`WHERE name = ? AND file = ?` with both binds set to `rel_path`).
+    // For file-kind nodes `name` and `file` are conventionally identical,
+    // but keeping the guard prevents an unrelated row from silently
+    // overwriting the map entry for `file` (#1028 review).
     let file_node_ids: HashMap<String, u32> = {
         let mut map = HashMap::new();
-        if let Ok(mut stmt) =
-            conn.prepare("SELECT file, id FROM nodes WHERE kind = 'file' AND line = 0")
-        {
+        if let Ok(mut stmt) = conn.prepare(
+            "SELECT file, id FROM nodes WHERE kind = 'file' AND line = 0 AND name = file",
+        ) {
             if let Ok(rows) = stmt.query_map([], |row| {
                 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)? as u32))
             }) {
diff --git a/crates/codegraph-core/src/import_edges.rs b/crates/codegraph-core/src/import_edges.rs
index 251727a2..23a90b70 100644
--- a/crates/codegraph-core/src/import_edges.rs
+++ b/crates/codegraph-core/src/import_edges.rs
@@ -156,11 +156,17 @@ pub fn detect_barrel_only_files(ctx: &ImportEdgeContext) -> HashSet<String> {
 /// Load every file node ID into a HashMap in one query — replaces per-import
 /// `conn.query_row` lookups that paid the SQLite prepare/execute cycle on each
 /// call (#1013).
+///
+/// Includes the explicit `name = file` predicate that matched the legacy
+/// per-row lookup (`WHERE name = ? AND file = ?` with both binds set to
+/// `rel_path`). For file-kind nodes `name` and `file` are conventionally
+/// identical, but keeping the guard prevents an unrelated row from silently
+/// overwriting the map entry for `file`.
 fn load_file_node_ids(conn: &Connection) -> HashMap<String, i64> {
     let mut map = HashMap::new();
-    if let Ok(mut stmt) =
-        conn.prepare("SELECT file, id FROM nodes WHERE kind = 'file' AND line = 0")
-    {
+    if let Ok(mut stmt) = conn.prepare(
+        "SELECT file, id FROM nodes WHERE kind = 'file' AND line = 0 AND name = file",
+    ) {
         if let Ok(rows) = stmt.query_map([], |row| {
             Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
         }) {
@@ -172,31 +178,95 @@ fn load_file_node_ids(conn: &Connection) -> HashMap<String, i64> {
     map
 }
 
-/// Load every (name, file) -> id mapping for non-file nodes in one query.
-/// Mirrors the JS `nodesByNameAndFile` lookup map; preserves the first-row
-/// semantics of the legacy `LIMIT 1` query by keeping the first ID seen per
-/// key. Skipped entirely when no type-only imports exist (saves one full
-/// scan of `nodes` on the common case).
-fn load_symbol_node_ids(conn: &Connection) -> HashMap<(String, String), i64> {
-    let mut map = HashMap::new();
-    if let Ok(mut stmt) =
-        conn.prepare("SELECT name, file, id FROM nodes WHERE kind != 'file'")
-    {
-        if let Ok(rows) = stmt.query_map([], |row| {
-            Ok((
-                row.get::<_, String>(0)?,
-                row.get::<_, String>(1)?,
-                row.get::<_, i64>(2)?,
-            ))
-        }) {
-            for r in rows.flatten() {
-                map.entry((r.0, r.1)).or_insert(r.2);
+/// Load symbol node IDs for the supplied `(name, file)` pairs in one chunked
+/// query. Mirrors the JS `nodesByNameAndFile` lookup map; preserves the
+/// first-row semantics of the legacy `LIMIT 1` query by keeping the first ID
+/// seen per key.
+///
+/// The pairs are pre-computed by walking the type-only imports in
+/// `ctx.file_symbols`, so we never scan the entire `nodes` table — even on
+/// monorepos with 100k+ symbols, only the small slice actually referenced by
+/// type-only imports is hit (#1013, #1028 review).
+fn load_symbol_node_ids(
+    conn: &Connection,
+    needed_pairs: &HashSet<(String, String)>,
+) -> HashMap<(String, String), i64> {
+    let mut map: HashMap<(String, String), i64> = HashMap::new();
+    if needed_pairs.is_empty() {
+        return map;
+    }
+
+    // 332 pairs × 2 params + 1 spare = 665 binds, comfortably under
+    // `SQLITE_MAX_VARIABLE_NUMBER`'s legacy 999 default.
+    const SYMBOL_LOOKUP_CHUNK: usize = 332;
+
+    let pairs: Vec<&(String, String)> = needed_pairs.iter().collect();
+    for chunk in pairs.chunks(SYMBOL_LOOKUP_CHUNK) {
+        let placeholders: Vec<String> = (0..chunk.len())
+            .map(|i| {
+                let base = i * 2;
+                format!("(?{},?{})", base + 1, base + 2)
+            })
+            .collect();
+        let sql = format!(
+            "SELECT name, file, id FROM nodes WHERE kind != 'file' AND (name, file) IN ({})",
+            placeholders.join(",")
+        );
+        let mut params: Vec<&dyn rusqlite::ToSql> = Vec::with_capacity(chunk.len() * 2);
+        for (name, file) in chunk {
+            params.push(name);
+            params.push(file);
+        }
+
+        if let Ok(mut stmt) = conn.prepare(&sql) {
+            if let Ok(rows) = stmt.query_map(rusqlite::params_from_iter(params.iter()), |row| {
+                Ok((
+                    row.get::<_, String>(0)?,
+                    row.get::<_, String>(1)?,
+                    row.get::<_, i64>(2)?,
+                ))
+            }) {
+                for r in rows.flatten() {
+                    map.entry((r.0, r.1)).or_insert(r.2);
+                }
             }
         }
     }
     map
 }
 
+/// Walk type-only imports in `ctx.file_symbols` and return the distinct
+/// `(name, file)` pairs that `build_import_edges` will need to look up.
+/// Resolves barrel files the same way the edge-building loop does so the
+/// pre-computed set matches the actual lookup keys.
+fn collect_type_only_lookup_pairs(ctx: &ImportEdgeContext) -> HashSet<(String, String)> {
+    let mut pairs = HashSet::new();
+    for (rel_path, symbols) in &ctx.file_symbols {
+        let abs_file = Path::new(&ctx.root_dir).join(rel_path);
+        let abs_str = abs_file.to_str().unwrap_or("");
+        for imp in &symbols.imports {
+            if !imp.type_only.unwrap_or(false) {
+                continue;
+            }
+            let resolved_path = ctx.get_resolved(abs_str, &imp.source);
+            for name in &imp.names {
+                let clean_name = name.strip_prefix("* as ").unwrap_or(name);
+                let mut target_file = resolved_path.clone();
+                if ctx.is_barrel_file(&resolved_path) {
+                    let mut visited = HashSet::new();
+                    if let Some(actual) =
+                        ctx.resolve_barrel_export(&resolved_path, clean_name, &mut visited)
+                    {
+                        target_file = actual;
+                    }
+                }
+                pairs.insert((clean_name.to_string(), target_file));
+            }
+        }
+    }
+    pairs
+}
+
 /// Build import edges from parsed file symbols.
 ///
 /// For each file's imports, resolves the target path and creates edges:
@@ -212,14 +282,15 @@ pub fn build_import_edges(conn: &Connection, ctx: &ImportEdgeContext) -> Vec<Edg
     // Pre-load all file node IDs once. Previously this was N x query_row,
     // each of which ran a fresh sqlite3_prepare/step/finalize cycle (#1013).
     let file_node_ids = load_file_node_ids(conn);
-    let needs_symbol_map = ctx
-        .file_symbols
-        .values()
-        .any(|s| s.imports.iter().any(|i| i.type_only.unwrap_or(false)));
-    let symbol_node_ids = if needs_symbol_map {
-        load_symbol_node_ids(conn)
-    } else {
+    // Only the symbols actually referenced by type-only imports are needed —
+    // skip the lookup entirely when no type-only imports exist (the common
+    // case), and otherwise issue a chunked `(name, file) IN (...)` query so
+    // memory stays bounded even on large monorepos (#1028 review).
+    let needed_symbol_pairs = collect_type_only_lookup_pairs(ctx);
+    let symbol_node_ids = if needed_symbol_pairs.is_empty() {
         HashMap::new()
+    } else {
+        load_symbol_node_ids(conn, &needed_symbol_pairs)
     };
 
     for (rel_path, symbols) in &ctx.file_symbols {

From cb492ef1fa1a5ad11fe07b60b0481653463ec0a3 Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Wed, 29 Apr 2026 23:07:58 -0600
Subject: [PATCH 3/3] fix(native): surface bind/execute errors in insert_edges
 (#1028)

Greptile flagged two P2 issues in `insert_edges`:

1. Silent bind failures: `let _ = stmt.raw_bind_parameter(...)` silently
   discarded errors. A failed bind would leave that position unbound
   (NULL), causing `raw_execute()` to insert ghost edge rows with NULL
   `source_id`/`target_id`. Bind/execute now run inside a fallible
   `insert_edge_chunk` helper; failures emit a stderr warning and the
   chunk is skipped instead of producing partial rows.

2. `prepare_cached` mismatched with dynamic SQL: the SQL string varies
   with chunk length, so trailing partial chunks were always cache
   misses. Switched to plain `tx.prepare(&sql)` to match intent.

No behavioural change for the success path. cargo test
-p codegraph-core --lib: 181 passed.
---
 crates/codegraph-core/src/import_edges.rs | 89 +++++++++++++++--------
 1 file changed, 57 insertions(+), 32 deletions(-)

diff --git a/crates/codegraph-core/src/import_edges.rs b/crates/codegraph-core/src/import_edges.rs
index 23a90b70..45847692 100644
--- a/crates/codegraph-core/src/import_edges.rs
+++ b/crates/codegraph-core/src/import_edges.rs
@@ -404,49 +404,74 @@ const INSERT_CHUNK: usize = 199;
 ///
 /// Replaces the previous one-prepared-statement-per-row pattern that paid a
 /// per-edge bind/step/reset cycle. With the chunked path each chunk runs a
-/// single VM execution against a cached statement (#1013).
+/// single VM execution against a freshly prepared statement (#1013).
+///
+/// Bind/execute errors are surfaced via a stderr warning and the offending
+/// chunk is skipped — silently swallowing them previously could produce
+/// `NULL` columns in the inserted edge rows.
 pub fn insert_edges(conn: &Connection, edges: &[EdgeRow]) {
     if edges.is_empty() {
         return;
     }
     let tx = match conn.unchecked_transaction() {
         Ok(tx) => tx,
-        Err(_) => return,
+        Err(e) => {
+            eprintln!("[codegraph] insert_edges: failed to start transaction: {e}");
+            return;
+        }
     };
 
     for chunk in edges.chunks(INSERT_CHUNK) {
-        let placeholders: Vec<String> = (0..chunk.len())
-            .map(|i| {
-                let base = i * 5;
-                format!(
-                    "(?{},?{},?{},?{},?{})",
-                    base + 1,
-                    base + 2,
-                    base + 3,
-                    base + 4,
-                    base + 5
-                )
-            })
-            .collect();
-        let sql = format!(
-            "INSERT OR IGNORE INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES {}",
-            placeholders.join(",")
-        );
-        let mut stmt = match tx.prepare_cached(&sql) {
-            Ok(s) => s,
-            Err(_) => continue,
-        };
-        for (i, edge) in chunk.iter().enumerate() {
-            let base = i * 5;
-            let _ = stmt.raw_bind_parameter(base + 1, edge.source_id);
-            let _ = stmt.raw_bind_parameter(base + 2, edge.target_id);
-            let _ = stmt.raw_bind_parameter(base + 3, edge.kind.as_str());
-            let _ = stmt.raw_bind_parameter(base + 4, edge.confidence);
-            let _ = stmt.raw_bind_parameter(base + 5, edge.dynamic);
+        if let Err(e) = insert_edge_chunk(&tx, chunk) {
+            eprintln!(
+                "[codegraph] insert_edges: skipped chunk of {} rows due to error: {e}",
+                chunk.len()
+            );
         }
-        let _ = stmt.raw_execute();
     }
-    let _ = tx.commit();
+    if let Err(e) = tx.commit() {
+        eprintln!("[codegraph] insert_edges: commit failed: {e}");
+    }
+}
+
+/// Bind and execute a single chunk in its own fallible scope so the caller
+/// can log the failure and continue with the next chunk.
+///
+/// `prepare` (not `prepare_cached`) is used because the SQL string varies
+/// with chunk length — caching keyed on dynamic SQL would churn the LRU
+/// for every partial trailing chunk and obscure the intent of the cache.
+fn insert_edge_chunk(
+    tx: &rusqlite::Transaction<'_>,
+    chunk: &[EdgeRow],
+) -> rusqlite::Result<()> {
+    let placeholders: Vec<String> = (0..chunk.len())
+        .map(|i| {
+            let base = i * 5;
+            format!(
+                "(?{},?{},?{},?{},?{})",
+                base + 1,
+                base + 2,
+                base + 3,
+                base + 4,
+                base + 5
+            )
+        })
+        .collect();
+    let sql = format!(
+        "INSERT OR IGNORE INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES {}",
+        placeholders.join(",")
+    );
+    let mut stmt = tx.prepare(&sql)?;
+    for (i, edge) in chunk.iter().enumerate() {
+        let base = i * 5;
+        stmt.raw_bind_parameter(base + 1, edge.source_id)?;
+        stmt.raw_bind_parameter(base + 2, edge.target_id)?;
+        stmt.raw_bind_parameter(base + 3, edge.kind.as_str())?;
+        stmt.raw_bind_parameter(base + 4, edge.confidence)?;
+        stmt.raw_bind_parameter(base + 5, edge.dynamic)?;
+    }
+    stmt.raw_execute()?;
+    Ok(())
 }
 
 #[cfg(test)]