diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2001e919..551dc5c9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -61,9 +61,13 @@ jobs: with: workspaces: crates/codegraph-core - # Force clang on Linux runners. With gcc (the default cc) the bundled - # C parsers in tree-sitter-hcl produce a binary whose HCL extractor - # silently drops files (#1054). Clang produces a working binary. + # NOTE: this step was added in #1059 on the assumption that gcc-vs-clang + # was responsible for the HCL .tf drop in #1054, but the real root cause + # was a tree-sitter ABI mismatch (HCL grammar shipped ABI 15 while the + # runtime was pinned at ABI 14). That is fixed by the tree-sitter 0.25 + # bump in this PR, so this clang override is now redundant. Kept in place + # here to keep this PR scoped to the ABI fix; a follow-up revert removes + # the step entirely. - name: Use clang on Linux if: matrix.os == 'ubuntu-latest' run: | @@ -78,6 +82,14 @@ jobs: working-directory: crates/codegraph-core run: napi build --release + # Runs `cargo test`, which exercises the grammar-ABI regression test + # added in #1054. Without this step a future tree-sitter / grammar + # version drift would only surface as a runtime "files dropped" + # warning during benchmarks, not as a test failure on the PR. 
+ - name: Run Rust tests + working-directory: crates/codegraph-core + run: cargo test --release + - name: Upload artifact uses: actions/upload-artifact@v7 with: diff --git a/Cargo.lock b/Cargo.lock index fedf6a1c..413504b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -66,7 +66,7 @@ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "codegraph-core" -version = "3.9.5" +version = "3.9.6" dependencies = [ "globset", "ignore", @@ -204,6 +204,12 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + [[package]] name = "fallible-iterator" version = "0.3.0" @@ -342,13 +348,19 @@ dependencies = [ "ahash", ] +[[package]] +name = "hashbrown" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51" + [[package]] name = "hashlink" version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" dependencies = [ - "hashbrown", + "hashbrown 0.14.5", ] [[package]] @@ -367,6 +379,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown 0.17.0", +] + [[package]] name = "itoa" version = "1.0.18" @@ -639,6 +661,7 @@ version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ + "indexmap", 
"itoa", "memchr", "serde", @@ -694,13 +717,14 @@ dependencies = [ [[package]] name = "tree-sitter" -version = "0.24.7" +version = "0.25.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75" +checksum = "78f873475d258561b06f1c595d93308a7ed124d9977cb26b148c2084a4a3cc87" dependencies = [ "cc", "regex", "regex-syntax", + "serde_json", "streaming-iterator", "tree-sitter-language", ] diff --git a/crates/codegraph-core/Cargo.toml b/crates/codegraph-core/Cargo.toml index 1c43718c..df4361e1 100644 --- a/crates/codegraph-core/Cargo.toml +++ b/crates/codegraph-core/Cargo.toml @@ -12,7 +12,7 @@ napi = { version = "3", features = ["serde-json"] } napi-derive = "3" serde = { version = "1", features = ["derive"] } serde_json = "1" -tree-sitter = "0.24" +tree-sitter = "0.25" tree-sitter-javascript = "0.23" tree-sitter-typescript = "0.23" tree-sitter-python = "0.23" diff --git a/crates/codegraph-core/src/parser_registry.rs b/crates/codegraph-core/src/parser_registry.rs index cbf46a0f..c87957f2 100644 --- a/crates/codegraph-core/src/parser_registry.rs +++ b/crates/codegraph-core/src/parser_registry.rs @@ -162,4 +162,101 @@ impl LanguageKind { Self::OcamlInterface => tree_sitter_ocaml::LANGUAGE_OCAML_INTERFACE.into(), } } + + /// Every variant in declaration order. Adding a new `LanguageKind` variant + /// requires adding it here too — the regression test in this file's + /// `tests` module iterates this list to confirm each grammar loads at + /// runtime, so missing entries silently lose ABI coverage for that + /// language. See #1054 (tree-sitter-hcl 1.1.0 shipped ABI 15 while the + /// runtime was pinned at ABI 14, and `set_language` rejected the grammar + /// at runtime instead of at compile time). 
+    pub fn all() -> &'static [LanguageKind] {
+        use LanguageKind::*;
+        &[
+            JavaScript, TypeScript, Tsx, Python, Go, Rust, Java, CSharp, Ruby, Php, Hcl, C,
+            Cpp, Kotlin, Swift, Scala, Bash, Elixir, Lua, Dart, Zig, Haskell, Ocaml,
+            OcamlInterface,
+        ]
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::collections::BTreeSet;
+    use tree_sitter::Parser;
+
+    /// Catches tree-sitter ABI version mismatches between the runtime crate
+    /// and individual grammar crates. When a grammar ships parser code built
+    /// against a newer ABI than the runtime supports, `set_language` rejects
+    /// it with `LanguageError`, `parse_file` silently returns `None`, and
+    /// every file in that language is "dropped" — the user sees a warning
+    /// and the JS layer falls back to WASM. See #1054 (tree-sitter-hcl 1.1.0
+    /// vs tree-sitter 0.24).
+    #[test]
+    fn all_grammars_have_compatible_abi() {
+        let mut failures: Vec<String> = Vec::new();
+        for &kind in LanguageKind::all() {
+            let mut parser = Parser::new();
+            let language = kind.tree_sitter_language();
+            if let Err(e) = parser.set_language(&language) {
+                failures.push(format!(" {:?}: {:?}", kind, e));
+            }
+        }
+        assert!(
+            failures.is_empty(),
+            "Tree-sitter grammar ABI mismatch — bump `tree-sitter` in Cargo.toml \
+             or pin the failing grammar crate down (#1054):\n{}",
+            failures.join("\n")
+        );
+    }
+
+    /// Every variant declared in the enum must appear in `all()` exactly
+    /// once. Without this check, a new variant added to the enum would
+    /// silently lose ABI coverage from `all_grammars_have_compatible_abi`.
+    #[test]
+    fn all_kinds_listed_in_all() {
+        // Exhaustive match — fails to compile if a variant is added without
+        // updating it. The match is a runtime no-op; the exhaustiveness check
+        // is the test. When it stops compiling, also update `all()`.
+        match LanguageKind::JavaScript {
+            LanguageKind::JavaScript
+            | LanguageKind::TypeScript
+            | LanguageKind::Tsx
+            | LanguageKind::Python
+            | LanguageKind::Go
+            | LanguageKind::Rust
+            | LanguageKind::Java
+            | LanguageKind::CSharp
+            | LanguageKind::Ruby
+            | LanguageKind::Php
+            | LanguageKind::Hcl
+            | LanguageKind::C
+            | LanguageKind::Cpp
+            | LanguageKind::Kotlin
+            | LanguageKind::Swift
+            | LanguageKind::Scala
+            | LanguageKind::Bash
+            | LanguageKind::Elixir
+            | LanguageKind::Lua
+            | LanguageKind::Dart
+            | LanguageKind::Zig
+            | LanguageKind::Haskell
+            | LanguageKind::Ocaml
+            | LanguageKind::OcamlInterface => (),
+        }
+        // IMPORTANT: EXPECTED_LEN must equal the number of arms in the match
+        // above. `all()` must have that many entries, all distinct (compared
+        // by `Debug` name) — a bare length check would accept a list that
+        // repeats one variant in place of a missing one.
+        const EXPECTED_LEN: usize = 24;
+        let listed = LanguageKind::all();
+        let unique: BTreeSet<String> = listed.iter().map(|k| format!("{:?}", k)).collect();
+        assert_eq!(
+            (listed.len(), unique.len()),
+            (EXPECTED_LEN, EXPECTED_LEN),
+            "`LanguageKind::all()` must list every variant exactly once; left is \
+             (len, distinct). Update `all()` and bump EXPECTED_LEN.",
+        );
+    }
 }