From df193727135a482b2756f920063577bc5a85e8cc Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 11 May 2026 03:39:02 -0600 Subject: [PATCH 1/3] feat(native): port CUDA extractor to Rust Adds tree-sitter-cuda dependency and native extractor matching the WASM-side behavior for CUDA symbol, import, and call extraction. Part of #1071 --- Cargo.lock | 11 + crates/codegraph-core/Cargo.toml | 1 + crates/codegraph-core/src/extractors/cuda.rs | 550 ++++++++++++++++++ .../codegraph-core/src/extractors/helpers.rs | 14 + crates/codegraph-core/src/extractors/mod.rs | 4 + crates/codegraph-core/src/file_collector.rs | 5 +- crates/codegraph-core/src/parser_registry.rs | 12 +- src/ast-analysis/rules/index.ts | 9 + src/domain/parser.ts | 2 + .../native-drop-classification.test.ts | 3 +- 10 files changed, 604 insertions(+), 7 deletions(-) create mode 100644 crates/codegraph-core/src/extractors/cuda.rs diff --git a/Cargo.lock b/Cargo.lock index 413504b0d..d4dcd5355 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -84,6 +84,7 @@ dependencies = [ "tree-sitter-c", "tree-sitter-c-sharp", "tree-sitter-cpp", + "tree-sitter-cuda", "tree-sitter-dart", "tree-sitter-elixir", "tree-sitter-go", @@ -769,6 +770,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-cuda" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "715eecfee69b15991de5b9f78009c6d4cb34e18d20d028304a75d38528cddb45" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-dart" version = "0.0.4" diff --git a/crates/codegraph-core/Cargo.toml b/crates/codegraph-core/Cargo.toml index df4361e17..ee3ae3c1f 100644 --- a/crates/codegraph-core/Cargo.toml +++ b/crates/codegraph-core/Cargo.toml @@ -24,6 +24,7 @@ tree-sitter-ruby = "0.23" tree-sitter-php = "0.23" tree-sitter-c = "0.23" tree-sitter-cpp = "0.23" +tree-sitter-cuda = "0.21" tree-sitter-kotlin-sg = "0.4" tree-sitter-swift = "0.6" tree-sitter-scala = "0.24" diff --git 
a/crates/codegraph-core/src/extractors/cuda.rs b/crates/codegraph-core/src/extractors/cuda.rs new file mode 100644 index 000000000..19746b9bb --- /dev/null +++ b/crates/codegraph-core/src/extractors/cuda.rs @@ -0,0 +1,550 @@ +//! CUDA extractor. +//! +//! CUDA is a C++ superset. The tree-sitter-cuda grammar extends C++ with +//! `__global__`/`__device__`/`__host__`/`__shared__`/`__constant__` +//! qualifiers and kernel launch syntax (`<<<...>>>`). This mirrors the JS +//! extractor in `src/extractors/cuda.ts`: identical node-handler set to C++ +//! plus CUDA-specific qualifier detection emitted as `decorators` on +//! function/method definitions. +//! +//! See `crates/codegraph-core/src/extractors/cpp.rs` for the close cousin +//! whose patterns this file reuses. + +use super::helpers::*; +use super::SymbolExtractor; +use crate::cfg::build_function_cfg; +use crate::complexity::compute_all_metrics; +use crate::types::*; +use tree_sitter::{Node, Tree}; + +pub struct CudaExtractor; + +impl SymbolExtractor for CudaExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_tree(&tree.root_node(), source, &mut symbols, match_cuda_node); + walk_ast_nodes_with_config( + &tree.root_node(), + source, + &mut symbols.ast_nodes, + &CUDA_AST_CONFIG, + ); + symbols + } +} + +// ── CUDA-specific qualifiers ──────────────────────────────────────────────── + +const CUDA_QUALIFIERS: &[&str] = &[ + "__global__", + "__device__", + "__host__", + "__shared__", + "__constant__", +]; + +fn is_cuda_qualifier(text: &str) -> bool { + CUDA_QUALIFIERS.contains(&text) +} + +/// Collect CUDA qualifiers attached to a function_definition. 
+/// +/// Mirrors `extractCudaQualifiers` in `src/extractors/cuda.ts`: scan direct +/// children of the function_definition node, accepting either: +/// - a bare token whose text matches a CUDA qualifier, or +/// - a `storage_class_specifier`/`attribute_specifier` wrapper whose text +/// matches a CUDA qualifier. +/// +/// The JS implementation uses `else if` to avoid emitting the same qualifier +/// twice when a wrapper node's text also matches; the `if`/`else if` chain here +/// preserves that ordering. +fn extract_cuda_qualifiers(node: &Node, source: &[u8]) -> Vec<String> { + let mut qualifiers = Vec::new(); + for i in 0..node.child_count() { + let child = match node.child(i) { + Some(c) => c, + None => continue, + }; + let kind = child.kind(); + let text = node_text(&child, source); + if kind == "storage_class_specifier" || kind == "attribute_specifier" { + if is_cuda_qualifier(text) { + qualifiers.push(text.to_string()); + } + } else if is_cuda_qualifier(text) { + qualifiers.push(text.to_string()); + } + } + qualifiers +} + +// ── Declarator helpers (mirror cpp.rs) ────────────────────────────────────── + +fn unwrap_cuda_declarator(node: &Node, source: &[u8]) -> String { + let mut current = *node; + loop { + match current.kind() { + "pointer_declarator" + | "reference_declarator" + | "array_declarator" + | "parenthesized_declarator" => { + if let Some(inner) = current.child_by_field_name("declarator") { + current = inner; + } else { + break; + } + } + "identifier" | "field_identifier" => { + return node_text(&current, source).to_string(); + } + _ => break, + } + } + node_text(&current, source).to_string() +} + +fn extract_cuda_function_name(node: &Node, source: &[u8]) -> Option<String> { + let declarator = node.child_by_field_name("declarator")?; + extract_cuda_func_name_from_declarator(&declarator, source) +} + +fn extract_cuda_func_name_from_declarator(declarator: &Node, source: &[u8]) -> Option<String> { + match declarator.kind() { + "function_declarator" => { + let inner = 
declarator.child_by_field_name("declarator")?; + Some(unwrap_cuda_declarator(&inner, source)) + } + "pointer_declarator" | "reference_declarator" => { + let inner = find_child(declarator, "function_declarator")?; + let name_node = inner.child_by_field_name("declarator")?; + Some(unwrap_cuda_declarator(&name_node, source)) + } + _ => Some(unwrap_cuda_declarator(declarator, source)), + } +} + +fn extract_cuda_parameters(node: &Node, source: &[u8]) -> Vec<Definition> { + let mut params = Vec::new(); + let declarator = match node.child_by_field_name("declarator") { + Some(d) => d, + None => return params, + }; + let func_decl = if declarator.kind() == "function_declarator" { + Some(declarator) + } else { + find_child(&declarator, "function_declarator") + }; + if let Some(func_decl) = func_decl { + if let Some(param_list) = func_decl.child_by_field_name("parameters") { + for i in 0..param_list.child_count() { + if let Some(child) = param_list.child(i) { + if child.kind() == "parameter_declaration" + || child.kind() == "optional_parameter_declaration" + { + if let Some(decl) = child.child_by_field_name("declarator") { + let name = unwrap_cuda_declarator(&decl, source); + if !name.is_empty() { + params.push(child_def(name, "parameter", start_line(&child))); + } + } + } + } + } + } + } + params +} + +fn extract_cuda_fields(body: &Node, source: &[u8]) -> Vec<Definition> { + let mut fields = Vec::new(); + for i in 0..body.child_count() { + if let Some(child) = body.child(i) { + if child.kind() == "field_declaration" { + if let Some(decl) = child.child_by_field_name("declarator") { + let name = unwrap_cuda_declarator(&decl, source); + if !name.is_empty() { + fields.push(child_def(name, "property", start_line(&child))); + } + } + } + } + } + fields +} + +fn extract_cuda_enum_constants(node: &Node, source: &[u8]) -> Vec<Definition> { + let mut constants = Vec::new(); + if let Some(body) = node.child_by_field_name("body") { + for i in 0..body.child_count() { + if let Some(child) = body.child(i) { + if child.kind() 
== "enumerator" { + if let Some(name_node) = child.child_by_field_name("name") { + constants.push(child_def( + node_text(&name_node, source).to_string(), + "constant", + start_line(&child), + )); + } + } + } + } + } + constants +} + +fn extract_cuda_base_classes( + node: &Node, + source: &[u8], + class_name: &str, + symbols: &mut FileSymbols, +) { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "base_class_clause" { + for j in 0..child.child_count() { + if let Some(base) = child.child(j) { + match base.kind() { + "type_identifier" + | "qualified_identifier" + | "scoped_type_identifier" => { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: Some(node_text(&base, source).to_string()), + implements: None, + line: start_line(node), + }); + } + _ => {} + } + } + } + } + } + } +} + +// ── Per-node-kind handlers ────────────────────────────────────────────────── + +fn handle_cuda_function_definition(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name) = extract_cuda_function_name(node, source) { + let parent_class = + find_enclosing_type_name(node, &["class_specifier", "struct_specifier"], source); + let full_name = match &parent_class { + Some(cls) => format!("{}.{}", cls, name), + None => name, + }; + let kind = if parent_class.is_some() { + "method" + } else { + "function" + }; + let children = extract_cuda_parameters(node, source); + let qualifiers = extract_cuda_qualifiers(node, source); + let decorators = if qualifiers.is_empty() { + None + } else { + Some(qualifiers) + }; + // Reuse the "cpp" rule id for complexity/CFG — the CUDA grammar exposes + // the same C++ control-flow node types, and there is no dedicated "cuda" + // rule set in `ast-analysis/rules/`. 
+ symbols.definitions.push(Definition { + name: full_name, + kind: kind.to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators, + complexity: compute_all_metrics(node, source, "cpp"), + cfg: build_function_cfg(node, "cpp", source), + children: opt_children(children), + }); + } +} + +fn handle_cuda_class_specifier(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + let class_name = node_text(&name_node, source).to_string(); + let children = node + .child_by_field_name("body") + .map(|body| extract_cuda_fields(&body, source)) + .unwrap_or_default(); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); + extract_cuda_base_classes(node, source, &class_name, symbols); + } +} + +fn handle_cuda_struct_specifier(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + let struct_name = node_text(&name_node, source).to_string(); + let children = node + .child_by_field_name("body") + .map(|body| extract_cuda_fields(&body, source)) + .unwrap_or_default(); + symbols.definitions.push(Definition { + name: struct_name, + kind: "struct".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); + } +} + +fn handle_cuda_enum_specifier(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + let children = extract_cuda_enum_constants(node, source); + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "enum".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: 
None, + cfg: None, + children: opt_children(children), + }); + } +} + +fn handle_cuda_namespace_definition(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "namespace".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + } +} + +fn handle_cuda_type_definition(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + // Match JS: scan children right-to-left and take the first type-like node + // as the alias name. Mirrors `handleCudaTypedef` in + // `src/extractors/cuda.ts`. + let mut alias_name = None; + for i in (0..node.child_count()).rev() { + if let Some(child) = node.child(i) { + match child.kind() { + "type_identifier" | "identifier" | "primitive_type" => { + alias_name = Some(node_text(&child, source).to_string()); + break; + } + _ => {} + } + } + } + if let Some(name) = alias_name { + symbols.definitions.push(Definition { + name, + kind: "type".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + } +} + +fn handle_cuda_preproc_include(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + // JS strips quote/angle delimiters and exposes the full include path as + // the `source` plus the file's basename (no extension strip) as the only + // import name. Tagged with `cInclude` so resolution treats it like a C/C++ + // header. 
+ if let Some(path_node) = node.child_by_field_name("path") { + let raw = node_text(&path_node, source); + let path = raw.trim_matches(|c| c == '"' || c == '<' || c == '>'); + if !path.is_empty() { + let last = path.rsplit('/').next().unwrap_or(path); + let mut imp = Import::new( + path.to_string(), + vec![last.to_string()], + start_line(node), + ); + imp.c_include = Some(true); + symbols.imports.push(imp); + } + } +} + +fn handle_cuda_call_expression(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(fn_node) = node.child_by_field_name("function") { + if fn_node.kind() == "field_expression" { + let name = named_child_text(&fn_node, "field", source) + .map(|s| s.to_string()) + .unwrap_or_default(); + let receiver = named_child_text(&fn_node, "argument", source).map(|s| s.to_string()); + if !name.is_empty() { + symbols.calls.push(Call { + name, + line: start_line(node), + dynamic: None, + receiver, + }); + } + } else { + let name = node_text(&fn_node, source).to_string(); + if !name.is_empty() { + symbols.calls.push(Call { + name, + line: start_line(node), + dynamic: None, + receiver: None, + }); + } + } + } +} + +fn match_cuda_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { + match node.kind() { + "function_definition" => handle_cuda_function_definition(node, source, symbols), + "class_specifier" => handle_cuda_class_specifier(node, source, symbols), + "struct_specifier" => handle_cuda_struct_specifier(node, source, symbols), + "enum_specifier" => handle_cuda_enum_specifier(node, source, symbols), + "namespace_definition" => handle_cuda_namespace_definition(node, source, symbols), + "type_definition" => handle_cuda_type_definition(node, source, symbols), + "preproc_include" => handle_cuda_preproc_include(node, source, symbols), + "call_expression" => handle_cuda_call_expression(node, source, symbols), + _ => {} + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tree_sitter::Parser; + + fn parse_cuda(code: 
&str) -> FileSymbols { + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_cuda::LANGUAGE.into()) + .expect("loads CUDA grammar"); + let tree = parser.parse(code.as_bytes(), None).unwrap(); + CudaExtractor.extract(&tree, code.as_bytes(), "test.cu") + } + + #[test] + fn extracts_host_function() { + let s = parse_cuda("void hostFunction(int n) { return; }"); + let f = s + .definitions + .iter() + .find(|d| d.name == "hostFunction") + .expect("hostFunction extracted"); + assert_eq!(f.kind, "function"); + } + + #[test] + fn extracts_struct_with_fields() { + let s = parse_cuda("struct Vec3 { float x; float y; float z; };"); + let v = s + .definitions + .iter() + .find(|d| d.name == "Vec3") + .expect("Vec3 extracted"); + assert_eq!(v.kind, "struct"); + } + + #[test] + fn extracts_class_with_method() { + let s = parse_cuda("class Foo { public: void bar() {} };"); + let foo = s + .definitions + .iter() + .find(|d| d.name == "Foo") + .expect("class Foo extracted"); + assert_eq!(foo.kind, "class"); + let bar = s + .definitions + .iter() + .find(|d| d.name == "Foo.bar") + .expect("method Foo.bar extracted"); + assert_eq!(bar.kind, "method"); + } + + #[test] + fn extracts_namespace() { + let s = parse_cuda("namespace myns { int x; }"); + let n = s + .definitions + .iter() + .find(|d| d.name == "myns") + .expect("namespace extracted"); + assert_eq!(n.kind, "namespace"); + } + + #[test] + fn extracts_inheritance() { + let s = parse_cuda("class Base {}; class Derived : public Base {};"); + let rel = s + .classes + .iter() + .find(|c| c.name == "Derived") + .expect("Derived base class recorded"); + assert_eq!(rel.extends.as_deref(), Some("Base")); + } + + #[test] + fn extracts_include_with_c_include_flag() { + let s = parse_cuda("#include <cuda_runtime.h>\n#include \"mylib.cuh\""); + assert_eq!(s.imports.len(), 2); + assert!(s.imports[0].c_include.unwrap_or(false)); + assert_eq!(s.imports[0].source, "cuda_runtime.h"); + assert_eq!(s.imports[1].source, "mylib.cuh"); + } + + 
#[test] + fn extracts_call_expression() { + let s = parse_cuda("void foo() { cudaMalloc(&ptr, size); }"); + assert!(s.calls.iter().any(|c| c.name == "cudaMalloc")); + } + + #[test] + fn extracts_method_call_with_receiver() { + let s = parse_cuda( + "void run() { UserService svc; svc.createUser(\"1\", \"a\", \"a@b\"); }", + ); + let call = s + .calls + .iter() + .find(|c| c.name == "createUser") + .expect("createUser call recorded"); + assert_eq!(call.receiver.as_deref(), Some("svc")); + } + + #[test] + fn captures_cuda_qualifier_decorators() { + let s = parse_cuda("__global__ void kernel(int *data, int n) { }"); + let k = s + .definitions + .iter() + .find(|d| d.name == "kernel") + .expect("kernel extracted"); + let decorators = k.decorators.as_ref().expect("decorators present"); + assert!(decorators.iter().any(|d| d == "__global__")); + } + + #[test] + fn extracts_typedef_alias() { + let s = parse_cuda("typedef unsigned int uint32_t;"); + assert!(s.definitions.iter().any(|d| d.name == "uint32_t" && d.kind == "type")); + } +} diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs index b02531896..44986bb3b 100644 --- a/crates/codegraph-core/src/extractors/helpers.rs +++ b/crates/codegraph-core/src/extractors/helpers.rs @@ -260,6 +260,20 @@ pub const CPP_AST_CONFIG: LangAstConfig = LangAstConfig { string_prefixes: &['L', 'u', 'U', 'R'], }; +/// CUDA is a C++ superset; the tree-sitter-cuda grammar extends C++ with +/// `__global__`/`__device__`/`__host__`/`__shared__` qualifiers and kernel +/// launch syntax. The node-type vocabulary for literals, exceptions, and +/// awaits is otherwise identical to C++. 
+pub const CUDA_AST_CONFIG: LangAstConfig = LangAstConfig { + new_types: &["new_expression"], + throw_types: &["throw_statement"], + await_types: &["co_await_expression"], + string_types: &["string_literal", "raw_string_literal"], + regex_types: &[], + quote_chars: &['"'], + string_prefixes: &['L', 'u', 'U', 'R'], +}; + pub const KOTLIN_AST_CONFIG: LangAstConfig = LangAstConfig { new_types: &[], throw_types: &["throw_expression"], diff --git a/crates/codegraph-core/src/extractors/mod.rs b/crates/codegraph-core/src/extractors/mod.rs index 642f29f98..dc2987070 100644 --- a/crates/codegraph-core/src/extractors/mod.rs +++ b/crates/codegraph-core/src/extractors/mod.rs @@ -2,6 +2,7 @@ pub mod bash; pub mod c; pub mod cpp; pub mod csharp; +pub mod cuda; pub mod dart; pub mod elixir; pub mod go; @@ -126,5 +127,8 @@ pub fn extract_symbols_with_opts( LanguageKind::Ocaml | LanguageKind::OcamlInterface => { ocaml::OcamlExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) } + LanguageKind::Cuda => { + cuda::CudaExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) + } } } diff --git a/crates/codegraph-core/src/file_collector.rs b/crates/codegraph-core/src/file_collector.rs index 0cb157814..29f65713a 100644 --- a/crates/codegraph-core/src/file_collector.rs +++ b/crates/codegraph-core/src/file_collector.rs @@ -34,8 +34,9 @@ const DEFAULT_IGNORE_DIRS: &[&str] = &[ /// Must stay in sync with `LanguageKind::from_extension`. 
const SUPPORTED_EXTENSIONS: &[&str] = &[ "js", "jsx", "mjs", "cjs", "ts", "tsx", "d.ts", "py", "pyi", "go", "rs", "java", "cs", "rb", - "rake", "gemspec", "php", "phtml", "tf", "hcl", "c", "h", "cpp", "cc", "cxx", "hpp", "kt", - "kts", "swift", "scala", "sh", "bash", "ex", "exs", "lua", "dart", "zig", "hs", "ml", "mli", + "rake", "gemspec", "php", "phtml", "tf", "hcl", "c", "h", "cpp", "cc", "cxx", "hpp", "cu", + "cuh", "kt", "kts", "swift", "scala", "sh", "bash", "ex", "exs", "lua", "dart", "zig", "hs", + "ml", "mli", ]; /// Returns whether `path` has an extension the Rust file_collector would accept. diff --git a/crates/codegraph-core/src/parser_registry.rs b/crates/codegraph-core/src/parser_registry.rs index c87957f29..915c39bac 100644 --- a/crates/codegraph-core/src/parser_registry.rs +++ b/crates/codegraph-core/src/parser_registry.rs @@ -27,6 +27,7 @@ pub enum LanguageKind { Haskell, Ocaml, OcamlInterface, + Cuda, } impl LanguageKind { @@ -58,6 +59,7 @@ impl LanguageKind { Self::Haskell => "haskell", Self::Ocaml => "ocaml", Self::OcamlInterface => "ocaml-interface", + Self::Cuda => "cuda", } } @@ -86,6 +88,7 @@ impl LanguageKind { "php" | "phtml" => Some(Self::Php), "c" | "h" => Some(Self::C), "cpp" | "cc" | "cxx" | "hpp" => Some(Self::Cpp), + "cu" | "cuh" => Some(Self::Cuda), "kt" | "kts" => Some(Self::Kotlin), "swift" => Some(Self::Swift), "scala" => Some(Self::Scala), @@ -129,6 +132,7 @@ impl LanguageKind { "haskell" => Some(Self::Haskell), "ocaml" => Some(Self::Ocaml), "ocaml-interface" => Some(Self::OcamlInterface), + "cuda" => Some(Self::Cuda), _ => None, } } @@ -160,6 +164,7 @@ impl LanguageKind { Self::Haskell => tree_sitter_haskell::LANGUAGE.into(), Self::Ocaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(), Self::OcamlInterface => tree_sitter_ocaml::LANGUAGE_OCAML_INTERFACE.into(), + Self::Cuda => tree_sitter_cuda::LANGUAGE.into(), } } @@ -175,7 +180,7 @@ impl LanguageKind { &[ JavaScript, TypeScript, Tsx, Python, Go, Rust, Java, CSharp, Ruby, Php, Hcl, 
C, Cpp, Kotlin, Swift, Scala, Bash, Elixir, Lua, Dart, Zig, Haskell, Ocaml, - OcamlInterface, + OcamlInterface, Cuda, ] } } @@ -244,14 +249,15 @@ mod tests { | LanguageKind::Zig | LanguageKind::Haskell | LanguageKind::Ocaml - | LanguageKind::OcamlInterface => (), + | LanguageKind::OcamlInterface + | LanguageKind::Cuda => (), }; // IMPORTANT: this constant must equal the number of arms in the match // above AND the length of the slice returned by `LanguageKind::all()`. // Because both checks require the same manual update, they reinforce // each other: a developer who updates the match is reminded to also // update `all()` and this count. - const EXPECTED_LEN: usize = 24; + const EXPECTED_LEN: usize = 25; assert_eq!( LanguageKind::all().len(), EXPECTED_LEN, diff --git a/src/ast-analysis/rules/index.ts b/src/ast-analysis/rules/index.ts index 653cbd59b..031b057c3 100644 --- a/src/ast-analysis/rules/index.ts +++ b/src/ast-analysis/rules/index.ts @@ -101,6 +101,11 @@ const CPP_AST_TYPES: Record = { raw_string_literal: 'string', }; +// CUDA's tree-sitter grammar inherits the full C++ node vocabulary, so the +// AST node types and quote rules are identical to C++. Mirrors the native +// `CUDA_AST_CONFIG` in `crates/codegraph-core/src/extractors/helpers.rs`. 
+const CUDA_AST_TYPES: Record<string, string> = CPP_AST_TYPES; + const KOTLIN_AST_TYPES: Record<string, string> = { throw_expression: 'throw', string_literal: 'string', @@ -166,6 +171,7 @@ export const AST_TYPE_MAPS: Map<string, Record<string, string>> = new Map([ ['php', PHP_AST_TYPES], ['c', C_AST_TYPES], ['cpp', CPP_AST_TYPES], + ['cuda', CUDA_AST_TYPES], ['kotlin', KOTLIN_AST_TYPES], ['swift', SWIFT_AST_TYPES], ['scala', SCALA_AST_TYPES], @@ -201,6 +207,8 @@ const RB_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"', stringPrefixes: ' const PHP_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"', stringPrefixes: '' }; const C_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; const CPP_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: 'LuUR' }; +// CUDA shares C++ string-literal lexing, including the `L`/`u`/`U`/`R` prefixes. +const CUDA_STRING_CONFIG: AstStringConfig = CPP_STRING_CONFIG; const KOTLIN_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; const SWIFT_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; const SCALA_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; @@ -225,6 +233,7 @@ export const AST_STRING_CONFIGS: Map<string, AstStringConfig> = new Map([ ['php', PHP_STRING_CONFIG], ['c', C_STRING_CONFIG], ['cpp', CPP_STRING_CONFIG], + ['cuda', CUDA_STRING_CONFIG], ['kotlin', KOTLIN_STRING_CONFIG], ['swift', SWIFT_STRING_CONFIG], ['scala', SCALA_STRING_CONFIG], diff --git a/src/domain/parser.ts b/src/domain/parser.ts index f1c7dd809..80b5810eb 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -457,6 +457,8 @@ export const NATIVE_SUPPORTED_EXTENSIONS: ReadonlySet<string> = new Set([ '.cc', '.cxx', '.hpp', + '.cu', + '.cuh', '.kt', '.kts', '.swift', diff --git a/tests/parsers/native-drop-classification.test.ts b/tests/parsers/native-drop-classification.test.ts index 24aee1d53..7e202b215 100644 --- a/tests/parsers/native-drop-classification.test.ts +++ b/tests/parsers/native-drop-classification.test.ts @@ -22,12 +22,11 
@@ describe('classifyNativeDrops', () => { 'src/e.R', 'src/f.erl', 'src/g.sol', - 'src/h.cu', 'src/i.groovy', 'src/j.v', 'src/k.m', ]); - expect(totals['unsupported-by-native']).toBe(11); + expect(totals['unsupported-by-native']).toBe(10); expect(totals['native-extractor-failure']).toBe(0); expect(byReason['unsupported-by-native'].get('.fs')).toEqual(['src/a.fs']); expect(byReason['unsupported-by-native'].get('.gleam')).toEqual(['src/b.gleam']); From 25f0adc8853d12fd9ca1dd47701a02cee852dcea Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 11 May 2026 20:13:23 -0600 Subject: [PATCH 2/3] fix(native): mirror C++ extractor type-map and include name stripping in CUDA CUDA files now populate `type_map` from declarations and parameter declarations via a third walk pass, matching `cpp.rs`. Without it, receiver-typed calls like `buf.copy(...)` could not resolve to `DeviceBuffer.copy` for CUDA sources in the native layer. `#include` import names also now drop the trailing `.cuh`/`.hpp`/`.h` extension so `cInclude` resolution links CUDA headers consistently with the native C/C++ extractors. Strengthens the include test to cover the stripped names and adds two type_map tests. --- crates/codegraph-core/src/extractors/cuda.rs | 102 ++++++++++++++++++- 1 file changed, 97 insertions(+), 5 deletions(-) diff --git a/crates/codegraph-core/src/extractors/cuda.rs b/crates/codegraph-core/src/extractors/cuda.rs index 19746b9bb..995df747a 100644 --- a/crates/codegraph-core/src/extractors/cuda.rs +++ b/crates/codegraph-core/src/extractors/cuda.rs @@ -29,10 +29,66 @@ impl SymbolExtractor for CudaExtractor { &mut symbols.ast_nodes, &CUDA_AST_CONFIG, ); + // Third pass: populate type_map with variable-to-type bindings so + // receiver-typed call resolution (e.g. `buf.copy(...)` → `DeviceBuffer.copy`) + // fires for CUDA files just like it does for C++ files. Mirrors the + // third walk in `cpp.rs`. 
+ walk_tree(&tree.root_node(), source, &mut symbols, match_cuda_type_map); symbols } } +// ── Type inference ────────────────────────────────────────────────────────── + +/// Populate `symbols.type_map` from `declaration` and `parameter_declaration` +/// nodes. Mirrors `match_cpp_type_map` in `cpp.rs` — the CUDA grammar shares +/// these C++ node types, so the same logic works unchanged. +fn match_cuda_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { + match node.kind() { + "declaration" => { + if let Some(type_node) = node.child_by_field_name("type") { + let type_name = node_text(&type_node, source); + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "init_declarator" || child.kind() == "identifier" { + let name_node = if child.kind() == "init_declarator" { + child.child_by_field_name("declarator") + } else { + Some(child) + }; + if let Some(name_node) = name_node { + let final_name = unwrap_cuda_declarator(&name_node, source); + if !final_name.is_empty() { + symbols.type_map.push(TypeMapEntry { + name: final_name, + type_name: type_name.to_string(), + confidence: 0.9, + }); + } + } + } + } + } + } + } + "parameter_declaration" => { + if let Some(type_node) = node.child_by_field_name("type") { + if let Some(decl) = node.child_by_field_name("declarator") { + let name = unwrap_cuda_declarator(&decl, source); + if !name.is_empty() { + symbols.type_map.push(TypeMapEntry { + name, + type_name: node_text(&type_node, source).to_string(), + confidence: 0.9, + }); + } + } + } + } + _ => {} + } +} + // ── CUDA-specific qualifiers ──────────────────────────────────────────────── const CUDA_QUALIFIERS: &[&str] = &[ @@ -364,18 +420,24 @@ fn handle_cuda_type_definition(node: &Node, source: &[u8], symbols: &mut FileSym } fn handle_cuda_preproc_include(node: &Node, source: &[u8], symbols: &mut FileSymbols) { - // JS strips quote/angle delimiters and exposes the full include path as - // the `source` plus 
the file's basename (no extension strip) as the only - // import name. Tagged with `cInclude` so resolution treats it like a C/C++ - // header. + // Strip quote/angle delimiters and expose the basename minus header + // extension as the import name, matching the native C++ extractor so + // `cInclude` resolution links CUDA includes consistently with C/C++. + // CUDA-specific `.cuh` headers are stripped in addition to `.h`/`.hpp`. + // Tagged with `cInclude` so resolution treats it like a C/C++ header. if let Some(path_node) = node.child_by_field_name("path") { let raw = node_text(&path_node, source); let path = raw.trim_matches(|c| c == '"' || c == '<' || c == '>'); if !path.is_empty() { let last = path.rsplit('/').next().unwrap_or(path); + let name = last + .strip_suffix(".cuh") + .or_else(|| last.strip_suffix(".hpp")) + .or_else(|| last.strip_suffix(".h")) + .unwrap_or(last); let mut imp = Import::new( path.to_string(), - vec![last.to_string()], + vec![name.to_string()], start_line(node), ); imp.c_include = Some(true); @@ -508,7 +570,37 @@ mod tests { assert_eq!(s.imports.len(), 2); assert!(s.imports[0].c_include.unwrap_or(false)); assert_eq!(s.imports[0].source, "cuda_runtime.h"); + // Header extensions are stripped from import names so `cInclude` + // resolution matches C/C++ behavior in the native layer. + assert_eq!(s.imports[0].names, vec!["cuda_runtime".to_string()]); assert_eq!(s.imports[1].source, "mylib.cuh"); + assert_eq!(s.imports[1].names, vec!["mylib".to_string()]); + } + + #[test] + fn populates_type_map_from_declarations() { + let s = parse_cuda( + "void run() { DeviceBuffer buf; buf.copy(src, n); }", + ); + // `DeviceBuffer buf;` should be recorded so receiver-typed call + // resolution can map `buf.copy` to `DeviceBuffer.copy`. 
+ let entry = s + .type_map + .iter() + .find(|e| e.name == "buf") + .expect("buf type binding present in type_map"); + assert_eq!(entry.type_name, "DeviceBuffer"); + } + + #[test] + fn populates_type_map_from_parameters() { + let s = parse_cuda("void run(DeviceBuffer buf) { buf.copy(); }"); + let entry = s + .type_map + .iter() + .find(|e| e.name == "buf") + .expect("buf parameter type binding present in type_map"); + assert_eq!(entry.type_name, "DeviceBuffer"); } #[test] From 8aff96d264d33655d6d46e780e11e2e0d74a51bd Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 11 May 2026 20:13:36 -0600 Subject: [PATCH 3/3] chore(native): sync Cargo.lock with crate version bump from main merge --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index d4dcd5355..188e5b363 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -66,7 +66,7 @@ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "codegraph-core" -version = "3.9.6" +version = "3.10.0" dependencies = [ "globset", "ignore",