diff --git a/Cargo.lock b/Cargo.lock index cfd3efbc..e2fd1f00 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -93,6 +93,7 @@ dependencies = [ "tree-sitter-hcl", "tree-sitter-java", "tree-sitter-javascript", + "tree-sitter-julia", "tree-sitter-kotlin-sg", "tree-sitter-lua", "tree-sitter-ocaml", @@ -861,6 +862,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-julia" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4144731a178812ee867619b1e98b3b91e54c1652304b26e5ebe3175b701de323" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-kotlin-sg" version = "0.4.0" diff --git a/crates/codegraph-core/Cargo.toml b/crates/codegraph-core/Cargo.toml index dd50f77c..093f8aaf 100644 --- a/crates/codegraph-core/Cargo.toml +++ b/crates/codegraph-core/Cargo.toml @@ -36,6 +36,7 @@ tree-sitter-dart = "0.0.4" tree-sitter-zig = "1" tree-sitter-haskell = "0.23" tree-sitter-ocaml = "0.24" +tree-sitter-julia = "0.23" tree-sitter-clojure-orchard = "0.2" rayon = "1" ignore = "0.4" diff --git a/crates/codegraph-core/src/change_detection.rs b/crates/codegraph-core/src/change_detection.rs index c735b1e0..fd30bb6e 100644 --- a/crates/codegraph-core/src/change_detection.rs +++ b/crates/codegraph-core/src/change_detection.rs @@ -774,7 +774,7 @@ mod tests { #[test] fn detect_removed_skips_unsupported_extensions() { - // Files in WASM-only languages (Gleam, Julia, F#) live in + // Files in WASM-only languages (Gleam, F#) live in // `file_hashes` because the JS-side WASM backfill writes them, but // Rust's narrower file_collector never collects them. Without this // skip, every incremental rebuild would flag them as removed and @@ -782,7 +782,6 @@ mod tests { let mut existing = HashMap::new(); for path in [ "tests/fixtures/gleam/main.gleam", - "tests/fixtures/julia/main.jl", "tests/fixtures/fsharp/Main.fs", ] { existing.insert( diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs index d860d672..50e243f6 100644 --- a/crates/codegraph-core/src/extractors/helpers.rs +++ b/crates/codegraph-core/src/extractors/helpers.rs @@ -374,6 +374,16 @@ pub const OCAML_AST_CONFIG: LangAstConfig = LangAstConfig { string_prefixes: &[], }; +pub const JULIA_AST_CONFIG: LangAstConfig = LangAstConfig { + new_types: &[], + throw_types: &[], + await_types: &[], + string_types: &["string_literal", "prefixed_string_literal"], + regex_types: &[], + quote_chars: &['"'], + string_prefixes: &[], +}; + pub const CLOJURE_AST_CONFIG: LangAstConfig = LangAstConfig { new_types: &[], throw_types: &[], diff --git a/crates/codegraph-core/src/extractors/julia.rs b/crates/codegraph-core/src/extractors/julia.rs new file mode 100644 index 00000000..0f0e7e59 --- /dev/null +++ b/crates/codegraph-core/src/extractors/julia.rs @@ -0,0 +1,756 @@ +use tree_sitter::{Node, Tree}; +use crate::cfg::build_function_cfg; +use crate::complexity::compute_all_metrics; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct JuliaExtractor; + +impl SymbolExtractor for JuliaExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_julia(&tree.root_node(), source, &mut symbols, None); + walk_ast_nodes_with_config(&tree.root_node(), source, &mut symbols.ast_nodes, &JULIA_AST_CONFIG); + symbols + } +} + +/// Walk Julia tree threading the current enclosing module name. The JS +/// extractor (`src/extractors/julia.ts`) tracks `currentModule` so that +/// definitions inside `module Foo ... end` are prefixed `Foo.bar`. The +/// generic `walk_tree` helper cannot pass extra state, so we open-code a +/// recursive walker here. +fn walk_julia( + node: &Node, + source: &[u8], + symbols: &mut FileSymbols, + current_module: Option<&str>, +) { + let mut next_module = current_module.map(|s| s.to_string()); + + match node.kind() { + "module_definition" => { + if let Some(name) = handle_module_def(node, source, symbols) { + next_module = Some(name); + } + } + "function_definition" => handle_function_def(node, source, symbols, current_module), + "assignment" => handle_assignment(node, source, symbols, current_module), + "struct_definition" => handle_struct_def(node, source, symbols), + "abstract_definition" => handle_abstract_def(node, source, symbols), + "macro_definition" => handle_macro_def(node, source, symbols, current_module), + "import_statement" | "using_statement" => handle_import(node, source, symbols), + "call_expression" => handle_call(node, source, symbols), + _ => {} + } + + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + walk_julia(&child, source, symbols, next_module.as_deref()); + } + } +} + +fn handle_module_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) -> Option { + let name_node = node + .child_by_field_name("name") + .or_else(|| find_child(node, "identifier"))?; + let name = node_text(&name_node, source).to_string(); + + symbols.definitions.push(Definition { + name: name.clone(), + kind: "module".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + + Some(name) +} + +/// Extract the function-name identifier from a `signature` (or call_expression +/// directly) node. tree-sitter-julia wraps the call signature of a +/// `function_definition` / `macro_definition` in a `signature` node whose +/// first child is the `call_expression` — `find_child` only inspects direct +/// children, so we unwrap one level explicitly. +fn signature_call<'a>(node: &Node<'a>) -> Option> { + if let Some(sig) = find_child(node, "signature") { + return find_child(&sig, "call_expression"); + } + find_child(node, "call_expression") +} + +fn handle_function_def( + node: &Node, + source: &[u8], + symbols: &mut FileSymbols, + current_module: Option<&str>, +) { + if let Some(call_sig) = signature_call(node) { + if let Some(func_name_node) = call_sig.child(0) { + let base = node_text(&func_name_node, source); + // For qualified names (`function Base.show ... end` inside a module), + // the LHS is a `scoped_identifier` already containing the qualifier — + // skip the module prefix to avoid producing `Outer.Base.show`. + let name = match current_module { + Some(m) if !base.contains('.') => format!("{}.{}", m, base), + _ => base.to_string(), + }; + let params = extract_julia_params(&call_sig, source); + symbols.definitions.push(Definition { + name, + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "julia"), + cfg: build_function_cfg(node, "julia", source), + children: opt_children(params), + }); + return; + } + } + + // Fallback: look for identifier directly + let name_node = match node + .child_by_field_name("name") + .or_else(|| find_child(node, "identifier")) + { + Some(n) => n, + None => return, + }; + let base = node_text(&name_node, source); + let name = match current_module { + Some(m) if !base.contains('.') => format!("{}.{}", m, base), + _ => base.to_string(), + }; + symbols.definitions.push(Definition { + name, + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "julia"), + cfg: build_function_cfg(node, "julia", source), + children: None, + }); +} + +fn handle_assignment( + node: &Node, + source: &[u8], + symbols: &mut FileSymbols, + current_module: Option<&str>, +) { + // Short function form: `add(x, y) = x + y` → LHS is a call_expression. + let lhs = match node.child(0) { + Some(c) => c, + None => return, + }; + if lhs.kind() != "call_expression" { + return; + } + let func_name_node = match lhs.child(0) { + Some(c) => c, + None => return, + }; + let base = node_text(&func_name_node, source); + // For qualified short-form definitions like `Foo.bar(x, y) = x + y`, + // `func_name_node` is a `scoped_identifier` already containing the + // qualifier — skip the module prefix to avoid producing `Outer.Foo.bar`. + let name = match current_module { + Some(m) if !base.contains('.') => format!("{}.{}", m, base), + _ => base.to_string(), + }; + let params = extract_julia_params(&lhs, source); + + symbols.definitions.push(Definition { + name, + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "julia"), + cfg: build_function_cfg(node, "julia", source), + children: opt_children(params), + }); +} + +fn handle_struct_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + // struct_definition: `struct` type_head `end` + // type_head wraps the name and optional supertype. The name may be a + // bare `identifier`, a `parameterized_identifier` (e.g. `Vec{T}`), or + // either of those nested inside a `binary_expression` (`Name <: Super`). + let type_head = match find_child(node, "type_head") { + Some(th) => th, + None => return, + }; + + let (name_node, supertype): (Node, Option) = if let Some(bin) = + find_child(&type_head, "binary_expression") + { + // Walk into each side of the binary expression to find the base-name + // identifier — handles parameterized forms like `Vec{T} <: AbstractArray{T,1}`. + let mut sides: Vec = Vec::new(); + for i in 0..bin.child_count() { + if let Some(c) = bin.child(i) { + if c.kind() != "operator" { + sides.push(c); + } + } + } + let name_id = sides.first().and_then(|n| find_base_name(n)); + let super_id = sides.get(1).and_then(|n| find_base_name(n)); + match name_id { + Some(n) => (n, super_id), + None => return, + } + } else if let Some(n) = find_base_name(&type_head) { + (n, None) + } else { + return; + }; + + let struct_name = node_text(&name_node, source).to_string(); + + let mut children: Vec = Vec::new(); + for i in 0..node.child_count() { + let Some(child) = node.child(i) else { continue }; + if child.kind() == "typed_expression" { + if let Some(field_name) = find_child(&child, "identifier") { + children.push(child_def( + node_text(&field_name, source).to_string(), + "property", + start_line(&child), + )); + } + } else if child.kind() == "identifier" { + // Plain identifier fields (no type annotation) appear as direct + // identifier children of struct_definition. The type_head is a + // separate node so there is nothing to filter out here. + children.push(child_def( + node_text(&child, source).to_string(), + "property", + start_line(&child), + )); + } + } + + if let Some(sup) = supertype { + symbols.classes.push(ClassRelation { + name: struct_name.clone(), + extends: Some(node_text(&sup, source).to_string()), + implements: None, + line: start_line(node), + }); + } + + symbols.definitions.push(Definition { + name: struct_name, + kind: "struct".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); +} + +fn handle_abstract_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + // abstract_definition: `abstract type` type_head `end` + // type_head wraps the name identifier — possibly nested in a + // `Name <: Super` binary_expression or a `Name{T,...}` parametrized form + // (`parameterized_identifier` / `type_parameter_list`). + let name_node = match node + .child_by_field_name("name") + .or_else(|| find_child(node, "identifier")) + { + Some(n) => n, + None => match find_child(node, "type_head") { + Some(th) => match find_base_name(&th) { + Some(n) => n, + // Mirror the TS extractor: skip rather than emit a garbled + // definition name (e.g. raw `Name{T} <: Super{T,1}` text). + None => return, + }, + None => return, + }, + }; + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "type".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +/// Locate the base-name identifier within a `type_head` node. +/// +/// Handles plain identifiers, `Name <: Super` binary expressions, and +/// parameterized forms like `Name{T}` / `Name{T} <: Super{T,1}` by recursing +/// into common wrapper kinds (binary expressions, parametrized identifiers, +/// type-parameter lists). Returns `None` when no identifier can be located — +/// callers should skip emitting a definition in that case. +fn find_base_name<'a>(node: &Node<'a>) -> Option> { + // The node itself may already be the identifier (e.g. when called on a + // direct side of a binary_expression like `Point <: AbstractPoint`). + if node.kind() == "identifier" { + return Some(*node); + } + // Direct identifier child wins. + if let Some(id) = find_child(node, "identifier") { + return Some(id); + } + // Otherwise recurse through wrapper shapes that may contain the + // base-name identifier (parameterized or supertyped forms). + for i in 0..node.child_count() { + let Some(child) = node.child(i) else { continue }; + match child.kind() { + "binary_expression" + | "parametrized_type_expression" + | "parameterized_identifier" + | "type_parameter_list" + | "type_argument_list" => { + if let Some(found) = find_base_name(&child) { + return Some(found); + } + } + _ => {} + } + } + None +} + +fn handle_macro_def( + node: &Node, + source: &[u8], + symbols: &mut FileSymbols, + current_module: Option<&str>, +) { + // macro_definition: `macro` signature/call_expression body `end`. + // The name lives in the same shape as a function signature. + let name_node = if let Some(call_sig) = signature_call(node) { + call_sig.child(0) + } else { + node.child_by_field_name("name") + .or_else(|| find_child(node, "identifier")) + }; + let name_node = match name_node { + Some(n) => n, + None => return, + }; + let base = node_text(&name_node, source); + let name = match current_module { + Some(m) => format!("{}.@{}", m, base), + None => format!("@{}", base), + }; + symbols.definitions.push(Definition { + name, + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_import(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + // tree-sitter-julia shapes: + // `using LinearAlgebra` → using_statement [ using, identifier ] + // `using ..Repository` → using_statement [ using, import_path[..] ] + // `import Foo.Bar` → import_statement [ import, scoped_identifier ] + // `import .X` → import_statement [ import, import_path[.] ] + // `import Base: show` → import_statement [ import, selected_import[Base, show] ] + // + // We collect every meaningful sub-node and derive `source` from the first. + let mut names: Vec = Vec::new(); + let mut source_str = String::new(); + + for i in 0..node.child_count() { + let Some(child) = node.child(i) else { continue }; + match child.kind() { + "identifier" | "scoped_identifier" => { + let txt = node_text(&child, source); + if source_str.is_empty() { + source_str = txt.to_string(); + } + let last = txt.rsplit('.').next().unwrap_or(txt); + names.push(last.to_string()); + } + "import_path" => { + // Use the trailing identifier as the module reference. + let txt = node_text(&child, source); + let stripped = txt.trim_start_matches('.'); + if source_str.is_empty() { + source_str = stripped.to_string(); + } + let last = stripped.rsplit('.').next().unwrap_or(stripped); + if !last.is_empty() { + names.push(last.to_string()); + } + } + "selected_import" => { + // First identifier-bearing node is the source module; the rest + // are imported names. The module may itself be a + // `scoped_identifier` (e.g. `import Foo.Bar: baz`) — handle it + // alongside bare `identifier` and use the trailing segment as + // the display name, mirroring the outer loop. + let mut first = true; + for j in 0..child.child_count() { + let Some(part) = child.child(j) else { continue }; + if part.kind() == "identifier" || part.kind() == "scoped_identifier" { + let txt = node_text(&part, source).to_string(); + if first { + if source_str.is_empty() { + source_str = txt.clone(); + } + first = false; + } else { + let last = txt.rsplit('.').next().unwrap_or(&txt).to_string(); + names.push(last); + } + } + } + } + _ => {} + } + } + + if !source_str.is_empty() { + let names = if names.is_empty() { vec![source_str.clone()] } else { names }; + symbols + .imports + .push(Import::new(source_str, names, start_line(node))); + } +} + +fn handle_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + // Skip when this call is the LHS of an assignment (it's a short-form + // function definition signature). + if let Some(parent) = node.parent() { + if parent.kind() == "assignment" { + if let Some(first) = parent.child(0) { + if first.id() == node.id() { + return; + } + } + } + // Skip when this call is the signature of a function/macro definition. + // In tree-sitter-julia the signature lives inside a `signature` node + // whose parent is `function_definition` or `macro_definition`. Body + // calls (e.g. `println(name)` inside `function greet ... end`) appear + // as direct children of `function_definition` and MUST be recorded — + // do not blanket-skip on that parent kind. + if parent.kind() == "signature" { + if let Some(grand) = parent.parent() { + if matches!(grand.kind(), "function_definition" | "macro_definition") { + return; + } + } + } + } + + let func_node = match node.child(0) { + Some(n) => n, + None => return, + }; + + match func_node.kind() { + "identifier" => { + symbols.calls.push(Call { + name: node_text(&func_node, source).to_string(), + line: start_line(node), + dynamic: None, + receiver: None, + }); + } + "field_expression" | "scoped_identifier" => { + let raw = node_text(&func_node, source); + let parts: Vec<&str> = raw.split('.').collect(); + if parts.len() >= 2 { + let last = parts.last().copied().unwrap_or(""); + let receiver = parts[..parts.len() - 1].join("."); + symbols.calls.push(Call { + name: last.to_string(), + line: start_line(node), + dynamic: None, + receiver: Some(receiver), + }); + } else { + symbols.calls.push(Call { + name: raw.to_string(), + line: start_line(node), + dynamic: None, + receiver: None, + }); + } + } + _ => {} + } +} + +fn extract_julia_params(call_expr: &Node, source: &[u8]) -> Vec { + let mut params: Vec = Vec::new(); + let arg_list = match find_child(call_expr, "argument_list") + .or_else(|| find_child(call_expr, "tuple_expression")) + { + Some(a) => a, + None => return params, + }; + + for i in 0..arg_list.child_count() { + let Some(child) = arg_list.child(i) else { continue }; + match child.kind() { + "identifier" => { + params.push(child_def( + node_text(&child, source).to_string(), + "parameter", + start_line(&child), + )); + } + "typed_parameter" | "typed_expression" | "optional_parameter" | "default_parameter" => { + if let Some(name_node) = find_child(&child, "identifier") { + params.push(child_def( + node_text(&name_node, source).to_string(), + "parameter", + start_line(&child), + )); + } + } + _ => {} + } + } + params +} + +#[cfg(test)] +mod tests { + use super::*; + use tree_sitter::Parser; + + fn parse_jl(code: &str) -> FileSymbols { + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_julia::LANGUAGE.into()) + .unwrap(); + let tree = parser.parse(code.as_bytes(), None).unwrap(); + JuliaExtractor.extract(&tree, code.as_bytes(), "test.jl") + } + + #[test] + fn finds_function() { + let s = parse_jl("function greet(name)\n println(name)\nend\n"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"greet")); + let g = s.definitions.iter().find(|d| d.name == "greet").unwrap(); + assert_eq!(g.kind, "function"); + } + + #[test] + fn finds_short_form_function() { + let s = parse_jl("add(x, y) = x + y\n"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"add")); + let add = s.definitions.iter().find(|d| d.name == "add").unwrap(); + assert_eq!(add.kind, "function"); + let params: Vec<&str> = add + .children + .as_ref() + .map(|c| c.iter().map(|p| p.name.as_str()).collect()) + .unwrap_or_default(); + assert!(params.contains(&"x")); + assert!(params.contains(&"y")); + } + + #[test] + fn module_prefixes_inner_functions() { + let s = parse_jl("module Foo\n function bar()\n end\nend\n"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"Foo")); + assert!(names.contains(&"Foo.bar")); + } + + #[test] + fn extracts_struct_with_fields_and_supertype() { + let s = parse_jl("struct Point <: AbstractPoint\n x::Int\n y::Int\nend\n"); + let point = s + .definitions + .iter() + .find(|d| d.name == "Point") + .expect("struct should be found"); + assert_eq!(point.kind, "struct"); + let fields: Vec<&str> = point + .children + .as_ref() + .map(|c| c.iter().map(|p| p.name.as_str()).collect()) + .unwrap_or_default(); + assert!(fields.contains(&"x")); + assert!(fields.contains(&"y")); + assert_eq!(s.classes.len(), 1); + assert_eq!(s.classes[0].name, "Point"); + assert_eq!(s.classes[0].extends, Some("AbstractPoint".to_string())); + } + + #[test] + fn extracts_struct_without_supertype() { + let s = parse_jl("struct Point\n x::Float64\n y::Float64\nend\n"); + let point = s + .definitions + .iter() + .find(|d| d.name == "Point") + .expect("struct should be found"); + assert_eq!(point.kind, "struct"); + assert!(s.classes.is_empty()); + } + + #[test] + fn extracts_abstract_type() { + let s = parse_jl("abstract type AbstractShape end\n"); + let t = s + .definitions + .iter() + .find(|d| d.name == "AbstractShape") + .expect("abstract should be found"); + assert_eq!(t.kind, "type"); + } + + #[test] + fn extracts_parameterized_abstract_type_base_name() { + // Parameterized generics with a supertype must record only the base + // identifier — never the raw `Name{T} <: Super{T,1}` text. + let s = parse_jl("abstract type AbstractVector{T} <: AbstractArray{T,1} end\n"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!( + names.contains(&"AbstractVector"), + "expected base name `AbstractVector`, got {names:?}" + ); + // Guard against the previous garbled-name regression. + assert!( + !names.iter().any(|n| n.contains('{') || n.contains('<')), + "definition name leaked raw type-head text: {names:?}" + ); + } + + #[test] + fn extracts_macro_def() { + let s = parse_jl("macro mymac(x)\n x\nend\n"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"@mymac")); + } + + #[test] + fn extracts_qualified_calls() { + let s = parse_jl("function main()\n Repository.save(repo, 1)\n println(\"x\")\nend\n"); + let calls: Vec<(&str, Option<&str>)> = s + .calls + .iter() + .map(|c| (c.name.as_str(), c.receiver.as_deref())) + .collect(); + assert!(calls.iter().any(|(n, r)| *n == "save" && *r == Some("Repository"))); + assert!(calls.iter().any(|(n, r)| *n == "println" && r.is_none())); + } + + #[test] + fn handles_using_import() { + let s = parse_jl("using ..Repository\n"); + assert_eq!(s.imports.len(), 1); + assert_eq!(s.imports[0].source, "Repository"); + } + + #[test] + fn handles_selected_import() { + let s = parse_jl("import Base: show\n"); + assert_eq!(s.imports.len(), 1); + assert_eq!(s.imports[0].source, "Base"); + assert!(s.imports[0].names.contains(&"show".to_string())); + } + + #[test] + fn does_not_record_function_signature_as_call() { + let s = parse_jl("function greet(name)\n println(name)\nend\n"); + // `greet` itself must not appear as a call — only println. + let call_names: Vec<&str> = s.calls.iter().map(|c| c.name.as_str()).collect(); + assert!(!call_names.contains(&"greet")); + assert!(call_names.contains(&"println")); + } + + #[test] + fn extracts_parameterized_struct_base_name() { + // Parameterized struct names (e.g. `Vec{T}`) must record the base + // identifier — not be silently dropped or include type-parameter text. + let s = parse_jl("struct Vec{T} <: AbstractArray{T,1}\n data::Vector{T}\nend\n"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!( + names.contains(&"Vec"), + "expected base name `Vec`, got {names:?}" + ); + assert!( + !names.iter().any(|n| n.contains('{') || n.contains('<')), + "definition name leaked raw type-head text: {names:?}" + ); + // Supertype should still resolve to the base identifier `AbstractArray`. + assert_eq!(s.classes.len(), 1); + assert_eq!(s.classes[0].name, "Vec"); + assert_eq!(s.classes[0].extends.as_deref(), Some("AbstractArray")); + } + + #[test] + fn qualified_short_form_method_does_not_double_prefix() { + // `Foo.bar(x, y) = x + y` inside `module Outer` must record `Foo.bar`, + // not `Outer.Foo.bar` — the scoped_identifier already carries the + // qualifier. + let s = parse_jl("module Outer\n Foo.bar(x, y) = x + y\nend\n"); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"Foo.bar"), "got {names:?}"); + assert!( + !names.iter().any(|n| *n == "Outer.Foo.bar"), + "qualified method got double-prefixed: {names:?}" + ); + } + + #[test] + fn qualified_function_def_does_not_double_prefix() { + // `function Base.show(io, x) ... end` inside `module Foo` must record + // `Base.show`, not `Foo.Base.show`. + let s = parse_jl( + "module Foo\n function Base.show(io, x)\n println(io, x)\n end\nend\n", + ); + let names: Vec<&str> = s.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"Base.show"), "got {names:?}"); + assert!( + !names.iter().any(|n| *n == "Foo.Base.show"), + "qualified function def got double-prefixed: {names:?}" + ); + } + + #[test] + fn selected_import_handles_qualified_module() { + // `import Foo.Bar: baz` — module is a scoped_identifier. The import + // must record `Foo.Bar` as the source and `baz` as the imported name, + // not the malformed `source="baz", names=["baz"]`. + let s = parse_jl("import LinearAlgebra.BLAS: gemm\n"); + assert_eq!(s.imports.len(), 1); + assert_eq!(s.imports[0].source, "LinearAlgebra.BLAS"); + assert!( + s.imports[0].names.contains(&"gemm".to_string()), + "expected `gemm` in imported names, got {:?}", + s.imports[0].names + ); + assert!( + !s.imports[0].names.contains(&"LinearAlgebra.BLAS".to_string()), + "source module leaked into names: {:?}", + s.imports[0].names + ); + } +} diff --git a/crates/codegraph-core/src/extractors/mod.rs b/crates/codegraph-core/src/extractors/mod.rs index e2999c80..5e4131ac 100644 --- a/crates/codegraph-core/src/extractors/mod.rs +++ b/crates/codegraph-core/src/extractors/mod.rs @@ -12,6 +12,7 @@ pub mod hcl; pub mod helpers; pub mod java; pub mod javascript; +pub mod julia; pub mod kotlin; pub mod lua; pub mod ocaml; @@ -116,6 +117,9 @@ pub fn extract_symbols_with_opts( LanguageKind::Lua => { lua::LuaExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) } + LanguageKind::Julia => { + julia::JuliaExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) + } LanguageKind::Dart => { dart::DartExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) } diff --git a/crates/codegraph-core/src/file_collector.rs b/crates/codegraph-core/src/file_collector.rs index f7a8fef7..bb1fb41d 100644 --- a/crates/codegraph-core/src/file_collector.rs +++ b/crates/codegraph-core/src/file_collector.rs @@ -36,7 +36,7 @@ const SUPPORTED_EXTENSIONS: &[&str] = &[ "js", "jsx", "mjs", "cjs", "ts", "tsx", "d.ts", "py", "pyi", "go", "rs", "java", "cs", "rb", "rake", "gemspec", "php", "phtml", "tf", "hcl", "c", "h", "cpp", "cc", "cxx", "hpp", "cu", "cuh", "kt", "kts", "swift", "scala", "sh", "bash", "ex", "exs", "lua", "dart", "zig", "hs", - "ml", "mli", "clj", "cljs", "cljc", + "ml", "mli", "jl", "clj", "cljs", "cljc", ]; /// Returns whether `path` has an extension the Rust file_collector would accept. @@ -44,8 +44,8 @@ const SUPPORTED_EXTENSIONS: &[&str] = &[ /// Mirrors the predicate at the heart of `collect_files`: a file is collected /// if `LanguageKind::from_extension` recognizes it OR its raw extension is in /// `SUPPORTED_EXTENSIONS`. Exposed for `change_detection::detect_removed_files` -/// so that files outside Rust's capability (e.g. WASM-only `.gleam`, `.jl`, -/// `.fs`) are not flagged as "removed" merely because the orchestrator's +/// so that files outside Rust's capability (e.g. WASM-only `.gleam`, `.fs`) +/// are not flagged as "removed" merely because the orchestrator's /// narrower collector never sees them. pub fn is_supported_extension(path: &str) -> bool { if LanguageKind::from_extension(path).is_some() { diff --git a/crates/codegraph-core/src/parser_registry.rs b/crates/codegraph-core/src/parser_registry.rs index 65a9e9bd..5073bb5a 100644 --- a/crates/codegraph-core/src/parser_registry.rs +++ b/crates/codegraph-core/src/parser_registry.rs @@ -27,6 +27,7 @@ pub enum LanguageKind { Haskell, Ocaml, OcamlInterface, + Julia, Cuda, Clojure, } @@ -60,6 +61,7 @@ impl LanguageKind { Self::Haskell => "haskell", Self::Ocaml => "ocaml", Self::OcamlInterface => "ocaml-interface", + Self::Julia => "julia", Self::Cuda => "cuda", Self::Clojure => "clojure", } @@ -102,6 +104,7 @@ impl LanguageKind { "hs" => Some(Self::Haskell), "ml" => Some(Self::Ocaml), "mli" => Some(Self::OcamlInterface), + "jl" => Some(Self::Julia), "clj" | "cljs" | "cljc" => Some(Self::Clojure), _ => None, } @@ -135,6 +138,7 @@ impl LanguageKind { "haskell" => Some(Self::Haskell), "ocaml" => Some(Self::Ocaml), "ocaml-interface" => Some(Self::OcamlInterface), + "julia" => Some(Self::Julia), "cuda" => Some(Self::Cuda), "clojure" => Some(Self::Clojure), _ => None, @@ -168,6 +172,7 @@ impl LanguageKind { Self::Haskell => tree_sitter_haskell::LANGUAGE.into(), Self::Ocaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(), Self::OcamlInterface => tree_sitter_ocaml::LANGUAGE_OCAML_INTERFACE.into(), + Self::Julia => tree_sitter_julia::LANGUAGE.into(), Self::Cuda => tree_sitter_cuda::LANGUAGE.into(), Self::Clojure => tree_sitter_clojure_orchard::LANGUAGE.into(), } @@ -185,7 +190,7 @@ impl LanguageKind { &[ JavaScript, TypeScript, Tsx, Python, Go, Rust, Java, CSharp, Ruby, Php, Hcl, C, Cpp, Kotlin, Swift, Scala, Bash, Elixir, Lua, Dart, Zig, Haskell, Ocaml, - OcamlInterface, Cuda, Clojure, + OcamlInterface, Julia, Cuda, Clojure, ] } } @@ -255,6 +260,7 @@ mod tests { | LanguageKind::Haskell | LanguageKind::Ocaml | LanguageKind::OcamlInterface + | LanguageKind::Julia | LanguageKind::Cuda | LanguageKind::Clojure => (), }; @@ -263,7 +269,7 @@ mod tests { // Because both checks require the same manual update, they reinforce // each other: a developer who updates the match is reminded to also // update `all()` and this count. - const EXPECTED_LEN: usize = 26; + const EXPECTED_LEN: usize = 27; assert_eq!( LanguageKind::all().len(), EXPECTED_LEN, diff --git a/src/ast-analysis/rules/index.ts b/src/ast-analysis/rules/index.ts index 1968cab4..3360891b 100644 --- a/src/ast-analysis/rules/index.ts +++ b/src/ast-analysis/rules/index.ts @@ -158,6 +158,11 @@ const OCAML_AST_TYPES: Record = { string: 'string', }; +const JULIA_AST_TYPES: Record = { + string_literal: 'string', + prefixed_string_literal: 'string', +}; + const CLOJURE_AST_TYPES: Record = { str_lit: 'string', regex_lit: 'regex', @@ -188,6 +193,7 @@ export const AST_TYPE_MAPS: Map> = new Map([ ['haskell', HASKELL_AST_TYPES], ['ocaml', OCAML_AST_TYPES], ['ocaml-interface', OCAML_AST_TYPES], + ['julia', JULIA_AST_TYPES], ['clojure', CLOJURE_AST_TYPES], ]); @@ -225,6 +231,7 @@ const DART_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"', stringPrefixes: const ZIG_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; const HASKELL_STRING_CONFIG: AstStringConfig = { quoteChars: '"\'', stringPrefixes: '' }; const OCAML_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; +const JULIA_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; const CLOJURE_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; export const AST_STRING_CONFIGS: Map = new Map([ @@ -252,6 +259,7 @@ export const AST_STRING_CONFIGS: Map = new Map([ ['haskell', HASKELL_STRING_CONFIG], ['ocaml', OCAML_STRING_CONFIG], ['ocaml-interface', OCAML_STRING_CONFIG], + ['julia', JULIA_STRING_CONFIG], ['clojure', CLOJURE_STRING_CONFIG], ]); diff --git a/src/domain/parser.ts b/src/domain/parser.ts index 28275572..e6ef7131 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -473,6 +473,7 @@ export const NATIVE_SUPPORTED_EXTENSIONS: ReadonlySet = new Set([ '.hs', '.ml', '.mli', + '.jl', '.clj', '.cljs', '.cljc', diff --git a/tests/parsers/native-drop-classification.test.ts b/tests/parsers/native-drop-classification.test.ts index 3a18dbaa..97041dea 100644 --- a/tests/parsers/native-drop-classification.test.ts +++ b/tests/parsers/native-drop-classification.test.ts @@ -17,7 +17,6 @@ describe('classifyNativeDrops', () => { const { byReason, totals } = classifyNativeDrops([ 'src/a.fs', 'src/b.gleam', - 'src/d.jl', 'src/e.R', 'src/f.erl', 'src/g.sol', @@ -25,7 +24,7 @@ describe('classifyNativeDrops', () => { 'src/j.v', 'src/k.m', ]); - expect(totals['unsupported-by-native']).toBe(9); + expect(totals['unsupported-by-native']).toBe(8); expect(totals['native-extractor-failure']).toBe(0); expect(byReason['unsupported-by-native'].get('.fs')).toEqual(['src/a.fs']); expect(byReason['unsupported-by-native'].get('.gleam')).toEqual(['src/b.gleam']);