diff --git a/Cargo.lock b/Cargo.lock
index 3ac060a9..34721b50 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -89,6 +89,7 @@ dependencies = [
   "tree-sitter-dart",
   "tree-sitter-elixir",
   "tree-sitter-erlang",
+ "tree-sitter-gleam",
   "tree-sitter-go",
   "tree-sitter-groovy",
   "tree-sitter-haskell",
@@ -826,6 +827,16 @@ dependencies = [
   "tree-sitter-language",
  ]
 
+[[package]]
+name = "tree-sitter-gleam"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0175c53793bda5d444360dd5add25463d18d66afb7f521d6791e2fc61bf2fb3"
+dependencies = [
+ "cc",
+ "tree-sitter-language",
+]
+
  [[package]]
  name = "tree-sitter-go"
  version = "0.23.4"
diff --git a/crates/codegraph-core/Cargo.toml b/crates/codegraph-core/Cargo.toml
index 34b62ffb..6a2483f3 100644
--- a/crates/codegraph-core/Cargo.toml
+++ b/crates/codegraph-core/Cargo.toml
@@ -36,6 +36,7 @@ tree-sitter-dart = "0.0.4"
  tree-sitter-zig = "1"
  tree-sitter-haskell = "0.23"
  tree-sitter-ocaml = "0.24"
+tree-sitter-gleam = "1"
  tree-sitter-julia = "0.23"
  tree-sitter-clojure-orchard = "0.2"
  tree-sitter-erlang = "0.16"
diff --git a/crates/codegraph-core/src/change_detection.rs b/crates/codegraph-core/src/change_detection.rs
index fd30bb6e..ea203c93 100644
--- a/crates/codegraph-core/src/change_detection.rs
+++ b/crates/codegraph-core/src/change_detection.rs
@@ -132,7 +132,7 @@ fn load_file_hashes(conn: &Connection) -> Option> {
  /// found on disk are treated as removed.
  ///
  /// Files whose extension is outside the Rust file_collector's supported set
-/// (e.g. `.gleam`, `.jl`, `.fs` — WASM-only languages) are skipped:
+/// (e.g. `.fs`, `.fsx` — WASM-only languages) are skipped:
  /// the orchestrator's narrower collector never sees them, so absence from
  /// `current` is a capability boundary, not a deletion. Their `nodes` and
  /// `file_hashes` rows are owned by the JS-side WASM backfill (#967, #1068)
@@ -774,15 +774,15 @@ mod tests {
 
      #[test]
      fn detect_removed_skips_unsupported_extensions() {
-        // Files in WASM-only languages (Gleam, F#) live in
+        // Files in WASM-only languages (F#, F# Script) live in
          // `file_hashes` because the JS-side WASM backfill writes them, but
          // Rust's narrower file_collector never collects them. Without this
          // skip, every incremental rebuild would flag them as removed and
          // purge their rows — the #1066 ~2s floor.
          let mut existing = HashMap::new();
          for path in [
-            "tests/fixtures/gleam/main.gleam",
              "tests/fixtures/fsharp/Main.fs",
+            "tests/fixtures/fsharp/Main.fsx",
          ] {
              existing.insert(
                  path.to_string(),
diff --git a/crates/codegraph-core/src/extractors/gleam.rs b/crates/codegraph-core/src/extractors/gleam.rs
new file mode 100644
index 00000000..879929c8
--- /dev/null
+++ b/crates/codegraph-core/src/extractors/gleam.rs
@@ -0,0 +1,451 @@
+use tree_sitter::{Node, Tree};
+use crate::cfg::build_function_cfg;
+use crate::complexity::compute_all_metrics;
+use crate::types::*;
+use super::helpers::*;
+use super::SymbolExtractor;
+
+/// Extracts definitions, imports, and calls from a parsed Gleam source file.
+pub struct GleamExtractor;
+
+impl SymbolExtractor for GleamExtractor {
+    /// Runs `walk_tree` with `match_gleam_node`, then `walk_ast_nodes_with_config`
+    /// with `GLEAM_AST_CONFIG`, and returns the populated `FileSymbols`.
+    fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols {
+        let mut symbols = FileSymbols::new(file_path.to_string());
+        walk_tree(&tree.root_node(), source, &mut symbols, match_gleam_node);
+        walk_ast_nodes_with_config(&tree.root_node(), source, &mut symbols.ast_nodes, &GLEAM_AST_CONFIG);
+        symbols
+    }
+}
+
+/// Dispatches a node to the handler for its kind; other kinds are ignored.
+fn match_gleam_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) {
+    match node.kind() {
+        "function" => handle_function(node, source, symbols),
+        "external_function" => handle_external_function(node, source, symbols),
+        "type_definition" => handle_type_definition(node, source, symbols),
+        "type_alias" => handle_type_alias(node, source, symbols),
+        "constant" => handle_constant(node, source, symbols),
+        "import" => handle_import(node, source, symbols),
+        "function_call" | "call" => handle_call(node, source, symbols),
+        _ => {}
+    }
+}
+
+/// Records a `function` definition with its parameters, complexity metrics, and CFG.
+fn handle_function(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
+    let name_node = match node
+        .child_by_field_name("name")
+        .or_else(|| find_child(node, "identifier"))
+    {
+        Some(n) => n,
+        None => return,
+    };
+
+    let params = extract_params(node, source);
+
+    symbols.definitions.push(Definition {
+        name: node_text(&name_node, source).to_string(),
+        kind: "function".to_string(),
+        line: start_line(node),
+        end_line: Some(end_line(node)),
+        decorators: None,
+        complexity: compute_all_metrics(node, source, "gleam"),
+        cfg: build_function_cfg(node, "gleam", source),
+        children: opt_children(params),
+    });
+}
+
+/// Records an external function as a `function` definition without metrics, CFG, or children.
+fn handle_external_function(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
+    let name_node = match node
+        .child_by_field_name("name")
+        .or_else(|| find_child(node, "identifier"))
+    {
+        Some(n) => n,
+        None => return,
+    };
+
+    symbols.definitions.push(Definition {
+        name: node_text(&name_node, source).to_string(),
+        kind: "function".to_string(),
+        line: start_line(node),
+        end_line: Some(end_line(node)),
+        decorators: None,
+        complexity: None,
+        cfg: None,
+        children: None,
+    });
+}
+
+/// Records a `type` definition whose constructors become `property` children.
+fn handle_type_definition(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
+    // type_definition wraps a type_name child (which itself has a `name` field).
+    // Mirror the JS extractor: prefer a `name` field on the node, fall back to
+    // taking the text of the `type_name` child so we get e.g. `MyType(a, b)`.
+    let name_node = match node
+        .child_by_field_name("name")
+        .or_else(|| find_child(node, "type_name"))
+    {
+        Some(n) => n,
+        None => return,
+    };
+
+    let mut children: Vec<Definition> = Vec::new();
+    for i in 0..node.child_count() {
+        let child = match node.child(i) {
+            Some(c) => c,
+            None => continue,
+        };
+        match child.kind() {
+            // Constructor sitting directly under the type definition.
+            "data_constructor" | "type_constructor" => {
+                push_constructor(&child, source, &mut children);
+            }
+            // Constructors grouped under a wrapper node.
+            "data_constructors" | "type_constructors" => {
+                for j in 0..child.child_count() {
+                    let ctor = match child.child(j) {
+                        Some(c) => c,
+                        None => continue,
+                    };
+                    if ctor.kind() == "data_constructor" || ctor.kind() == "type_constructor" {
+                        push_constructor(&ctor, source, &mut children);
+                    }
+                }
+            }
+            _ => {}
+        }
+    }
+
+    symbols.definitions.push(Definition {
+        name: node_text(&name_node, source).to_string(),
+        kind: "type".to_string(),
+        line: start_line(node),
+        end_line: Some(end_line(node)),
+        decorators: None,
+        complexity: None,
+        cfg: None,
+        children: opt_children(children),
+    });
+}
+
+/// Pushes `ctor` onto `children` as a `property` when its name node is found.
+fn push_constructor(ctor: &Node, source: &[u8], children: &mut Vec<Definition>) {
+    if let Some(ctor_name) = ctor
+        .child_by_field_name("name")
+        .or_else(|| find_child(ctor, "constructor_name"))
+    {
+        children.push(child_def(
+            node_text(&ctor_name, source).to_string(),
+            "property",
+            start_line(ctor),
+        ));
+    }
+}
+
+/// Records a type alias as a `type` definition.
+fn handle_type_alias(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
+    let name_node = match node
+        .child_by_field_name("name")
+        .or_else(|| find_child(node, "type_name"))
+    {
+        Some(n) => n,
+        None => return,
+    };
+
+    symbols.definitions.push(Definition {
+        name: node_text(&name_node, source).to_string(),
+        kind: "type".to_string(),
+        line: start_line(node),
+        end_line: Some(end_line(node)),
+        decorators: None,
+        complexity: None,
+        cfg: None,
+        children: None,
+    });
+}
+
+/// Records a module constant as a `variable` definition.
+fn handle_constant(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
+    let name_node = match node
+        .child_by_field_name("name")
+        .or_else(|| find_child(node, "identifier"))
+    {
+        Some(n) => n,
+        None => return,
+    };
+
+    symbols.definitions.push(Definition {
+        name: node_text(&name_node, source).to_string(),
+        kind: "variable".to_string(),
+        line: start_line(node),
+        end_line: Some(end_line(node)),
+        decorators: None,
+        complexity: None,
+        cfg: None,
+        children: None,
+    });
+}
+
+/// Records an import: module path plus unqualified names, alias, or the default last segment.
+fn handle_import(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
+    // `module` field holds the module path (e.g. `gleam/io` or `repository`).
+    let module_node = match node
+        .child_by_field_name("module")
+        .or_else(|| find_child(node, "module"))
+        .or_else(|| find_child(node, "string"))
+    {
+        Some(n) => n,
+        None => return,
+    };
+
+    let raw = node_text(&module_node, source);
+    let source_path = raw
+        .trim_matches(|c| c == '\'' || c == '"')
+        .to_string();
+    let mut names: Vec<String> = Vec::new();
+
+    // Unqualified imports: `import gleam/io.{println, print}`
+    if let Some(unqualified) = find_child(node, "unqualified_imports") {
+        for i in 0..unqualified.child_count() {
+            let item = match unqualified.child(i) {
+                Some(c) => c,
+                None => continue,
+            };
+            if item.kind() == "unqualified_import" {
+                if let Some(nn) = item.child_by_field_name("name") {
+                    names.push(node_text(&nn, source).to_string());
+                }
+            } else if item.kind() == "identifier" {
+                names.push(node_text(&item, source).to_string());
+            }
+        }
+    }
+
+    // Alias: `import gleam/io as my_io`
+    // Mirror JS: prefer `alias` field, fall back to first identifier child
+    // that isn't the module node itself. Compare by node ID rather than text
+    // so a self-alias like `import mymodule as mymodule` is still recorded.
+    let alias_node = node
+        .child_by_field_name("alias")
+        .or_else(|| find_child(node, "identifier"))
+        .filter(|a| a.id() != module_node.id());
+    if let Some(alias) = alias_node {
+        names.push(node_text(&alias, source).to_string());
+    }
+
+    if names.is_empty() {
+        // Default to the last path segment, mirroring the JS extractor.
+        let default_name = source_path
+            .rsplit('/')
+            .next()
+            .unwrap_or(&source_path)
+            .to_string();
+        names.push(default_name);
+    }
+
+    symbols
+        .imports
+        .push(Import::new(source_path, names, start_line(node)));
+}
+
+/// Records a call; qualified calls (`module.func(...)`) also record the receiver text.
+fn handle_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
+    let func_node = match node
+        .child_by_field_name("function")
+        .or_else(|| node.named_child(0))
+    {
+        Some(n) => n,
+        None => return,
+    };
+
+    match func_node.kind() {
+        "identifier" | "variable" => {
+            symbols.calls.push(Call {
+                name: node_text(&func_node, source).to_string(),
+                line: start_line(node),
+                dynamic: None,
+                receiver: None,
+            });
+        }
+        "field_access" | "module_select" => {
+            // Qualified call: `module.func(args)` parses as field_access
+            // with `record` (module identifier) and `field` (label) fields.
+            let field = func_node
+                .child_by_field_name("field")
+                .or_else(|| func_node.child_by_field_name("label"));
+            let record = func_node
+                .child_by_field_name("record")
+                .or_else(|| func_node.named_child(0));
+            if let Some(f) = field {
+                // Don't use the field itself as the receiver.
+                let receiver = record
+                    .filter(|r| r.id() != f.id())
+                    .map(|r| node_text(&r, source).to_string());
+                symbols.calls.push(Call {
+                    name: node_text(&f, source).to_string(),
+                    line: start_line(node),
+                    dynamic: None,
+                    receiver,
+                });
+            }
+        }
+        _ => {}
+    }
+}
+
+/// Collects the parameters of `func_node` as `parameter` child definitions.
+fn extract_params(func_node: &Node, source: &[u8]) -> Vec<Definition> {
+    let mut params = Vec::new();
+    let params_node = match func_node
+        .child_by_field_name("parameters")
+        .or_else(|| find_child(func_node, "function_parameters"))
+    {
+        Some(n) => n,
+        None => return params,
+    };
+
+    for i in 0..params_node.child_count() {
+        let param = match params_node.child(i) {
+            Some(c) => c,
+            None => continue,
+        };
+        match param.kind() {
+            "function_parameter" | "parameter" => {
+                if let Some(name_node) = param
+                    .child_by_field_name("name")
+                    .or_else(|| find_child(&param, "identifier"))
+                {
+                    params.push(child_def(
+                        node_text(&name_node, source).to_string(),
+                        "parameter",
+                        start_line(&param),
+                    ));
+                }
+            }
+            "identifier" => {
+                params.push(child_def(
+                    node_text(&param, source).to_string(),
+                    "parameter",
+                    start_line(&param),
+                ));
+            }
+            _ => {}
+        }
+    }
+    params
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tree_sitter::Parser;
+
+    fn parse_gleam(code: &str) -> FileSymbols {
+        let mut parser = Parser::new();
+        parser
+            .set_language(&tree_sitter_gleam::LANGUAGE.into())
+            .unwrap();
+        let tree = parser.parse(code.as_bytes(), None).unwrap();
+        GleamExtractor.extract(&tree, code.as_bytes(), "test.gleam")
+    }
+
+    #[test]
+    fn extracts_public_function() {
+        let s = parse_gleam("pub fn greet(name) {\n name\n}\n");
+        let greet = s.definitions.iter().find(|d| d.name == "greet").unwrap();
+        assert_eq!(greet.kind, "function");
+        let children = greet.children.as_ref().expect("expected children");
+        assert!(children.iter().any(|c| c.name == "name" && c.kind == "parameter"));
+    }
+
+    #[test]
+    fn extracts_private_function() {
+        let s = parse_gleam("fn helper() {\n 1\n}\n");
+        assert!(s.definitions.iter().any(|d| d.name == "helper"));
+    }
+
+    #[test]
+    fn extracts_qualified_call_as_receiver_name() {
+        let code = "import repository\n\npub fn main() {\n repository.new_repo()\n}\n";
+        let s = parse_gleam(code);
+        let call = s
+            .calls
+            .iter()
+            .find(|c| c.name == "new_repo")
+            .expect("expected qualified call to new_repo");
+        assert_eq!(call.receiver.as_deref(), Some("repository"));
+    }
+
+    #[test]
+    fn extracts_same_file_call() {
+        let code = "pub fn outer() {\n inner()\n}\n\nfn inner() {\n 1\n}\n";
+        let s = parse_gleam(code);
+        let call = s
+            .calls
+            .iter()
+            .find(|c| c.name == "inner")
+            .expect("expected unqualified call to inner");
+        assert!(call.receiver.is_none());
+    }
+
+    #[test]
+    fn extracts_import_module() {
+        let s = parse_gleam("import gleam/io\n");
+        assert_eq!(s.imports.len(), 1);
+        assert_eq!(s.imports[0].source, "gleam/io");
+        assert_eq!(s.imports[0].names, vec!["io".to_string()]);
+    }
+
+    #[test]
+    fn extracts_unqualified_imports() {
+        let s = parse_gleam("import gleam/io.{println, print}\n");
+        assert_eq!(s.imports.len(), 1);
+        assert_eq!(s.imports[0].source, "gleam/io");
+        assert!(s.imports[0].names.contains(&"println".to_string()));
+        assert!(s.imports[0].names.contains(&"print".to_string()));
+    }
+
+    #[test]
+    fn extracts_aliased_import() {
+        let s = parse_gleam("import gleam/io as my_io\n");
+        assert_eq!(s.imports.len(), 1);
+        assert_eq!(s.imports[0].source, "gleam/io");
+        assert_eq!(s.imports[0].names, vec!["my_io".to_string()]);
+    }
+
+    #[test]
+    fn extracts_type_definition_with_constructors() {
+        let code = "pub type Color {\n Red\n Green\n Blue\n}\n";
+        let s = parse_gleam(code);
+        let color = s
+            .definitions
+            .iter()
+            .find(|d| d.kind == "type")
+            .expect("expected type definition");
+        let children = color.children.as_ref().expect("expected constructors");
+        let names: Vec<&str> = children.iter().map(|c| c.name.as_str()).collect();
+        assert!(names.contains(&"Red"));
+        assert!(names.contains(&"Green"));
+        assert!(names.contains(&"Blue"));
+    }
+
+    #[test]
+    fn extracts_type_alias() {
+        let s = parse_gleam("pub type UserId = Int\n");
+        assert!(s.definitions.iter().any(|d| d.kind == "type"));
+    }
+
+    #[test]
+    fn extracts_constant() {
+        let s = parse_gleam("pub const max_users = 100\n");
+        let c = s
+            .definitions
+            .iter()
+            .find(|d| d.name == "max_users")
+            .expect("expected constant");
+        assert_eq!(c.kind, "variable");
+    }
+}
diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs
index 81bcf1d7..5d78c7fa 100644
--- a/crates/codegraph-core/src/extractors/helpers.rs
+++ b/crates/codegraph-core/src/extractors/helpers.rs
@@ -374,6 +374,16 @@ pub const OCAML_AST_CONFIG: LangAstConfig = LangAstConfig {
      string_prefixes: &[],
  };
 
+pub const GLEAM_AST_CONFIG: LangAstConfig = LangAstConfig {
+    new_types: &[],
+    throw_types: &[],
+    await_types: &[],
+    string_types: &["string"],
+    regex_types: &[],
+    quote_chars: &['"'],
+    string_prefixes: &[],
+};
+
  pub const JULIA_AST_CONFIG: LangAstConfig = LangAstConfig {
      new_types: &[],
      throw_types: &[],
diff --git a/crates/codegraph-core/src/extractors/mod.rs b/crates/codegraph-core/src/extractors/mod.rs
index 3461d8ef..863934da 100644
--- a/crates/codegraph-core/src/extractors/mod.rs
+++ b/crates/codegraph-core/src/extractors/mod.rs
@@ -7,6 +7,7 @@ pub mod cuda;
  pub mod dart;
  pub mod elixir;
  pub mod erlang;
+pub mod gleam;
  pub mod go;
  pub mod groovy;
  pub mod haskell;
@@ -136,6 +137,9 @@ pub fn extract_symbols_with_opts(
          LanguageKind::Ocaml | LanguageKind::OcamlInterface => {
              ocaml::OcamlExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes)
          }
+        LanguageKind::Gleam => {
+            gleam::GleamExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes)
+        }
          LanguageKind::Cuda => {
              cuda::CudaExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes)
          }
diff --git a/crates/codegraph-core/src/file_collector.rs b/crates/codegraph-core/src/file_collector.rs
index f1a9da40..0d3769c8 100644
--- a/crates/codegraph-core/src/file_collector.rs
+++ b/crates/codegraph-core/src/file_collector.rs
@@ -36,7 +36,7 @@ const SUPPORTED_EXTENSIONS: &[&str] = &[
      "js", "jsx", "mjs", "cjs", "ts", "tsx", "d.ts", "py", "pyi", "go", "rs", "java", "cs", "rb",
      "rake", "gemspec", "php", "phtml", "tf", "hcl", "c", "h", "cpp", "cc", "cxx", "hpp", "cu",
      "cuh", "kt", "kts", "swift", "scala", "sh", "bash", "ex", "exs", "lua", "dart", "zig", "hs",
-    "ml", "mli", "jl", "clj", "cljs", "cljc", "erl", "hrl", "groovy", "gvy", "sol",
+    "ml", "mli", "jl", "gleam", "clj", "cljs", "cljc", "erl", "hrl", "groovy", "gvy", "sol",
      // R is case-sensitive: both `.r` and `.R` are conventional.
      "r", "R",
  ];
@@ -46,9 +46,9 @@ const SUPPORTED_EXTENSIONS: &[&str] = &[
  /// Mirrors the predicate at the heart of `collect_files`: a file is collected
  /// if `LanguageKind::from_extension` recognizes it OR its raw extension is in
  /// `SUPPORTED_EXTENSIONS`. Exposed for `change_detection::detect_removed_files`
-/// so that files outside Rust's capability (e.g. WASM-only `.gleam`, `.fs`)
-/// are not flagged as "removed" merely because the orchestrator's
-/// narrower collector never sees them.
+/// so that files outside Rust's capability (e.g. WASM-only `.fs`) are
+/// not flagged as "removed" merely because the orchestrator's narrower
+/// collector never sees them.
  pub fn is_supported_extension(path: &str) -> bool {
      if LanguageKind::from_extension(path).is_some() {
          return true;
diff --git a/crates/codegraph-core/src/parser_registry.rs b/crates/codegraph-core/src/parser_registry.rs
index 99bfdfaf..3950b8d4 100644
--- a/crates/codegraph-core/src/parser_registry.rs
+++ b/crates/codegraph-core/src/parser_registry.rs
@@ -27,6 +27,7 @@ pub enum LanguageKind {
      Haskell,
      Ocaml,
      OcamlInterface,
+    Gleam,
      Julia,
      Cuda,
      Clojure,
@@ -65,6 +66,7 @@ impl LanguageKind {
              Self::Haskell => "haskell",
              Self::Ocaml => "ocaml",
              Self::OcamlInterface => "ocaml-interface",
+            Self::Gleam => "gleam",
              Self::Julia => "julia",
              Self::Cuda => "cuda",
              Self::Clojure => "clojure",
@@ -112,6 +114,7 @@ impl LanguageKind {
              "hs" => Some(Self::Haskell),
              "ml" => Some(Self::Ocaml),
              "mli" => Some(Self::OcamlInterface),
+            "gleam" => Some(Self::Gleam),
              "jl" => Some(Self::Julia),
              "clj" | "cljs" | "cljc" => Some(Self::Clojure),
              "erl" | "hrl" => Some(Self::Erlang),
@@ -152,6 +155,7 @@ impl LanguageKind {
              "haskell" => Some(Self::Haskell),
              "ocaml" => Some(Self::Ocaml),
              "ocaml-interface" => Some(Self::OcamlInterface),
+            "gleam" => Some(Self::Gleam),
              "julia" => Some(Self::Julia),
              "cuda" => Some(Self::Cuda),
              "clojure" => Some(Self::Clojure),
@@ -190,6 +194,7 @@ impl LanguageKind {
              Self::Haskell => tree_sitter_haskell::LANGUAGE.into(),
              Self::Ocaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(),
              Self::OcamlInterface => tree_sitter_ocaml::LANGUAGE_OCAML_INTERFACE.into(),
+            Self::Gleam => tree_sitter_gleam::LANGUAGE.into(),
              Self::Julia => tree_sitter_julia::LANGUAGE.into(),
              Self::Cuda => tree_sitter_cuda::LANGUAGE.into(),
              Self::Clojure => tree_sitter_clojure_orchard::LANGUAGE.into(),
@@ -212,7 +217,7 @@ impl LanguageKind {
          &[
              JavaScript, TypeScript, Tsx, Python, Go, Rust, Java, CSharp, Ruby, Php, Hcl, C, Cpp,
              Kotlin, Swift, Scala, Bash, Elixir, Lua, Dart, Zig, Haskell, Ocaml,
-            OcamlInterface, Julia, Cuda, Clojure, Erlang, Groovy, R, Solidity,
+            OcamlInterface, Gleam, Julia, Cuda, Clojure, Erlang, Groovy, R, Solidity,
          ]
      }
  }
@@ -282,6 +287,7 @@ mod tests {
                  | LanguageKind::Haskell
                  | LanguageKind::Ocaml
                  | LanguageKind::OcamlInterface
+                | LanguageKind::Gleam
                  | LanguageKind::Julia
                  | LanguageKind::Cuda
                  | LanguageKind::Clojure
@@ -295,7 +301,7 @@ mod tests {
          // Because both checks require the same manual update, they reinforce
          // each other: a developer who updates the match is reminded to also
          // update `all()` and this count.
-        const EXPECTED_LEN: usize = 31;
+        const EXPECTED_LEN: usize = 32;
          assert_eq!(
              LanguageKind::all().len(),
              EXPECTED_LEN,
diff --git a/package-lock.json b/package-lock.json
index aaf6b35d..13968eb1 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -7385,6 +7385,7 @@
        "resolved": "git+ssh://git@github.com/gleam-lang/tree-sitter-gleam.git#4e4643c2215c2b2343d9ec179c798818c132c9cc",
        "integrity": "sha512-Wn3hmgf637qVAHOX0YcF9v/DKkkRviR9+ptEgJ/mP+ekD5L+hz5WSWZged9i7SueF4M8UVNQGzBNsesX8XZHjg==",
        "dev": true,
+      "hasInstallScript": true,
        "license": "Apache-2.0",
        "dependencies": {
          "nan": "^2.18.0"
diff --git a/src/ast-analysis/rules/index.ts b/src/ast-analysis/rules/index.ts
index 6998af6f..db1ec252 100644
--- a/src/ast-analysis/rules/index.ts
+++ b/src/ast-analysis/rules/index.ts
@@ -158,6 +158,10 @@ const OCAML_AST_TYPES: Record<string, string> = {
    string: 'string',
  };
 
+const GLEAM_AST_TYPES: Record<string, string> = {
+  string: 'string',
+};
+
  const JULIA_AST_TYPES: Record<string, string> = {
    string_literal: 'string',
    prefixed_string_literal: 'string',
@@ -219,6 +223,7 @@ export const AST_TYPE_MAPS: Map> = new Map([
    ['haskell', HASKELL_AST_TYPES],
    ['ocaml', OCAML_AST_TYPES],
    ['ocaml-interface', OCAML_AST_TYPES],
+  ['gleam', GLEAM_AST_TYPES],
    ['julia', JULIA_AST_TYPES],
    ['clojure', CLOJURE_AST_TYPES],
    ['erlang', ERLANG_AST_TYPES],
@@ -261,6 +266,7 @@ const DART_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"', stringPrefixes:
  const ZIG_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
  const HASKELL_STRING_CONFIG: AstStringConfig = { quoteChars: '"\'', stringPrefixes: '' };
  const OCAML_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
+const GLEAM_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
  const JULIA_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
  const CLOJURE_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
  const ERLANG_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' };
@@ -293,6 +299,7 @@ export const AST_STRING_CONFIGS: Map = new Map([
    ['haskell', HASKELL_STRING_CONFIG],
    ['ocaml', OCAML_STRING_CONFIG],
    ['ocaml-interface', OCAML_STRING_CONFIG],
+  ['gleam', GLEAM_STRING_CONFIG],
    ['julia', JULIA_STRING_CONFIG],
    ['clojure', CLOJURE_STRING_CONFIG],
    ['erlang', ERLANG_STRING_CONFIG],
diff --git a/src/domain/parser.ts b/src/domain/parser.ts
index 12cd8dd2..8b218a46 100644
--- a/src/domain/parser.ts
+++ b/src/domain/parser.ts
@@ -473,6 +473,7 @@ export const NATIVE_SUPPORTED_EXTENSIONS: ReadonlySet = new Set([
    '.hs',
    '.ml',
    '.mli',
+  '.gleam',
    '.jl',
    '.clj',
    '.cljs',
diff --git a/src/extractors/gleam.ts b/src/extractors/gleam.ts
index b7889c58..a20ff994 100644
--- a/src/extractors/gleam.ts
+++ b/src/extractors/gleam.ts
@@ -198,14 +198,16 @@ function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void {
  }
 
  function handleCall(node: TreeSitterNode, ctx: ExtractorOutput): void {
-  const funcNode = node.childForFieldName('function') || node.child(0);
+  const funcNode = node.childForFieldName('function') || node.namedChild(0);
    if (!funcNode) return;
 
    if (funcNode.type === 'identifier' || funcNode.type === 'variable') {
      ctx.calls.push({ name: funcNode.text, line: node.startPosition.row + 1 });
    } else if (funcNode.type === 'field_access' || funcNode.type === 'module_select') {
      const field = funcNode.childForFieldName('field') || funcNode.childForFieldName('label');
-    const record = funcNode.child(0);
+    // Prefer the `record` field; fall back to first named child to skip
+    // anonymous punctuation tokens (the `.` between record and field).
+    const record = funcNode.childForFieldName('record') || funcNode.namedChild(0);
      if (field) {
        const call: Call = { name: field.text, line: node.startPosition.row + 1 };
        if (record && record !== field) call.receiver = record.text;
diff --git a/tests/parsers/native-drop-classification.test.ts b/tests/parsers/native-drop-classification.test.ts
index 710be467..32ae9f50 100644
--- a/tests/parsers/native-drop-classification.test.ts
+++ b/tests/parsers/native-drop-classification.test.ts
@@ -16,15 +16,14 @@ describe('classifyNativeDrops', () => {
    it('groups WASM-only languages under unsupported-by-native', () => {
      const { byReason, totals } = classifyNativeDrops([
        'src/a.fs',
-      'src/b.gleam',
        'src/h.fsx',
        'src/j.v',
        'src/k.m',
      ]);
-    expect(totals['unsupported-by-native']).toBe(5);
+    expect(totals['unsupported-by-native']).toBe(4);
      expect(totals['native-extractor-failure']).toBe(0);
      expect(byReason['unsupported-by-native'].get('.fs')).toEqual(['src/a.fs']);
-    expect(byReason['unsupported-by-native'].get('.gleam')).toEqual(['src/b.gleam']);
+    expect(byReason['unsupported-by-native'].get('.fsx')).toEqual(['src/h.fsx']);
    });
 
    it('flags natively-supported extensions as native-extractor-failure', () => {
@@ -45,12 +44,12 @@ describe('classifyNativeDrops', () => {
        'src/a.ts',
        'src/b.fs',
        'src/c.fs',
-      'src/d.gleam',
+      'src/d.fsx',
      ]);
      expect(totals['native-extractor-failure']).toBe(1);
      expect(totals['unsupported-by-native']).toBe(3);
      expect(byReason['unsupported-by-native'].get('.fs')).toEqual(['src/b.fs', 'src/c.fs']);
-    expect(byReason['unsupported-by-native'].get('.gleam')).toEqual(['src/d.gleam']);
+    expect(byReason['unsupported-by-native'].get('.fsx')).toEqual(['src/d.fsx']);
    });
 
    it('lowercases extensions so .R and .r share a bucket', () => {
@@ -72,8 +71,9 @@ describe('classifyNativeDrops', () => {
 
    it('exposes the native-supported extension set for callers', () => {
      expect(NATIVE_SUPPORTED_EXTENSIONS.has('.ts')).toBe(true);
      expect(NATIVE_SUPPORTED_EXTENSIONS.has('.py')).toBe(true);
+    expect(NATIVE_SUPPORTED_EXTENSIONS.has('.gleam')).toBe(true);
      expect(NATIVE_SUPPORTED_EXTENSIONS.has('.fs')).toBe(false);
-    expect(NATIVE_SUPPORTED_EXTENSIONS.has('.gleam')).toBe(false);
+    expect(NATIVE_SUPPORTED_EXTENSIONS.has('.fsx')).toBe(false);
    });
  });