diff --git a/Cargo.lock b/Cargo.lock index 413504b0..1826277a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -83,6 +83,7 @@ dependencies = [ "tree-sitter-bash", "tree-sitter-c", "tree-sitter-c-sharp", + "tree-sitter-clojure-orchard", "tree-sitter-cpp", "tree-sitter-dart", "tree-sitter-elixir", @@ -759,6 +760,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-clojure-orchard" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e2db28a1ab22649790656936325bdc69e992c38006258694ea39a7620e784d" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-cpp" version = "0.23.4" diff --git a/crates/codegraph-core/Cargo.toml b/crates/codegraph-core/Cargo.toml index e461206c..4247071d 100644 --- a/crates/codegraph-core/Cargo.toml +++ b/crates/codegraph-core/Cargo.toml @@ -35,6 +35,7 @@ tree-sitter-dart = "0.0.4" tree-sitter-zig = "1" tree-sitter-haskell = "0.23" tree-sitter-ocaml = "0.24" +tree-sitter-clojure-orchard = "0.2" rayon = "1" ignore = "0.4" globset = "0.4" diff --git a/crates/codegraph-core/src/change_detection.rs b/crates/codegraph-core/src/change_detection.rs index 08e4b741..c735b1e0 100644 --- a/crates/codegraph-core/src/change_detection.rs +++ b/crates/codegraph-core/src/change_detection.rs @@ -132,7 +132,7 @@ fn load_file_hashes(conn: &Connection) -> Option> { /// found on disk are treated as removed. /// /// Files whose extension is outside the Rust file_collector's supported set -/// (e.g. `.clj`, `.gleam`, `.jl`, `.fs` — WASM-only languages) are skipped: +/// (e.g. `.gleam`, `.jl`, `.fs` — WASM-only languages) are skipped: /// the orchestrator's narrower collector never sees them, so absence from /// `current` is a capability boundary, not a deletion. 
Their `nodes` and /// `file_hashes` rows are owned by the JS-side WASM backfill (#967, #1068) @@ -774,14 +774,13 @@ mod tests { #[test] fn detect_removed_skips_unsupported_extensions() { - // Files in WASM-only languages (Clojure, Gleam, Julia, F#) live in + // Files in WASM-only languages (Gleam, Julia, F#) live in // `file_hashes` because the JS-side WASM backfill writes them, but // Rust's narrower file_collector never collects them. Without this // skip, every incremental rebuild would flag them as removed and // purge their rows — the #1066 ~2s floor. let mut existing = HashMap::new(); for path in [ - "tests/fixtures/clojure/main.clj", "tests/fixtures/gleam/main.gleam", "tests/fixtures/julia/main.jl", "tests/fixtures/fsharp/Main.fs", diff --git a/crates/codegraph-core/src/extractors/clojure.rs b/crates/codegraph-core/src/extractors/clojure.rs new file mode 100644 index 00000000..fc967f82 --- /dev/null +++ b/crates/codegraph-core/src/extractors/clojure.rs @@ -0,0 +1,477 @@ +use super::helpers::*; +use super::SymbolExtractor; +use crate::cfg::build_function_cfg; +use crate::complexity::compute_all_metrics; +use crate::constants::MAX_WALK_DEPTH; +use crate::types::*; +use tree_sitter::{Node, Tree}; + +/// Extract symbols from Clojure files. +/// +/// Clojure tree-sitter grammar (orchard fork of sogaiu/tree-sitter-clojure) notes: +/// - The grammar is minimal: everything is a list/vector/map/symbol +/// - Definitions are detected by the first symbol in a `list_lit`: defn, def, +/// defprotocol, etc. +/// - Namespace: `(ns name ...)` — establishes a module +/// - Imports: `(:require ...)` inside `ns`, or top-level `(require ...)` / `(use ...)` / `(import ...)` +/// +/// Mirrors `extractClojureSymbols` in `src/extractors/clojure.ts` — the JS engine +/// is the source of truth for behavior parity. 
+pub struct ClojureExtractor;
+
+impl SymbolExtractor for ClojureExtractor {
+    /// Build the `FileSymbols` for one parsed Clojure file.
+    ///
+    /// Runs the Clojure-specific form walker first (definitions, imports,
+    /// calls) and then the shared AST-node walker configured by
+    /// `CLOJURE_AST_CONFIG`, which fills `symbols.ast_nodes`.
+    fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols {
+        let root = tree.root_node();
+        let mut symbols = FileSymbols::new(file_path.to_string());
+        walk_clojure(&root, source, &mut symbols, None, 0);
+        walk_ast_nodes_with_config(&root, source, &mut symbols.ast_nodes, &CLOJURE_AST_CONFIG);
+        symbols
+    }
+}
+
+/// Walk the tree, dispatching on `list_lit` forms and threading the current
+/// namespace through children (matches the `currentNs` parameter in the JS
+/// `walkClojureNode`). Note: the JS implementation only propagates `nextNs`
+/// to *children* of the form that established it — siblings in the source root
+/// do not inherit it. This Rust port preserves that behavior so top-level
+/// `defn` forms produce unqualified names (matching the fixture's
+/// `expected-edges.json`).
+///
+/// * `node` — subtree root to visit.
+/// * `source` — raw bytes of the file, used to read node text.
+/// * `symbols` — accumulator that receives definitions, imports and calls.
+/// * `current_ns` — namespace inherited from an enclosing `ns` form, if any.
+/// * `depth` — recursion depth; the walk stops once it reaches `MAX_WALK_DEPTH`.
+fn walk_clojure(
+    node: &Node,
+    source: &[u8],
+    symbols: &mut FileSymbols,
+    current_ns: Option<&str>,
+    depth: usize,
+) {
+    if depth >= MAX_WALK_DEPTH {
+        return;
+    }
+
+    // Only a list form can be an `ns` declaration. When it is, the namespace
+    // it declares is owned here so it can be lent to the recursive calls below.
+    let declared_ns: Option<String> = if node.kind() == "list_lit" {
+        handle_list_form(node, source, symbols, current_ns)
+    } else {
+        None
+    };
+    // A freshly declared namespace wins; otherwise keep the inherited one.
+    let next_ns = declared_ns.as_deref().or(current_ns);
+
+    for i in 0..node.child_count() {
+        if let Some(child) = node.child(i) {
+            walk_clojure(&child, source, symbols, next_ns, depth + 1);
+        }
+    }
+}
+
+/// Dispatch on the first symbol in a list form. Returns `Some(ns_name)` if
+/// this form is an `ns` declaration so the namespace can be threaded into
+/// its children.
+fn handle_list_form(
+    node: &Node,
+    source: &[u8],
+    symbols: &mut FileSymbols,
+    current_ns: Option<&str>,
+) -> Option<String> {
+    // Forms that do not start with a symbol or keyword (e.g. `((f) x)`) are
+    // neither definitions nor named calls; `?` returns `None` for them.
+    let first_sym = find_first_symbol(node)?;
+    let name = node_text(&first_sym, source);
+
+    match name {
+        // The only arm that yields a namespace for the caller to thread down.
+        "ns" => return handle_ns_form(node, source, symbols),
+        "def" | "defonce" => handle_def_form(node, source, symbols, current_ns, "variable"),
+        "defmulti" => handle_def_form(node, source, symbols, current_ns, "function"),
+        // All function-like definers share one handler; the bound name is the
+        // second symbol of the form in every case.
+        "defn" | "defn-" | "defmacro" | "defmethod" => {
+            handle_defn_form(node, source, symbols, current_ns)
+        }
+        "defprotocol" => handle_defprotocol(node, source, symbols),
+        "defrecord" => handle_defrecord(node, source, symbols, "record"),
+        "deftype" => handle_defrecord(node, source, symbols, "type"),
+        "require" | "use" | "import" => handle_import_form(node, source, symbols, name),
+        _ => {
+            // Regular function call — only push if not a keyword (`:foo`) or
+            // accidental delimiter capture (`(`).
+            if !name.starts_with(':') && !name.starts_with('(') {
+                symbols.calls.push(Call {
+                    name: name.to_string(),
+                    line: start_line(node),
+                    dynamic: None,
+                    receiver: None,
+                });
+            }
+        }
+    }
+
+    None
+}
+
+/// Find the first `sym_lit` or `kwd_lit` child, skipping delimiters and metadata.
+/// Mirrors `findFirstSymbol` in the JS extractor.
+///
+/// The search stops at the first child that is neither a delimiter/metadata
+/// token nor a symbol/keyword, so a form whose head is another list yields
+/// `None`.
+///
+/// A missing child at index `i < child_count()` is treated as "skip and continue"
+/// to match the JS counterpart (`if (!child) continue;`), rather than aborting
+/// the search via `?`.
+fn find_first_symbol<'a>(list_node: &Node<'a>) -> Option<Node<'a>> {
+    for i in 0..list_node.child_count() {
+        let child = match list_node.child(i) {
+            Some(c) => c,
+            None => continue,
+        };
+        let kind = child.kind();
+        if is_delimiter_or_meta(kind) {
+            continue;
+        }
+        // The first meaningful child decides the outcome: a symbol/keyword is
+        // the head of the form, anything else means the form has no named head.
+        return if is_symbol_or_keyword(kind) {
+            Some(child)
+        } else {
+            None
+        };
+    }
+    None
+}
+
+/// Find the second `sym_lit` or `kwd_lit` child. Used to extract the bound
+/// name from forms like `(defn foo [...] ...)`.
+///
+/// Unlike `find_first_symbol`, the scan does not stop at other node kinds: it
+/// simply ignores every child that is not a symbol or keyword. A missing child
+/// is skipped (not propagated via `?`) to preserve parity with the JS extractor.
+fn find_second_symbol<'a>(list_node: &Node<'a>) -> Option<Node<'a>> {
+    (0..list_node.child_count())
+        .filter_map(|i| list_node.child(i))
+        .filter(|child| is_symbol_or_keyword(child.kind()))
+        .nth(1)
+}
+
+/// `true` for delimiter tokens (`(`, `)`, `[`, `]`, `{`, `}`, `#`) and the
+/// `meta_lit` node kind, matching the JS check `'()[]{}#'.includes(child.type)`.
+fn is_delimiter_or_meta(kind: &str) -> bool {
+    matches!(kind, "(" | ")" | "[" | "]" | "{" | "}" | "#" | "meta_lit")
+}
+
+/// `true` for the two node kinds that can name a form: `sym_lit` and `kwd_lit`.
+fn is_symbol_or_keyword(kind: &str) -> bool {
+    matches!(kind, "sym_lit" | "kwd_lit")
+}
+
+/// Prefix `raw_name` with `ns/` when a namespace is in scope; otherwise return
+/// it unchanged.
+fn qualify_name(current_ns: Option<&str>, raw_name: &str) -> String {
+    match current_ns {
+        Some(ns) => format!("{}/{}", ns, raw_name),
+        None => raw_name.to_string(),
+    }
+}
+
+/// Record a definition spanning `node` that carries no complexity, CFG or
+/// children (everything except function-like forms).
+fn push_simple_definition(node: &Node, symbols: &mut FileSymbols, name: String, kind: &str) {
+    symbols.definitions.push(Definition {
+        name,
+        kind: kind.to_string(),
+        line: start_line(node),
+        end_line: Some(end_line(node)),
+        decorators: None,
+        complexity: None,
+        cfg: None,
+        children: None,
+    });
+}
+
+/// Record a definition named by the second symbol of `node`, without any
+/// namespace qualification. Does nothing when the form has no second symbol.
+fn push_unqualified_definition(
+    node: &Node,
+    source: &[u8],
+    symbols: &mut FileSymbols,
+    kind: &str,
+) {
+    if let Some(name_node) = find_second_symbol(node) {
+        let name = node_text(&name_node, source).to_string();
+        push_simple_definition(node, symbols, name, kind);
+    }
+}
+
+/// Record an import of namespace `ns`, located at `anchor`. The imported name
+/// is the last dot-separated segment of `ns` (the whole text if it has no dot).
+fn push_ns_import(symbols: &mut FileSymbols, ns: &str, anchor: &Node) {
+    let short_name = ns.rsplit('.').next().unwrap_or(ns).to_string();
+    symbols
+        .imports
+        .push(Import::new(ns.to_string(), vec![short_name], start_line(anchor)));
+}
+
+/// Handle `(ns name ...)`: record the namespace as a `module` definition and
+/// collect imports from its `(:require ...)`, `(:import ...)` and `(:use ...)`
+/// clauses.
+///
+/// Returns the namespace name, or `None` when the form has no name symbol (in
+/// which case nothing is recorded).
+fn handle_ns_form(node: &Node, source: &[u8], symbols: &mut FileSymbols) -> Option<String> {
+    let name_node = find_second_symbol(node)?;
+    let ns_name = node_text(&name_node, source).to_string();
+
+    push_simple_definition(node, symbols, ns_name.clone(), "module");
+
+    // Scan for nested `(:require ...)`, `(:import ...)`, `(:use ...)` forms.
+    for i in 0..node.child_count() {
+        let clause = match node.child(i) {
+            Some(c) if c.kind() == "list_lit" => c,
+            _ => continue,
+        };
+        let is_import_clause = find_first_symbol(&clause)
+            .map(|kw| matches!(node_text(&kw, source), ":require" | ":import" | ":use"))
+            .unwrap_or(false);
+        if is_import_clause {
+            extract_ns_requires(&clause, source, symbols);
+        }
+    }
+
+    Some(ns_name)
+}
+
+/// Collect one import per entry of a `(:require ...)` / `(:import ...)` /
+/// `(:use ...)` clause inside an `ns` form.
+fn extract_ns_requires(require_form: &Node, source: &[u8], symbols: &mut FileSymbols) {
+    for i in 0..require_form.child_count() {
+        let child = match require_form.child(i) {
+            Some(c) => c,
+            None => continue,
+        };
+
+        match child.kind() {
+            // Vector form: `[some.ns :as alias]` — the first symbol is the namespace.
+            "vec_lit" => {
+                if let Some(sym) = find_first_symbol(&child) {
+                    push_ns_import(symbols, node_text(&sym, source), &child);
+                }
+            }
+            // Bare-symbol form: `some.ns` (only after the leading `:require` keyword,
+            // so guard against picking up the `:require` itself).
+            "sym_lit" if i > 0 => {
+                let text = node_text(&child, source);
+                if !text.starts_with(':') {
+                    push_ns_import(symbols, text, &child);
+                }
+            }
+            _ => {}
+        }
+    }
+}
+
+/// Handle value-like definers (`def`, `defonce`, `defmulti`): record the bound
+/// name, namespace-qualified when `current_ns` is set, with the given `kind`.
+/// Does nothing when the form has no name symbol.
+fn handle_def_form(
+    node: &Node,
+    source: &[u8],
+    symbols: &mut FileSymbols,
+    current_ns: Option<&str>,
+    kind: &str,
+) {
+    if let Some(name_node) = find_second_symbol(node) {
+        let full_name = qualify_name(current_ns, node_text(&name_node, source));
+        push_simple_definition(node, symbols, full_name, kind);
+    }
+}
+
+/// Handle function-like definers (`defn`, `defn-`, `defmacro`, `defmethod`):
+/// record a `function` definition with complexity metrics, a CFG and its
+/// parameters as children. Does nothing when the form has no name symbol.
+fn handle_defn_form(
+    node: &Node,
+    source: &[u8],
+    symbols: &mut FileSymbols,
+    current_ns: Option<&str>,
+) {
+    let name_node = match find_second_symbol(node) {
+        Some(n) => n,
+        None => return,
+    };
+    let full_name = qualify_name(current_ns, node_text(&name_node, source));
+    let params = extract_clojure_params(node, source);
+
+    // Note: visibility (defn vs defn-) would distinguish public/private,
+    // but the `Definition` struct does not yet expose a visibility field.
+    // When it does, wire `keyword == "defn-"` → private.
+    symbols.definitions.push(Definition {
+        name: full_name,
+        kind: "function".to_string(),
+        line: start_line(node),
+        end_line: Some(end_line(node)),
+        decorators: None,
+        complexity: compute_all_metrics(node, source, "clojure"),
+        cfg: build_function_cfg(node, "clojure", source),
+        children: opt_children(params),
+    });
+}
+
+/// Collect the `sym_lit` entries of the first `vec_lit` that is a direct child
+/// of `defn_node` as `parameter` children. Returns an empty vector when the
+/// form has no direct vector child.
+///
+/// Known limitation (parity with JS extractor): only the *first* vector is
+/// inspected. For a `defmethod` whose dispatch value is itself a vector, e.g.
+/// `(defmethod foo [:a :b] [x] body)`, the dispatch vector is examined and the
+/// real parameter vector `[x]` is never reached. A dispatch vector made of
+/// keywords therefore yields no parameters; one that contains symbols would
+/// have those symbols reported as parameters.
+// NOTE(review): the element type `Definition` was restored from garbled text,
+// based on how `child_def` / `opt_children` are used here — confirm against
+// their signatures in `helpers`.
+fn extract_clojure_params(defn_node: &Node, source: &[u8]) -> Vec<Definition> {
+    let first_vector = (0..defn_node.child_count())
+        .filter_map(|i| defn_node.child(i))
+        .find(|child| child.kind() == "vec_lit");
+    let param_vector = match first_vector {
+        Some(v) => v,
+        None => return Vec::new(),
+    };
+
+    (0..param_vector.child_count())
+        .filter_map(|j| param_vector.child(j))
+        .filter(|param| param.kind() == "sym_lit")
+        .map(|param| {
+            child_def(
+                node_text(&param, source).to_string(),
+                "parameter",
+                start_line(&param),
+            )
+        })
+        .collect()
+}
+
+/// Handle `(defprotocol Name ...)`: record `Name` as an `interface`.
+fn handle_defprotocol(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
+    push_unqualified_definition(node, source, symbols, "interface");
+}
+
+/// Handle `(defrecord Name ...)` / `(deftype Name ...)`: record `Name` with the
+/// caller-supplied `kind`.
+fn handle_defrecord(node: &Node, source: &[u8], symbols: &mut FileSymbols, kind: &str) {
+    push_unqualified_definition(node, source, symbols, kind);
+}
+
+/// Handle a top-level `(require ...)`, `(use ...)`, or `(import ...)` form.
+///
+/// Known limitation (parity with JS extractor): in real Clojure code these
+/// top-level forms almost always use a quoted symbol (`(require 'some.ns)`
+/// → `quoting_lit`) or a quoted vector (`(require '[some.ns :as s])`).
+/// `find_second_symbol` only matches `sym_lit` / `kwd_lit`, so those shapes
+/// return `None` and the import is silently dropped here. Imports inside
+/// `(ns ...)` declarations are still extracted correctly by
+/// `extract_ns_requires` — that path is the recommended one and covers
+/// real-world Clojure code, while this top-level fallback only handles the
+/// degenerate unquoted shape.
+fn handle_import_form(node: &Node, source: &[u8], symbols: &mut FileSymbols, keyword: &str) {
+    // The imported target is the second symbol of the form; without one there
+    // is nothing to record.
+    let name_node = match find_second_symbol(node) {
+        Some(n) => n,
+        None => return,
+    };
+    // The form keyword (`require` / `use` / `import`) is stored as the only
+    // imported name.
+    symbols.imports.push(Import::new(
+        node_text(&name_node, source).to_string(),
+        vec![keyword.to_string()],
+        start_line(node),
+    ));
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tree_sitter::Parser;
+
+    /// Parse `code` with the Clojure grammar and run `ClojureExtractor` on it,
+    /// reporting the file as `test.clj`. Panics if the grammar cannot be
+    /// loaded or parsing returns no tree.
+    fn parse_clj(code: &str) -> FileSymbols {
+        let mut parser = Parser::new();
+        parser
+            .set_language(&tree_sitter_clojure_orchard::LANGUAGE.into())
+            .unwrap();
+        let tree = parser.parse(code.as_bytes(), None).unwrap();
+        ClojureExtractor.extract(&tree, code.as_bytes(), "test.clj")
+    }
+
+    // A top-level `defn` yields an unqualified `function` with its parameters
+    // attached as children.
+    #[test]
+    fn extracts_defn() {
+        let s = parse_clj("(defn greet [name] (println name))");
+        let greet = s.definitions.iter().find(|d| d.name == "greet").unwrap();
+        assert_eq!(greet.kind, "function");
+        let params = greet.children.as_ref().expect("params");
+        assert_eq!(params.len(), 1);
+        assert_eq!(params[0].name, "name");
+        assert_eq!(params[0].kind, "parameter");
+    }
+
+    // `defn-` is reported with the same `function` kind as `defn`.
+    #[test]
+    fn extracts_private_defn() {
+        let s = parse_clj("(defn- helper [x] x)");
+        let helper = s.definitions.iter().find(|d| d.name == "helper").unwrap();
+        assert_eq!(helper.kind, "function");
+    }
+
+    // The `ns` form becomes a `module`, and each `[ns :as alias]` vector in
+    // its `:require` clause becomes one import.
+    #[test]
+    fn extracts_ns_and_requires() {
+        let s = parse_clj(
+            "(ns app.main\n (:require [app.service :as service]\n [app.repository :as repository]))",
+        );
+        let ns = s.definitions.iter().find(|d| d.name == "app.main").unwrap();
+        assert_eq!(ns.kind, "module");
+        assert_eq!(s.imports.len(), 2);
+        let sources: Vec<&str> = s.imports.iter().map(|i| i.source.as_str()).collect();
+        assert!(sources.contains(&"app.service"));
+        assert!(sources.contains(&"app.repository"));
+    }
+
+    // A namespaced call keeps its full `alias/name` spelling as the call name.
+    #[test]
+    fn extracts_qualified_call() {
+        let s = parse_clj("(defn run [] (service/create-user))");
+        assert!(s.calls.iter().any(|c| c.name == "service/create-user"));
+    }
+
+    #[test]
+    fn extracts_defprotocol_as_interface() {
+        let s = parse_clj("(defprotocol Greeter (greet [this]))");
+        let proto = s.definitions.iter().find(|d| d.name == "Greeter").unwrap();
+        assert_eq!(proto.kind, "interface");
+    }
+
+    #[test]
+    fn extracts_defrecord_as_record() {
+        let s = parse_clj("(defrecord Point [x y])");
+        let rec = s.definitions.iter().find(|d| d.name == "Point").unwrap();
+        assert_eq!(rec.kind, "record");
+    }
+
+    #[test]
+    fn extracts_deftype_as_type() {
+        let s = parse_clj("(deftype Box [v])");
+        let t = s.definitions.iter().find(|d| d.name == "Box").unwrap();
+        assert_eq!(t.kind, "type");
+    }
+
+    #[test]
+    fn extracts_def_as_variable() {
+        let s = parse_clj("(def pi 3.14)");
+        let pi = s.definitions.iter().find(|d| d.name == "pi").unwrap();
+        assert_eq!(pi.kind, "variable");
+    }
+
+    #[test]
+    fn skips_keyword_first_symbol_as_call() {
+        // `:require` is a keyword, not a callable — must not produce a call.
+        let s = parse_clj("(:require [x])");
+        assert!(!s.calls.iter().any(|c| c.name.starts_with(':')));
+    }
+}
diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs
index b0253189..9523788c 100644
--- a/crates/codegraph-core/src/extractors/helpers.rs
+++ b/crates/codegraph-core/src/extractors/helpers.rs
@@ -360,6 +360,16 @@ pub const OCAML_AST_CONFIG: LangAstConfig = LangAstConfig {
     string_prefixes: &[],
 };
 
+pub const CLOJURE_AST_CONFIG: LangAstConfig = LangAstConfig {
+    new_types: &[],
+    throw_types: &[],
+    await_types: &[],
+    string_types: &["str_lit"],
+    regex_types: &["regex_lit"],
+    quote_chars: &['"'],
+    string_prefixes: &[],
+};
+
 // ── Generic AST node walker ──────────────────────────────────────────────────
 
 /// Node types that represent identifiers across languages.
diff --git a/crates/codegraph-core/src/extractors/mod.rs b/crates/codegraph-core/src/extractors/mod.rs index 642f29f9..723e341f 100644 --- a/crates/codegraph-core/src/extractors/mod.rs +++ b/crates/codegraph-core/src/extractors/mod.rs @@ -1,5 +1,6 @@ pub mod bash; pub mod c; +pub mod clojure; pub mod cpp; pub mod csharp; pub mod dart; @@ -126,5 +127,8 @@ pub fn extract_symbols_with_opts( LanguageKind::Ocaml | LanguageKind::OcamlInterface => { ocaml::OcamlExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) } + LanguageKind::Clojure => { + clojure::ClojureExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) + } } } diff --git a/crates/codegraph-core/src/file_collector.rs b/crates/codegraph-core/src/file_collector.rs index 0cb15781..1e357cd8 100644 --- a/crates/codegraph-core/src/file_collector.rs +++ b/crates/codegraph-core/src/file_collector.rs @@ -36,6 +36,7 @@ const SUPPORTED_EXTENSIONS: &[&str] = &[ "js", "jsx", "mjs", "cjs", "ts", "tsx", "d.ts", "py", "pyi", "go", "rs", "java", "cs", "rb", "rake", "gemspec", "php", "phtml", "tf", "hcl", "c", "h", "cpp", "cc", "cxx", "hpp", "kt", "kts", "swift", "scala", "sh", "bash", "ex", "exs", "lua", "dart", "zig", "hs", "ml", "mli", + "clj", "cljs", "cljc", ]; /// Returns whether `path` has an extension the Rust file_collector would accept. @@ -43,8 +44,8 @@ const SUPPORTED_EXTENSIONS: &[&str] = &[ /// Mirrors the predicate at the heart of `collect_files`: a file is collected /// if `LanguageKind::from_extension` recognizes it OR its raw extension is in /// `SUPPORTED_EXTENSIONS`. Exposed for `change_detection::detect_removed_files` -/// so that files outside Rust's capability (e.g. WASM-only `.clj`, `.gleam`, -/// `.jl`) are not flagged as "removed" merely because the orchestrator's +/// so that files outside Rust's capability (e.g. WASM-only `.gleam`, `.jl`, +/// `.fs`) are not flagged as "removed" merely because the orchestrator's /// narrower collector never sees them. 
pub fn is_supported_extension(path: &str) -> bool { if LanguageKind::from_extension(path).is_some() { diff --git a/crates/codegraph-core/src/parser_registry.rs b/crates/codegraph-core/src/parser_registry.rs index c87957f2..4dcea011 100644 --- a/crates/codegraph-core/src/parser_registry.rs +++ b/crates/codegraph-core/src/parser_registry.rs @@ -27,6 +27,7 @@ pub enum LanguageKind { Haskell, Ocaml, OcamlInterface, + Clojure, } impl LanguageKind { @@ -58,6 +59,7 @@ impl LanguageKind { Self::Haskell => "haskell", Self::Ocaml => "ocaml", Self::OcamlInterface => "ocaml-interface", + Self::Clojure => "clojure", } } @@ -97,6 +99,7 @@ impl LanguageKind { "hs" => Some(Self::Haskell), "ml" => Some(Self::Ocaml), "mli" => Some(Self::OcamlInterface), + "clj" | "cljs" | "cljc" => Some(Self::Clojure), _ => None, } } @@ -129,6 +132,7 @@ impl LanguageKind { "haskell" => Some(Self::Haskell), "ocaml" => Some(Self::Ocaml), "ocaml-interface" => Some(Self::OcamlInterface), + "clojure" => Some(Self::Clojure), _ => None, } } @@ -160,6 +164,7 @@ impl LanguageKind { Self::Haskell => tree_sitter_haskell::LANGUAGE.into(), Self::Ocaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(), Self::OcamlInterface => tree_sitter_ocaml::LANGUAGE_OCAML_INTERFACE.into(), + Self::Clojure => tree_sitter_clojure_orchard::LANGUAGE.into(), } } @@ -175,7 +180,7 @@ impl LanguageKind { &[ JavaScript, TypeScript, Tsx, Python, Go, Rust, Java, CSharp, Ruby, Php, Hcl, C, Cpp, Kotlin, Swift, Scala, Bash, Elixir, Lua, Dart, Zig, Haskell, Ocaml, - OcamlInterface, + OcamlInterface, Clojure, ] } } @@ -244,14 +249,15 @@ mod tests { | LanguageKind::Zig | LanguageKind::Haskell | LanguageKind::Ocaml - | LanguageKind::OcamlInterface => (), + | LanguageKind::OcamlInterface + | LanguageKind::Clojure => (), }; // IMPORTANT: this constant must equal the number of arms in the match // above AND the length of the slice returned by `LanguageKind::all()`. 
// Because both checks require the same manual update, they reinforce // each other: a developer who updates the match is reminded to also // update `all()` and this count. - const EXPECTED_LEN: usize = 24; + const EXPECTED_LEN: usize = 25; assert_eq!( LanguageKind::all().len(), EXPECTED_LEN, diff --git a/src/ast-analysis/rules/index.ts b/src/ast-analysis/rules/index.ts index 653cbd59..733370dd 100644 --- a/src/ast-analysis/rules/index.ts +++ b/src/ast-analysis/rules/index.ts @@ -153,6 +153,11 @@ const OCAML_AST_TYPES: Record = { string: 'string', }; +const CLOJURE_AST_TYPES: Record = { + str_lit: 'string', + regex_lit: 'regex', +}; + export const AST_TYPE_MAPS: Map> = new Map([ ['javascript', JS_AST_TYPES], ['typescript', JS_AST_TYPES], @@ -177,6 +182,7 @@ export const AST_TYPE_MAPS: Map> = new Map([ ['haskell', HASKELL_AST_TYPES], ['ocaml', OCAML_AST_TYPES], ['ocaml-interface', OCAML_AST_TYPES], + ['clojure', CLOJURE_AST_TYPES], ]); // ─── Per-language string-extraction config ─────────────────────────────── @@ -211,6 +217,7 @@ const DART_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"', stringPrefixes: const ZIG_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; const HASKELL_STRING_CONFIG: AstStringConfig = { quoteChars: '"\'', stringPrefixes: '' }; const OCAML_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; +const CLOJURE_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; export const AST_STRING_CONFIGS: Map = new Map([ ['javascript', JS_STRING_CONFIG], @@ -236,6 +243,7 @@ export const AST_STRING_CONFIGS: Map = new Map([ ['haskell', HASKELL_STRING_CONFIG], ['ocaml', OCAML_STRING_CONFIG], ['ocaml-interface', OCAML_STRING_CONFIG], + ['clojure', CLOJURE_STRING_CONFIG], ]); // ─── Per-language "stop-after-collect" kinds ───────────────────────────── diff --git a/src/domain/parser.ts b/src/domain/parser.ts index f1c7dd80..63083978 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts 
@@ -471,6 +471,9 @@ export const NATIVE_SUPPORTED_EXTENSIONS: ReadonlySet = new Set([ '.hs', '.ml', '.mli', + '.clj', + '.cljs', + '.cljc', ]); /** diff --git a/tests/parsers/native-drop-classification.test.ts b/tests/parsers/native-drop-classification.test.ts index 24aee1d5..9e7d0b5e 100644 --- a/tests/parsers/native-drop-classification.test.ts +++ b/tests/parsers/native-drop-classification.test.ts @@ -17,7 +17,6 @@ describe('classifyNativeDrops', () => { const { byReason, totals } = classifyNativeDrops([ 'src/a.fs', 'src/b.gleam', - 'src/c.clj', 'src/d.jl', 'src/e.R', 'src/f.erl', @@ -27,7 +26,7 @@ describe('classifyNativeDrops', () => { 'src/j.v', 'src/k.m', ]); - expect(totals['unsupported-by-native']).toBe(11); + expect(totals['unsupported-by-native']).toBe(10); expect(totals['native-extractor-failure']).toBe(0); expect(byReason['unsupported-by-native'].get('.fs')).toEqual(['src/a.fs']); expect(byReason['unsupported-by-native'].get('.gleam')).toEqual(['src/b.gleam']);