-
Notifications
You must be signed in to change notification settings - Fork 10
feat(native): port F# extractor to Rust #1104
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
e4dc776
8895e83
d03aabf
f2229ac
11156d4
c19956e
070933e
2dc3fbf
f7d92e4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,302 @@ | ||
| use tree_sitter::{Node, Tree}; | ||
| use crate::cfg::build_function_cfg; | ||
| use crate::complexity::compute_all_metrics; | ||
| use crate::types::*; | ||
| use super::helpers::*; | ||
| use super::SymbolExtractor; | ||
|
|
||
| pub struct FSharpExtractor; | ||
|
|
||
| impl SymbolExtractor for FSharpExtractor { | ||
| fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { | ||
| let mut symbols = FileSymbols::new(file_path.to_string()); | ||
| walk_tree(&tree.root_node(), source, &mut symbols, match_fsharp_node); | ||
| walk_ast_nodes_with_config(&tree.root_node(), source, &mut symbols.ast_nodes, &FSHARP_AST_CONFIG); | ||
| symbols | ||
| } | ||
| } | ||
|
|
||
| fn match_fsharp_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { | ||
| match node.kind() { | ||
| "named_module" => handle_named_module(node, source, symbols), | ||
| "function_declaration_left" => handle_function_decl(node, source, symbols), | ||
| "type_definition" => handle_type_def(node, source, symbols), | ||
| "import_decl" => handle_import_decl(node, source, symbols), | ||
| "application_expression" => handle_application(node, source, symbols), | ||
| "dot_expression" => handle_dot_expression(node, source, symbols), | ||
| _ => {} | ||
| } | ||
| } | ||
|
|
||
| /// Find the enclosing `named_module` and return its identifier text. | ||
| fn enclosing_module_name(node: &Node, source: &[u8]) -> Option<String> { | ||
| let module = find_parent_of_type(node, "named_module")?; | ||
| let id = find_child(&module, "long_identifier")?; | ||
| Some(node_text(&id, source).to_string()) | ||
| } | ||
|
|
||
| fn handle_named_module(node: &Node, source: &[u8], symbols: &mut FileSymbols) { | ||
| let name_node = match find_child(node, "long_identifier") { | ||
| Some(n) => n, | ||
| None => return, | ||
| }; | ||
| symbols.definitions.push(Definition { | ||
| name: node_text(&name_node, source).to_string(), | ||
| kind: "module".to_string(), | ||
| line: start_line(node), | ||
| end_line: Some(end_line(node)), | ||
| decorators: None, | ||
| complexity: None, | ||
| cfg: None, | ||
| children: None, | ||
| }); | ||
| } | ||
|
|
||
| fn handle_function_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { | ||
| // function_declaration_left: first child is the function name identifier, | ||
| // followed by argument_patterns. | ||
| let name_node = match find_child(node, "identifier") { | ||
| Some(n) => n, | ||
| None => return, | ||
| }; | ||
| let raw_name = node_text(&name_node, source).to_string(); | ||
| let line = start_line(node); | ||
|
|
||
| // Avoid duplicates — the DFS walk also visits the inner curried | ||
| // `function_declaration_left` of multi-parameter functions | ||
| // (e.g. `let add x y = …`), which would otherwise push the same | ||
| // `(name, line)` definition twice. Mirrors the JS extractor's guard, | ||
| // which compares against the raw (unqualified) identifier text. | ||
| if symbols | ||
| .definitions | ||
| .iter() | ||
| .any(|d| d.name == raw_name && d.line == line) | ||
| { | ||
| return; | ||
| } | ||
|
|
||
| let module_name = enclosing_module_name(node, source); | ||
| let qualified = match module_name { | ||
| Some(m) => format!("{}.{}", m, raw_name), | ||
| None => raw_name, | ||
| }; | ||
|
|
||
| let params = extract_fsharp_params(node, source); | ||
|
|
||
| // JS extractor uses the parent's endLine (the function_or_value_defn) for | ||
| // a tighter bound; do the same to preserve parity. | ||
| let end = node.parent().unwrap_or(*node); | ||
|
|
||
| symbols.definitions.push(Definition { | ||
| name: qualified, | ||
| kind: "function".to_string(), | ||
| line, | ||
| end_line: Some(end_line(&end)), | ||
| decorators: None, | ||
| complexity: compute_all_metrics(&end, source, "fsharp"), | ||
| cfg: build_function_cfg(&end, "fsharp", source), | ||
| children: opt_children(params), | ||
| }); | ||
| } | ||
|
|
||
| fn extract_fsharp_params(decl_left: &Node, source: &[u8]) -> Vec<Definition> { | ||
| let mut params = Vec::new(); | ||
| if let Some(arg_patterns) = find_child(decl_left, "argument_patterns") { | ||
| collect_param_identifiers(&arg_patterns, source, &mut params); | ||
| } | ||
| params | ||
| } | ||
|
|
||
| fn collect_param_identifiers(node: &Node, source: &[u8], params: &mut Vec<Definition>) { | ||
| if node.kind() == "identifier" { | ||
| params.push(child_def( | ||
| node_text(node, source).to_string(), | ||
| "parameter", | ||
| start_line(node), | ||
| )); | ||
| return; | ||
| } | ||
| for i in 0..node.child_count() { | ||
| if let Some(child) = node.child(i) { | ||
| collect_param_identifiers(&child, source, params); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| fn handle_type_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { | ||
| // type_definition contains union_type_defn, record_type_defn, etc. | ||
| for i in 0..node.child_count() { | ||
| let child = match node.child(i) { | ||
| Some(c) => c, | ||
| None => continue, | ||
| }; | ||
| let kind = child.kind(); | ||
| if !matches!( | ||
| kind, | ||
| "union_type_defn" | ||
| | "record_type_defn" | ||
| | "type_abbreviation_defn" | ||
| | "class_type_defn" | ||
| | "interface_type_defn" | ||
| | "type_defn" | ||
| ) { | ||
| continue; | ||
| } | ||
|
|
||
| let name = match find_child(&child, "type_name") { | ||
| Some(type_name) => find_child(&type_name, "identifier") | ||
| .map(|n| node_text(&n, source).to_string()) | ||
| .unwrap_or_else(|| node_text(&type_name, source).to_string()), | ||
| None => match find_child(&child, "identifier") { | ||
| Some(id) => node_text(&id, source).to_string(), | ||
| None => continue, | ||
| }, | ||
| }; | ||
|
|
||
| let mut children: Vec<Definition> = Vec::new(); | ||
| extract_type_members(&child, source, &mut children); | ||
|
|
||
| symbols.definitions.push(Definition { | ||
| name, | ||
| kind: determine_type_kind(kind).to_string(), | ||
| line: start_line(&child), | ||
| end_line: Some(end_line(&child)), | ||
| decorators: None, | ||
| complexity: None, | ||
| cfg: None, | ||
| children: opt_children(children), | ||
| }); | ||
| } | ||
| } | ||
|
|
||
| fn determine_type_kind(node_kind: &str) -> &'static str { | ||
| match node_kind { | ||
| "union_type_defn" => "enum", | ||
| "record_type_defn" => "record", | ||
| "class_type_defn" => "class", | ||
| "interface_type_defn" => "interface", | ||
| _ => "type", | ||
| } | ||
| } | ||
|
|
||
| fn extract_type_members(type_defn: &Node, source: &[u8], children: &mut Vec<Definition>) { | ||
| for i in 0..type_defn.child_count() { | ||
| let child = match type_defn.child(i) { | ||
| Some(c) => c, | ||
| None => continue, | ||
| }; | ||
|
|
||
| match child.kind() { | ||
| "union_type_case" => { | ||
| if let Some(name) = find_child(&child, "identifier") { | ||
| children.push(child_def( | ||
| node_text(&name, source).to_string(), | ||
| "property", | ||
| start_line(&child), | ||
| )); | ||
| } | ||
| } | ||
| "record_field" => { | ||
| let name_node = child | ||
| .child_by_field_name("name") | ||
| .or_else(|| find_child(&child, "identifier")); | ||
| if let Some(name) = name_node { | ||
| children.push(child_def( | ||
| node_text(&name, source).to_string(), | ||
| "property", | ||
| start_line(&child), | ||
| )); | ||
| } | ||
| } | ||
| // Recurse into container nodes that hold cases/fields. | ||
| "union_type_cases" | "record_fields" => { | ||
| extract_type_members(&child, source, children); | ||
| } | ||
| _ => {} | ||
| } | ||
| } | ||
| } | ||
|
|
||
| fn handle_import_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { | ||
| let module_node = match find_child(node, "long_identifier") { | ||
| Some(n) => n, | ||
| None => return, | ||
| }; | ||
|
|
||
| let source_name = node_text(&module_node, source).to_string(); | ||
| let last = source_name | ||
| .split('.') | ||
| .last() | ||
| .unwrap_or(&source_name) | ||
| .to_string(); | ||
|
|
||
| symbols | ||
| .imports | ||
| .push(Import::new(source_name, vec![last], start_line(node))); | ||
| } | ||
|
|
||
| fn handle_application(node: &Node, source: &[u8], symbols: &mut FileSymbols) { | ||
| let func_node = match node.child(0) { | ||
| Some(n) => n, | ||
| None => return, | ||
| }; | ||
|
|
||
| // Mirrors the JS extractor's `handleApplication`: the full dotted name | ||
| // (e.g. `Service.createUser`) is stored in `name`. Splitting `name` into | ||
| // `(receiver, method)` would diverge from the JS engine's output and | ||
| // change which resolution rules fire downstream. | ||
| match func_node.kind() { | ||
| "identifier" | "long_identifier" => { | ||
| symbols.calls.push(Call { | ||
| name: node_text(&func_node, source).to_string(), | ||
| line: start_line(node), | ||
| dynamic: None, | ||
| receiver: None, | ||
| }); | ||
| } | ||
| "long_identifier_or_op" => { | ||
| // Inner child is either `identifier` (bare, e.g. `validateUser`) or | ||
| // `long_identifier` (qualified, e.g. `Repository.save`). Order | ||
| // matches the JS extractor (`identifier` first). Operator forms | ||
| // like `( + )` have neither child; we emit nothing in that case, | ||
| // mirroring the JS extractor's silent skip. | ||
| if let Some(inner) = find_child(&func_node, "identifier") | ||
| .or_else(|| find_child(&func_node, "long_identifier")) | ||
| { | ||
| symbols.calls.push(Call { | ||
| name: node_text(&inner, source).to_string(), | ||
| line: start_line(node), | ||
| dynamic: None, | ||
| receiver: None, | ||
| }); | ||
| } | ||
| } | ||
|
Comment on lines
+257
to
+273
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Two behavioural differences exist versus the JS
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed in the merge resolution commit. The handle_application branch for
See |
||
| _ => {} | ||
| } | ||
| } | ||
|
|
||
| fn handle_dot_expression(node: &Node, source: &[u8], symbols: &mut FileSymbols) { | ||
| // Mirrors the JS extractor's `handleDotExpression`: collect identifier | ||
| // segments and emit `name = last`, `receiver = everything-before`. | ||
| let mut parts: Vec<String> = Vec::new(); | ||
| for i in 0..node.child_count() { | ||
| if let Some(child) = node.child(i) { | ||
| match child.kind() { | ||
| "identifier" | "long_identifier" => { | ||
| parts.push(node_text(&child, source).to_string()); | ||
| } | ||
| _ => {} | ||
| } | ||
| } | ||
| } | ||
| if parts.len() >= 2 { | ||
| let method = parts.last().cloned().unwrap_or_default(); | ||
| let receiver = parts[..parts.len() - 1].join("."); | ||
| symbols.calls.push(Call { | ||
| name: method, | ||
| line: start_line(node), | ||
| dynamic: None, | ||
| receiver: Some(receiver), | ||
| }); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The JS
handleFunctionDeclhas an explicit guard (lines 92–94 insrc/extractors/fsharp.ts) that skips adding a definition when one with the samenameandlinealready exists. The comment reads: "Avoid duplicates — the walk will also visit children." This guard exists because tree-sitter-fsharp nestsfunction_declaration_leftnodes for curried multi-parameter functions (e.g.let add x y = x + y), causing the walk to encounter two nodes at the same start line that both expose anidentifier "add"as a direct child. The Rustwalk_treevisits every node exactly once in DFS order, so both the outer and innerfunction_declaration_leftare processed, andhandle_function_declpushes two identicalDefinitionentries where the JS engine pushes one. Any downstream consumer that de-dupes by name+line will be unaffected, but callers that usedefinitions.len()or iterate all entries will see doubled results for every multi-parameter function in an F# file.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed in 2dc3fbf. The Rust extractor now mirrors the JS dedup guard: before pushing a function definition, it checks whether one with the same raw identifier name and start line already exists, and skips if so. The check uses the unqualified
raw_nameto match the JS behavior exactly (where the dedup compares againstnameNode.text). This eliminates the doubledDefinitionentries for curried multi-parameter functions likelet add x y = ….See
crates/codegraph-core/src/extractors/fsharp.rs:55-78.