Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 125 additions & 38 deletions src/extractors/julia.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,17 +83,41 @@ function handleModuleDef(node: TreeSitterNode, ctx: ExtractorOutput): string | n
return nameNode.text;
}

/**
 * Build the recorded name for a definition found inside an optional module.
 *
 * A bare name is prefixed with `currentModule` and a dot. A name that already
 * contains a dot — the text of a `scoped_identifier`, as in
 * `function Base.show ... end` inside `module Foo`, or the short form
 * `Foo.bar(x, y) = x + y` inside `module Outer` — is returned untouched, so
 * the result is never `Foo.Base.show` / `Outer.Foo.bar`.
 *
 * @param base - Name text taken from the definition.
 * @param currentModule - Enclosing module name, or `null` when there is none.
 * @returns The module-prefixed name, or `base` unchanged.
 */
function qualifyName(base: string, currentModule: string | null): string {
  const alreadyQualified = base.includes('.');
  if (!currentModule || alreadyQualified) {
    return base;
  }
  return currentModule + '.' + base;
}

/**
 * Return the `call_expression` that forms the signature of a function or
 * macro definition.
 *
 * tree-sitter-julia places the signature inside a `signature` node, and the
 * `call_expression` carrying the name and parameters is a direct child of
 * that node. Because `findChild` looks at direct children only, the lookup
 * has to start from the `signature` node when one is present. Searching the
 * definition node itself in that situation could pick up the first
 * `call_expression` of the *body* (e.g. a `println(...)` call) instead.
 *
 * @param node - The definition node to inspect.
 * @returns The signature's `call_expression`, or `null` when none is found.
 *   When a `signature` child exists, only that child is searched.
 */
function signatureCall(node: TreeSitterNode): TreeSitterNode | null {
  // Search inside the `signature` wrapper when present, otherwise the node itself.
  const searchRoot = findChild(node, 'signature') ?? node;
  return findChild(searchRoot, 'call_expression');
}

function handleFunctionDef(
node: TreeSitterNode,
ctx: ExtractorOutput,
currentModule: string | null,
): void {
// function_definition may have a call_expression child as the signature
const callSig = findChild(node, 'call_expression');
const callSig = signatureCall(node);
if (callSig) {
const funcNameNode = callSig.child(0);
if (funcNameNode) {
const name = currentModule ? `${currentModule}.${funcNameNode.text}` : funcNameNode.text;
const name = qualifyName(funcNameNode.text, currentModule);
const params = extractJuliaParams(callSig);
ctx.definitions.push({
name,
Expand All @@ -110,9 +134,8 @@ function handleFunctionDef(
const nameNode = node.childForFieldName('name') || findChild(node, 'identifier');
if (!nameNode) return;

const name = currentModule ? `${currentModule}.${nameNode.text}` : nameNode.text;
ctx.definitions.push({
name,
name: qualifyName(nameNode.text, currentModule),
kind: 'function',
line: node.startPosition.row + 1,
endLine: nodeEndLine(node),
Expand All @@ -133,11 +156,10 @@ function handleAssignment(
const funcNameNode = lhs.child(0);
if (!funcNameNode) return;

const name = currentModule ? `${currentModule}.${funcNameNode.text}` : funcNameNode.text;
const params = extractJuliaParams(lhs);

ctx.definitions.push({
name,
name: qualifyName(funcNameNode.text, currentModule),
kind: 'function',
line: node.startPosition.row + 1,
endLine: nodeEndLine(node),
Expand All @@ -146,16 +168,69 @@ function handleAssignment(
}
}

/**
 * Node kinds that `findBaseName` descends into while searching a `type_head`
 * (or one side of a `Name <: Super` expression) for the base-name identifier.
 *
 * Type-parameter / type-argument list kinds are deliberately NOT listed:
 * descending into such a list would hand back a type variable (e.g. `T`)
 * in place of the type's own name.
 */
const TYPE_HEAD_WRAPPERS: ReadonlySet<string> = new Set([
  'binary_expression',
  'parametrized_type_expression',
  'parameterized_identifier',
]);

/**
 * Locate the base-name identifier within a `type_head` node.
 *
 * Handles plain identifiers, `Name <: Super` binary expressions, and
 * parameterized forms like `Name{T}` / `Name{T} <: Super{T,1}` by recursing
 * into the wrapper kinds listed in `TYPE_HEAD_WRAPPERS`.
 *
 * Lookup order:
 *   1. `node` itself, when it is an `identifier`;
 *   2. the `identifier` that `findChild` reports among `node`'s children;
 *   3. the first match found by recursing, in child order, into children
 *      whose kind is a known wrapper.
 *
 * @param node - The `type_head` node, or a sub-node of it.
 * @returns The identifier node for the base name, or `null` when no
 *   identifier can be located — callers should skip emitting a definition
 *   in that case.
 */
function findBaseName(node: TreeSitterNode): TreeSitterNode | null {
  if (node.type === 'identifier') return node;

  const direct = findChild(node, 'identifier');
  if (direct) return direct;

  for (let i = 0; i < node.childCount; i++) {
    const child = node.child(i);
    // Only wrapper kinds are followed, so identifiers nested in unrelated
    // constructs are never mistaken for the type name.
    if (child && TYPE_HEAD_WRAPPERS.has(child.type)) {
      const found = findBaseName(child);
      if (found) return found;
    }
  }
  return null;
}
Comment on lines +196 to +215
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 type_parameter_list / type_argument_list in TYPE_HEAD_WRAPPERS can yield the wrong identifier

findBaseName checks findChild(node, 'identifier') before recursing, so in practice the struct name is found before the loop reaches a type_parameter_list or type_argument_list. However, if findBaseName is ever called with a node that lacks a direct identifier child and does have one of those wrapper types as a child — for example, a future call site or an unusual parameterized form — the function will recurse into type_parameter_list and return the first type-parameter identifier (e.g. T) instead of the struct name. Removing those two entries from TYPE_HEAD_WRAPPERS would eliminate the risk without affecting correctness.

Fix in Claude Code

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 8c2e148 — removed type_parameter_list and type_argument_list from TYPE_HEAD_WRAPPERS in both the WASM and native engines (preserving dual-engine parity per CLAUDE.md). AST inspection confirmed Julia's grammar uses curly_expression for {T} constructs, not those node kinds, so the entries were dead code. Removing them eliminates the risk of recursing into a type-parameter list and returning a type variable as the struct name, as you noted.


function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
// struct_definition: struct type_head fields... end
// type_head wraps the name and optional supertype. The name may be a
// bare `identifier`, a parameterized form (e.g. `Vec{T}`), or either
// of those nested inside a `binary_expression` (`Name <: Super`).
const typeHead = findChild(node, 'type_head');
const nameNode = typeHead
? (findChild(typeHead, 'identifier') ?? typeHead)
: findChild(node, 'identifier');
if (!typeHead) return;

let nameNode: TreeSitterNode | null;
let supertypeNode: TreeSitterNode | null = null;

const binary = findChild(typeHead, 'binary_expression');
if (binary) {
// Walk into each side of the binary expression to find the base-name
// identifier — handles parameterized forms like `Vec{T} <: AbstractArray{T,1}`.
const sides: TreeSitterNode[] = [];
for (let i = 0; i < binary.childCount; i++) {
const c = binary.child(i);
if (c && c.type !== 'operator') sides.push(c);
}
nameNode = sides[0] ? findBaseName(sides[0]) : null;
supertypeNode = sides[1] ? findBaseName(sides[1]) : null;
} else {
nameNode = findBaseName(typeHead);
}
Comment on lines +228 to +241
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Missing test for non-parameterized struct inheritance

The old code explicitly looked for a subtype_expression node (findChild(typeHead, 'subtype_expression')) to detect Point <: AbstractPoint. That path has been entirely removed and replaced with a binary_expression lookup. If the tree-sitter-julia grammar represents simple non-parameterized inheritance with a subtype_expression node (rather than binary_expression), findChild(typeHead, 'binary_expression') returns null, findBaseName(typeHead) recurses without entering subtype_expression (not in TYPE_HEAD_WRAPPERS), and returns null — so the entire struct is silently dropped from ctx.definitions. The new parameterized test (Vec{T} <: AbstractArray{T,1}) confirms the grammar uses binary_expression for that form, but there is no test for the simple case struct Point <: AbstractPoint to verify the same grammar node is used and the extends relationship is still recorded.

Fix in Claude Code

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 8c2e148 — added a test for non-parameterized struct inheritance (struct Point <: AbstractPoint) in tests/parsers/julia.test.ts. Confirmed via AST inspection that the Julia grammar wraps both the simple and parameterized cases in a binary_expression node, so the new code path handles both correctly. The native engine already had this test (crates/codegraph-core/src/extractors/julia.rs:592), so this brings WASM to parity.


if (!nameNode) return;
const structName = nameNode.text;

const children: SubDeclaration[] = [];
// Fields are typed_expression children of struct_definition
for (let i = 0; i < node.childCount; i++) {
const child = node.child(i);
if (!child) continue;
Expand All @@ -168,33 +243,24 @@ function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
line: child.startPosition.row + 1,
});
}
}
// Plain identifier fields (no type annotation)
if (child.type === 'identifier' && child !== nameNode && typeHead && child !== typeHead) {
} else if (child.type === 'identifier') {
// Plain identifier fields (no type annotation) appear as direct
// identifier children of struct_definition. The type_head is a
// separate node so there is nothing to filter out here.
children.push({ name: child.text, kind: 'property', line: child.startPosition.row + 1 });
}
}

// Check for supertype in type_head (Point <: AbstractPoint)
if (typeHead) {
const subtypeExpr = findChild(typeHead, 'subtype_expression');
if (subtypeExpr) {
// Find the supertype identifier
for (let i = 0; i < subtypeExpr.childCount; i++) {
const child = subtypeExpr.child(i);
if (child?.type === 'identifier' && i > 0) {
ctx.classes.push({
name: nameNode.text,
extends: child.text,
line: node.startPosition.row + 1,
});
}
}
}
if (supertypeNode) {
ctx.classes.push({
name: structName,
extends: supertypeNode.text,
line: node.startPosition.row + 1,
});
}

ctx.definitions.push({
name: nameNode.text,
name: structName,
kind: 'struct',
line: node.startPosition.row + 1,
endLine: nodeEndLine(node),
Expand Down Expand Up @@ -232,19 +298,40 @@ function handleMacroDef(
}

function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void {
// tree-sitter-julia shapes:
// `using LinearAlgebra` → using_statement [ using, identifier ]
// `import Foo.Bar` → import_statement [ import, scoped_identifier ]
// `import Base: show` → import_statement [ import, selected_import[Base, show] ]
// `import Foo.Bar: baz` → import_statement [ import, selected_import[scoped_identifier, baz] ]
const names: string[] = [];
let source = '';

for (let i = 0; i < node.childCount; i++) {
const child = node.child(i);
if (!child) continue;
if (
child.type === 'identifier' ||
child.type === 'scoped_identifier' ||
child.type === 'selected_import'
) {
if (!source) source = child.text;
names.push(child.text.split('.').pop() || child.text);
if (child.type === 'identifier' || child.type === 'scoped_identifier') {
const txt = child.text;
if (!source) source = txt;
names.push(txt.split('.').pop() || txt);
} else if (child.type === 'selected_import') {
// First identifier-bearing node is the source module; the rest are
// imported names. The module may itself be a `scoped_identifier`
// (e.g. `import Foo.Bar: baz`) — handle it alongside bare
// `identifier` and use the trailing segment as the display name,
// mirroring the outer loop.
let first = true;
for (let j = 0; j < child.childCount; j++) {
const part = child.child(j);
if (!part) continue;
if (part.type !== 'identifier' && part.type !== 'scoped_identifier') continue;
const txt = part.text;
if (first) {
if (!source) source = txt;
first = false;
} else {
names.push(txt.split('.').pop() || txt);
}
}
}
}

Expand Down
50 changes: 50 additions & 0 deletions tests/parsers/julia.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,54 @@ import Base: show`);
push!(arr, 1)`);
expect(symbols.calls.length).toBeGreaterThanOrEqual(1);
});

it('extracts parameterized struct base name', () => {
// Parameterized struct names (e.g. `Vec{T}`) must record the base
// identifier — not be silently dropped or include type-parameter text.
const symbols = parseJulia(`struct Vec{T} <: AbstractArray{T,1}
data::Vector{T}
end`);
const names = symbols.definitions.map((d) => d.name);
expect(names).toContain('Vec');
// No recorded definition name may carry curly-brace or `<:` syntax.
expect(names.every((n) => !n.includes('{') && !n.includes('<'))).toBe(true);
// Exactly one class record, with base names on both sides of `<:`.
expect(symbols.classes).toHaveLength(1);
expect(symbols.classes[0]).toMatchObject({ name: 'Vec', extends: 'AbstractArray' });
});

it('qualified short-form method does not double-prefix', () => {
// `Foo.bar(x, y) = x + y` inside `module Outer` must record `Foo.bar`,
// not `Outer.Foo.bar` — the scoped_identifier already carries the qualifier.
const symbols = parseJulia(`module Outer
Foo.bar(x, y) = x + y
end`);
const names = symbols.definitions.map((d) => d.name);
// The already-qualified name is kept as written; the enclosing module is not prepended.
expect(names).toContain('Foo.bar');
expect(names).not.toContain('Outer.Foo.bar');
});

it('qualified function def does not double-prefix', () => {
// `function Base.show(io, x) ... end` inside `module Foo` must record
// `Base.show`, not `Foo.Base.show`.
// The body contains a call (`println`) so the test also covers a
// definition whose body has its own call expression.
const symbols = parseJulia(`module Foo
function Base.show(io, x)
println(io, x)
end
end`);
const names = symbols.definitions.map((d) => d.name);
expect(names).toContain('Base.show');
expect(names).not.toContain('Foo.Base.show');
});

it('selected_import handles qualified module', () => {
// `import Foo.Bar: baz` shape, exercised here as
// `import LinearAlgebra.BLAS: gemm` — the module is a scoped_identifier.
// The import must record `LinearAlgebra.BLAS` as the source and `gemm` as
// the imported name, not the malformed `source="gemm", names=["gemm"]`.
const symbols = parseJulia(`import LinearAlgebra.BLAS: gemm`);
expect(symbols.imports).toHaveLength(1);
expect(symbols.imports[0]).toMatchObject({
source: 'LinearAlgebra.BLAS',
});
// The imported name is listed; the qualified module itself is not.
expect(symbols.imports[0].names).toContain('gemm');
expect(symbols.imports[0].names).not.toContain('LinearAlgebra.BLAS');
});
});
Loading