Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ Source is TypeScript in `src/`, compiled via `tsup`. The Rust native engine live

**Configuration:** All tunable behavioral constants live in `DEFAULTS` in `src/infrastructure/config.ts`, grouped by concern (`analysis`, `risk`, `search`, `display`, `community`, `structure`, `mcp`, `check`, `coChange`, `manifesto`). Users override via `.codegraphrc.json` — `mergeConfig` deep-merges recursively so partial overrides preserve sibling keys. Env vars override LLM settings (`CODEGRAPH_LLM_*`). When adding new behavioral constants, **always add them to `DEFAULTS`** and wire them through config — never introduce new hardcoded magic numbers in individual modules. Category F values (safety boundaries, standard formulas, platform concerns) are the only exception.

**Database:** SQLite at `.codegraph/graph.db` with tables: `nodes`, `edges`, `metadata`, `embeddings`, `function_complexity`
**Database:** SQLite at `.codegraph/graph.db` with tables: `nodes`, `edges`, `metadata`, `embeddings`, `function_complexity`, `ast_nodes` (stored `new`/`throw`/`await` expressions and `string`/`regex` literals, queryable via `codegraph ast`). Both engines must extract `ast_nodes` for every language they parse — per-language node-type maps live in `src/ast-analysis/rules/index.ts` (`AST_TYPE_MAPS`, `AST_STRING_CONFIGS`) and mirror the native `LangAstConfig` constants in `crates/codegraph-core/src/extractors/helpers.rs`. Adding a new language requires a matching entry in both.

## Test Structure

Expand Down
7 changes: 6 additions & 1 deletion crates/codegraph-core/src/extractors/javascript.rs
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,12 @@ fn walk_ast_nodes_depth(node: &Node, source: &[u8], ast_nodes: &mut Vec<AstNode>
let content = raw
.trim_start_matches(|c| c == '\'' || c == '"' || c == '`')
.trim_end_matches(|c| c == '\'' || c == '"' || c == '`');
if content.len() < 2 {
// Count Unicode code points, not UTF-8 bytes, so the filter matches
// helpers.rs `build_string_node` and the WASM visitor — a single non-
// ASCII glyph like `─` (3 bytes / 1 code point) must be treated as one
// character, otherwise we emit "excess" string nodes the WASM engine
// skips (see parity issue #1010).
if content.chars().count() < 2 {
// Still recurse children (template_string may have nested expressions)
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
Expand Down
12 changes: 11 additions & 1 deletion src/ast-analysis/engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ import type {
} from '../types.js';
import { computeLOCMetrics, computeMaintainabilityIndex } from './metrics.js';
import {
AST_STRING_CONFIGS,
AST_TYPE_MAPS,
astStopRecurseKinds,
CFG_RULES,
COMPLEXITY_RULES,
DATAFLOW_RULES,
Expand Down Expand Up @@ -458,7 +460,15 @@ function setupAstVisitor(
for (const row of bulkNodeIdsByFile(db, relPath)) {
nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
}
return createAstStoreVisitor(astTypeMap, symbols.definitions || [], relPath, nodeIdMap);
const stringConfig = AST_STRING_CONFIGS.get(langId);
return createAstStoreVisitor(
astTypeMap,
symbols.definitions || [],
relPath,
nodeIdMap,
stringConfig,
astStopRecurseKinds(langId),
);
}

/** Set up complexity visitor if any definitions need WASM complexity analysis. */
Expand Down
9 changes: 8 additions & 1 deletion src/ast-analysis/rules/csharp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -200,4 +200,11 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({

// ─── AST Node Types ───────────────────────────────────────────────────────

export const astTypes: Record<string, string> | null = null;
/**
 * C# tree-sitter node types that are emitted as `ast_nodes` rows, mapped to
 * the kind stored for each. Both throw forms share the `throw` kind, and
 * regular and verbatim string literals share the `string` kind.
 *
 * The declared type still admits `null`, although a map is always assigned.
 */
export const astTypes: Record<string, string> | null = {
object_creation_expression: 'new',
throw_statement: 'throw',
throw_expression: 'throw',
await_expression: 'await',
string_literal: 'string',
verbatim_string_literal: 'string',
};
5 changes: 4 additions & 1 deletion src/ast-analysis/rules/go.ts
Original file line number Diff line number Diff line change
Expand Up @@ -181,4 +181,7 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({

// ─── AST Node Types ───────────────────────────────────────────────────────

export const astTypes: Record<string, string> | null = null;
/**
 * Go tree-sitter node types that are emitted as `ast_nodes` rows, mapped to
 * the kind stored for each. Only the two string-literal forms are listed;
 * no `new`, `throw` or `await` kinds are mapped for Go.
 *
 * The declared type still admits `null`, although a map is always assigned.
 */
export const astTypes: Record<string, string> | null = {
interpreted_string_literal: 'string',
raw_string_literal: 'string',
};
185 changes: 181 additions & 4 deletions src/ast-analysis/rules/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,187 @@ export const DATAFLOW_RULES: Map<string, DataflowRulesConfig> = new Map([
['ruby', ruby.dataflow],
]);

// ─── AST Type Maps ───────────────────────────────────────────────────────
// ─── AST Node Type Maps ──────────────────────────────────────────────────
//
// These mirror the per-language `LangAstConfig` constants in the native Rust
// engine (`crates/codegraph-core/src/extractors/helpers.rs`). WASM and native
// must agree on which tree-sitter node types to emit as `ast_nodes` rows.
// Languages without a dedicated rules/*.ts file have their maps inlined here.

// Local aliases for the `astTypes` maps exported by the per-language rules
// modules. The rules modules visible alongside this file declare `astTypes`
// as `Record<string, string> | null`; the casts below discard the `null` arm,
// so a module exporting `null` would reach AST_TYPE_MAPS without a type error.
// NOTE(review): `javascript.astTypes` is not visible here — confirm it is
// never `null`.
const JS_AST_TYPES = javascript.astTypes as Record<string, string>;
const PY_AST_TYPES = python.astTypes as Record<string, string>;
const GO_AST_TYPES = go.astTypes as Record<string, string>;
const RS_AST_TYPES = rust.astTypes as Record<string, string>;
const JAVA_AST_TYPES = java.astTypes as Record<string, string>;
const CS_AST_TYPES = csharp.astTypes as Record<string, string>;
const RB_AST_TYPES = ruby.astTypes as Record<string, string>;
const PHP_AST_TYPES = php.astTypes as Record<string, string>;

// Each map below is keyed by tree-sitter node type; the value is the kind
// stored for that node (`new`, `throw`, `await`, `string` or `regex`).

// C: only string literals are mapped.
const C_AST_TYPES: Record<string, string> = {
string_literal: 'string',
};

// C++: `co_await_expression` is stored under the `await` kind; both regular
// and raw string literals are stored as `string`.
const CPP_AST_TYPES: Record<string, string> = {
new_expression: 'new',
throw_statement: 'throw',
co_await_expression: 'await',
string_literal: 'string',
raw_string_literal: 'string',
};

// Kotlin: throw expressions and string literals.
const KOTLIN_AST_TYPES: Record<string, string> = {
throw_expression: 'throw',
string_literal: 'string',
};

// Swift: throw statements, await expressions and string literals.
const SWIFT_AST_TYPES: Record<string, string> = {
throw_statement: 'throw',
await_expression: 'await',
string_literal: 'string',
};

// Scala: object creation, throw expressions and string literals.
const SCALA_AST_TYPES: Record<string, string> = {
object_creation_expression: 'new',
throw_expression: 'throw',
string_literal: 'string',
};

// Bash: `expansion` nodes are stored under the `string` kind alongside
// `string` nodes.
const BASH_AST_TYPES: Record<string, string> = {
string: 'string',
expansion: 'string',
};

// Elixir: `sigil` nodes are stored under the `regex` kind.
const ELIXIR_AST_TYPES: Record<string, string> = {
string: 'string',
sigil: 'regex',
};

// Lua: only string nodes are mapped.
const LUA_AST_TYPES: Record<string, string> = {
string: 'string',
};

// Dart: both `new_expression` and `constructor_invocation` are stored under
// the `new` kind.
const DART_AST_TYPES: Record<string, string> = {
new_expression: 'new',
constructor_invocation: 'new',
throw_expression: 'throw',
await_expression: 'await',
string_literal: 'string',
};

// Zig: only string literals are mapped.
const ZIG_AST_TYPES: Record<string, string> = {
string_literal: 'string',
};

// Haskell: `char` nodes are stored under the `string` kind alongside
// `string` nodes.
const HASKELL_AST_TYPES: Record<string, string> = {
string: 'string',
char: 'string',
};

// OCaml: only string nodes are mapped. The same map is registered for both
// the `ocaml` and `ocaml-interface` language ids.
const OCAML_AST_TYPES: Record<string, string> = {
string: 'string',
};

/**
 * Language id → map from tree-sitter node type to the kind stored in
 * `ast_nodes` for that node.
 *
 * Every language id appears exactly once. The JS family (`javascript`,
 * `typescript`, `tsx`) shares one map object, as do `ocaml` and
 * `ocaml-interface`.
 */
export const AST_TYPE_MAPS: Map<string, Record<string, string>> = new Map([
  ['javascript', JS_AST_TYPES],
  ['typescript', JS_AST_TYPES],
  ['tsx', JS_AST_TYPES],
  ['python', PY_AST_TYPES],
  ['go', GO_AST_TYPES],
  ['rust', RS_AST_TYPES],
  ['java', JAVA_AST_TYPES],
  ['csharp', CS_AST_TYPES],
  ['ruby', RB_AST_TYPES],
  ['php', PHP_AST_TYPES],
  ['c', C_AST_TYPES],
  ['cpp', CPP_AST_TYPES],
  ['kotlin', KOTLIN_AST_TYPES],
  ['swift', SWIFT_AST_TYPES],
  ['scala', SCALA_AST_TYPES],
  ['bash', BASH_AST_TYPES],
  ['elixir', ELIXIR_AST_TYPES],
  ['lua', LUA_AST_TYPES],
  ['dart', DART_AST_TYPES],
  ['zig', ZIG_AST_TYPES],
  ['haskell', HASKELL_AST_TYPES],
  ['ocaml', OCAML_AST_TYPES],
  ['ocaml-interface', OCAML_AST_TYPES],
]);

// ─── Per-language string-extraction config ───────────────────────────────
//
// Mirrors `quote_chars` + `string_prefixes` in the native `LangAstConfig`.
// Used by the AST-store visitor to strip quote characters and language-
// specific prefix sigils (Python `r"..."`, C# verbatim `@"..."`, Rust raw
// `r#"..."#`, etc.) when computing string content for the `name` column.

/**
 * Characters stripped from a string node's text when computing its content:
 * `quoteChars` lists the quote characters and `stringPrefixes` lists the
 * single-character prefix sigils for the language.
 */
export interface AstStringConfig {
  quoteChars: string;
  stringPrefixes: string;
}

/** Builds one config; languages without prefix sigils omit the second argument. */
const makeStringConfig = (quoteChars: string, stringPrefixes = ''): AstStringConfig => ({
  quoteChars,
  stringPrefixes,
});

const JS_STRING_CONFIG = makeStringConfig('\'"`');
const PY_STRING_CONFIG = makeStringConfig('\'"', 'rbfuRBFU');
const GO_STRING_CONFIG = makeStringConfig('"`');
const RS_STRING_CONFIG = makeStringConfig('"');
const JAVA_STRING_CONFIG = makeStringConfig('"');
const CS_STRING_CONFIG = makeStringConfig('"');
const RB_STRING_CONFIG = makeStringConfig('\'"');
const PHP_STRING_CONFIG = makeStringConfig('\'"');
const C_STRING_CONFIG = makeStringConfig('"');
const CPP_STRING_CONFIG = makeStringConfig('"', 'LuUR');
const KOTLIN_STRING_CONFIG = makeStringConfig('"');
const SWIFT_STRING_CONFIG = makeStringConfig('"');
const SCALA_STRING_CONFIG = makeStringConfig('"');
const BASH_STRING_CONFIG = makeStringConfig('"\'');
const ELIXIR_STRING_CONFIG = makeStringConfig('"');
const LUA_STRING_CONFIG = makeStringConfig('\'"');
const DART_STRING_CONFIG = makeStringConfig('\'"');
const ZIG_STRING_CONFIG = makeStringConfig('"');
const HASKELL_STRING_CONFIG = makeStringConfig('"\'');
const OCAML_STRING_CONFIG = makeStringConfig('"');

// Registration order is preserved by the Map; the JS family shares one config
// object, as do `ocaml` and `ocaml-interface`.
const STRING_CONFIG_ENTRIES: ReadonlyArray<readonly [string, AstStringConfig]> = [
  ['javascript', JS_STRING_CONFIG],
  ['typescript', JS_STRING_CONFIG],
  ['tsx', JS_STRING_CONFIG],
  ['python', PY_STRING_CONFIG],
  ['go', GO_STRING_CONFIG],
  ['rust', RS_STRING_CONFIG],
  ['java', JAVA_STRING_CONFIG],
  ['csharp', CS_STRING_CONFIG],
  ['ruby', RB_STRING_CONFIG],
  ['php', PHP_STRING_CONFIG],
  ['c', C_STRING_CONFIG],
  ['cpp', CPP_STRING_CONFIG],
  ['kotlin', KOTLIN_STRING_CONFIG],
  ['swift', SWIFT_STRING_CONFIG],
  ['scala', SCALA_STRING_CONFIG],
  ['bash', BASH_STRING_CONFIG],
  ['elixir', ELIXIR_STRING_CONFIG],
  ['lua', LUA_STRING_CONFIG],
  ['dart', DART_STRING_CONFIG],
  ['zig', ZIG_STRING_CONFIG],
  ['haskell', HASKELL_STRING_CONFIG],
  ['ocaml', OCAML_STRING_CONFIG],
  ['ocaml-interface', OCAML_STRING_CONFIG],
];

/** Language id → string-extraction config used when stripping quotes and prefixes. */
export const AST_STRING_CONFIGS: Map<string, AstStringConfig> = new Map(STRING_CONFIG_ENTRIES);

// ─── Per-language "stop-after-collect" kinds ─────────────────────────────
//
// The native JS walker (`extractors/javascript.rs::walk_ast_nodes_depth`)
// *returns* after collecting `new_expression` and `throw_statement`, to avoid
// double-counting the wrapped expression, whereas the generic walker
// (`helpers.rs::walk_ast_nodes_with_config_depth`) always recurses. For
// WASM/native parity the JS family must therefore skip recursion on `new`
// and `throw`; every other language recurses normally.

const JS_STOP_RECURSE: ReadonlySet<string> = new Set(['new', 'throw']);
const EMPTY_STOP_RECURSE: ReadonlySet<string> = new Set();

/**
 * Returns the set of kinds after which the walker must not descend into the
 * collected node's children.
 *
 * @param langId - Language id; matched case-sensitively.
 * @returns The shared `new`/`throw` set for `javascript`, `typescript` and
 *   `tsx`; a shared empty set for every other id.
 */
export function astStopRecurseKinds(langId: string): ReadonlySet<string> {
  switch (langId) {
    case 'javascript':
    case 'typescript':
    case 'tsx':
      return JS_STOP_RECURSE;
    default:
      return EMPTY_STOP_RECURSE;
  }
}
6 changes: 5 additions & 1 deletion src/ast-analysis/rules/java.ts
Original file line number Diff line number Diff line change
Expand Up @@ -174,4 +174,8 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({

// ─── AST Node Types ───────────────────────────────────────────────────────

export const astTypes: Record<string, string> | null = null;
/**
 * Java tree-sitter node types that are emitted as `ast_nodes` rows, mapped to
 * the kind stored for each. No `await` kind is mapped for Java.
 *
 * The declared type still admits `null`, although a map is always assigned.
 */
export const astTypes: Record<string, string> | null = {
object_creation_expression: 'new',
throw_statement: 'throw',
string_literal: 'string',
};
7 changes: 6 additions & 1 deletion src/ast-analysis/rules/php.ts
Original file line number Diff line number Diff line change
Expand Up @@ -218,4 +218,9 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({

// ─── AST Node Types ───────────────────────────────────────────────────────

export const astTypes: Record<string, string> | null = null;
/**
 * PHP tree-sitter node types that are emitted as `ast_nodes` rows, mapped to
 * the kind stored for each. Both `string` and `encapsed_string` nodes are
 * stored under the `string` kind.
 *
 * The declared type still admits `null`, although a map is always assigned.
 */
export const astTypes: Record<string, string> | null = {
object_creation_expression: 'new',
throw_expression: 'throw',
string: 'string',
encapsed_string: 'string',
};
6 changes: 5 additions & 1 deletion src/ast-analysis/rules/python.ts
Original file line number Diff line number Diff line change
Expand Up @@ -195,4 +195,8 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({

// ─── AST Node Types ───────────────────────────────────────────────────────

export const astTypes: Record<string, string> | null = null;
/**
 * Python tree-sitter node types that are emitted as `ast_nodes` rows, mapped
 * to the kind stored for each. `raise_statement` is stored under the `throw`
 * kind; no `new` kind is mapped for Python.
 *
 * The declared type still admits `null`, although a map is always assigned.
 */
export const astTypes: Record<string, string> | null = {
raise_statement: 'throw',
await: 'await',
string: 'string',
};
5 changes: 4 additions & 1 deletion src/ast-analysis/rules/ruby.ts
Original file line number Diff line number Diff line change
Expand Up @@ -203,4 +203,7 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({

// ─── AST Node Types ───────────────────────────────────────────────────────

export const astTypes: Record<string, string> | null = null;
/**
 * Ruby tree-sitter node types that are emitted as `ast_nodes` rows, mapped to
 * the kind stored for each. Only string and regex nodes are mapped.
 *
 * The declared type still admits `null`, although a map is always assigned.
 */
export const astTypes: Record<string, string> | null = {
string: 'string',
regex: 'regex',
};
6 changes: 5 additions & 1 deletion src/ast-analysis/rules/rust.ts
Original file line number Diff line number Diff line change
Expand Up @@ -172,4 +172,8 @@ export const dataflow: DataflowRulesConfig = makeDataflowRules({

// ─── AST Node Types ───────────────────────────────────────────────────────

export const astTypes: Record<string, string> | null = null;
/**
 * Rust tree-sitter node types that are emitted as `ast_nodes` rows, mapped to
 * the kind stored for each. Regular and raw string literals share the
 * `string` kind; no `new` or `throw` kinds are mapped for Rust.
 *
 * The declared type still admits `null`, although a map is always assigned.
 */
export const astTypes: Record<string, string> | null = {
await_expression: 'await',
string_literal: 'string',
raw_string_literal: 'string',
};
Loading
Loading