DeusData
diff --git a/‎Makefile.cbm‎
Lines changed: 1 addition & 0 deletions b/‎Makefile.cbm‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎internal/cbm/cbm.c‎
Lines changed: 206 additions & 0 deletions b/‎internal/cbm/cbm.c‎
Lines changed: 206 additions & 0 deletions
diff --git a/‎internal/cbm/cbm.h‎
Lines changed: 19 additions & 0 deletions b/‎internal/cbm/cbm.h‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎internal/cbm/extract_calls.c‎
Lines changed: 3 additions & 0 deletions b/‎internal/cbm/extract_calls.c‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎internal/cbm/extract_defs.c‎
Lines changed: 19 additions & 4 deletions b/‎internal/cbm/extract_defs.c‎
Lines changed: 19 additions & 4 deletions
diff --git a/‎internal/cbm/extract_unified.c‎
Lines changed: 17 additions & 0 deletions b/‎internal/cbm/extract_unified.c‎
Lines changed: 17 additions & 0 deletions
@@ -195,6 +195,7 @@ PIPELINE_SRCS = \
     src/pipeline/pass_k8s.c \
     src/pipeline/pass_similarity.c \
     src/pipeline/pass_semantic_edges.c \
+    src/pipeline/pass_complexity.c \
     src/pipeline/pass_cross_repo.c \
     src/pipeline/artifact.c \
     src/pipeline/pass_pkgmap.c
 
@@ -16,6 +16,7 @@
 #include <stdint.h> // uint32_t, uint64_t, int64_t
 #include <stdlib.h>
 #include <string.h>
+#include <ctype.h>
 #include <time.h> // struct timespec, CLOCK_MONOTONIC
 
 // Atomic counters for profiling parse vs extraction time (nanoseconds).
@@ -28,6 +29,20 @@ static _Atomic uint64_t total_preprocess_ns = 0;
 static _Atomic uint64_t total_files_preprocessed = 0;
 static _Atomic uint64_t total_files = 0;
 
+// C/C++ preprocessor #define macros are extracted as Macro nodes (#375). On a
+// macro-dense codebase (e.g. the Linux kernel: ~2.4M macros, 49% of all nodes)
+// this is the dominant extraction cost, so it is gated to the full/advanced
+// index modes. Default ON to preserve behavior for direct callers/tests; the
+// pipeline sets it from the index mode before extraction. Set once pre-extract,
+// read-only during, so a relaxed atomic is sufficient.
+static _Atomic int g_extract_macros = 1;
+
+// Turn Macro-node extraction on (any non-zero value) or off (zero). The value
+// is normalised to 0/1 before being stored with relaxed ordering.
+void cbm_set_macro_extraction(int enabled) {
+    const int flag = (enabled != 0);
+    atomic_store_explicit(&g_extract_macros, flag, memory_order_relaxed);
+}
+
+// Return the current Macro-node extraction flag: 1 when enabled, 0 otherwise.
+int cbm_macro_extraction_enabled(void) {
+    const int flag = atomic_load_explicit(&g_extract_macros, memory_order_relaxed);
+    return flag;
+}
+
 #define NSEC_PER_SEC 1000000000ULL
 #define USEC_TO_NSEC 1000ULL
 /* Use compat.h's cbm_clock_gettime which accepts CLOCK_MONOTONIC (value
@@ -251,6 +266,100 @@ void cbm_shutdown(void) {
     cbm_initialized = 0;
 }
 
+// --- Bottleneck call-name classification (language-agnostic heuristics) ---
+
+// Compare two short callee names for equality, ignoring letter case (via
+// tolower). Both arguments must be non-NULL, NUL-terminated strings.
+static bool name_ieq(const char *a, const char *b) {
+    while (*a != '\0' && *b != '\0') {
+        const int lhs = tolower((unsigned char)*a);
+        const int rhs = tolower((unsigned char)*b);
+        if (lhs != rhs) {
+            return false;
+        }
+        a++;
+        b++;
+    }
+    // Equal only when both strings ended together (no leftover suffix).
+    return !*a && !*b;
+}
+
+// Return true when `name` matches, case-insensitively, any entry of `set`.
+// `set` is an array of strings terminated by a NULL pointer.
+static bool name_in_set(const char *name, const char *const *set) {
+    for (size_t i = 0; set[i] != NULL; i++) {
+        if (name_ieq(name, set[i])) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// Recognise linear-scan / membership callee names. Such a call inside a loop
+// is the textbook hidden O(n^2) (cf. Olivo et al., PLDI'15) that syntactic
+// loop-depth alone does not reveal. Matching is case-insensitive.
+static bool is_linear_scan_name(const char *n) {
+    static const char *const linear_scan_names[] = {
+        "find",
+        "indexof",
+        "contains",
+        "includes",
+        "search",
+        "lookup",
+        "strstr",
+        "strchr",
+        "strrchr",
+        "memchr",
+        "find_if",
+        "findindex",
+        "count",
+        "index",
+        NULL, // sentinel required by name_in_set
+    };
+    const bool hit = name_in_set(n, linear_scan_names);
+    return hit;
+}
+
+// Recognise allocation / growable-append callee names. Repeating one of these
+// inside a loop is the classic accidental reallocation / string-concat O(n^2).
+// The list is deliberately conservative; in languages where a name is
+// meaningless it simply never matches. Matching is case-insensitive.
+static bool is_alloc_name(const char *n) {
+    static const char *const alloc_names[] = {
+        "malloc",
+        "calloc",
+        "realloc",
+        "strdup",
+        "strndup",
+        "append",
+        "push_back",
+        "emplace_back",
+        "concat",
+        "strcat",
+        "strncat",
+        "push",
+        "pushback",
+        NULL, // sentinel required by name_in_set
+    };
+    const bool hit = name_in_set(n, alloc_names);
+    return hit;
+}
+
+// Count parameters from a signature string like "(int a, Foo* b, cb (*)(int,int))".
+// Fallback for languages where param_names isn't populated (e.g. C keeps only the
+// signature text). Scans the first parenthesised list in `sig` and counts commas
+// at its top nesting level; (), [], {} and <> each open a nested level, so commas
+// inside function-pointer types, array bounds or template arguments are ignored.
+//
+// Returns 0 for a NULL `sig`, for text without '(', for an empty list "()" and
+// for the C "(void)" list; otherwise commas + 1. A lone parameter whose type
+// merely starts with "void" (e.g. "(void *ctx)") counts as one parameter.
+// Approximate by design (a structural smell, not an exact arity).
+static int count_params_from_signature(const char *sig) {
+    if (!sig) {
+        return 0;
+    }
+    const char *open = strchr(sig, '(');
+    if (!open) {
+        return 0;
+    }
+    const char *list = open + 1;
+    int depth = 0;
+    int commas = 0;
+    bool any = false; // saw a character that is neither space, bracket nor top-level comma
+    for (const char *p = list; *p; p++) {
+        char ch = *p;
+        if (ch == '(' || ch == '[' || ch == '{' || ch == '<') {
+            depth++;
+        } else if (ch == ')') {
+            if (depth == 0) {
+                break; // closing paren of the parameter list itself
+            }
+            depth--;
+        } else if (ch == ']' || ch == '}' || ch == '>') {
+            // Guarded so a stray closer (e.g. the '>' of "->") cannot go negative.
+            if (depth > 0) {
+                depth--;
+            }
+        } else if (ch == ',' && depth == 0) {
+            commas++;
+        } else if (!isspace((unsigned char)ch)) {
+            any = true;
+        }
+    }
+    if (!any) {
+        return 0; /* "()" */
+    }
+    if (commas == 0) {
+        // A lone "void" means "no parameters" only when nothing but whitespace
+        // follows it before the closing paren; "void *p" is a real parameter.
+        const char *q = list;
+        while (isspace((unsigned char)*q)) {
+            q++;
+        }
+        if (strncmp(q, "void", 4) == 0) {
+            q += 4;
+            while (isspace((unsigned char)*q)) {
+                q++;
+            }
+            if (*q == ')' || *q == '\0') {
+                return 0; /* C "(void)" */
+            }
+        }
+    }
+    return commas + 1;
+}
+
 // --- Main extraction function ---
 
 CBMFileResult *cbm_extract_file(const char *source, int source_len, CBMLanguage language,
@@ -396,6 +505,12 @@ CBMFileResult *cbm_extract_file(const char *source, int source_len, CBMLanguage
     }
     atomic_fetch_add(&total_lsp_ns, now_ns() - lsp_start);
 
+    // Calls extracted so far all carry ORIGINAL-source line numbers; the C/C++
+    // preprocessor second pass below appends calls with EXPANDED-source lines,
+    // which must not be used for the def line-range attribution of the bottleneck
+    // metrics. Remember the boundary.
+    int orig_calls_count = result->calls.count;
+
     // Second pass: preprocess C/C++/CUDA and extract additional macro-hidden calls.
     // Defs keep original-source line numbers; only CALLS are extracted from expanded source.
     if (language == CBM_LANG_C || language == CBM_LANG_CPP || language == CBM_LANG_CUDA) {
@@ -457,6 +572,97 @@ CBMFileResult *cbm_extract_file(const char *source, int source_len, CBMLanguage
         atomic_fetch_add(&total_preprocess_ns, now_ns() - pp_start);
     }
 
+    // Bottleneck call-context metrics. Each call is attributed to the INNERMOST
+    // enclosing Function/Method def by source-line range (defs and calls in one
+    // CBMFileResult share the same file). Range matching is used instead of
+    // enclosing_func_qn string matching because some grammars (notably C, whose
+    // function_definition has no "name" field) attribute the call's scope to the
+    // module rather than the function — line ranges are unambiguous and
+    // language-agnostic. Bounded per file (defs x calls), not a repo-scale scan.
+    int def_count = result->defs.count;
+    bool *has_self = def_count > 0 ? calloc((size_t)def_count, sizeof(bool)) : NULL;
+    bool *has_guarded = def_count > 0 ? calloc((size_t)def_count, sizeof(bool)) : NULL;
+
+    // param_count is a standalone structural smell (independent of calls). Prefer
+    // the parsed param_names array; fall back to counting from the signature text
+    // for languages (e.g. C) that populate only the signature.
+    for (int di = 0; di < def_count; di++) {
+        CBMDefinition *d = &result->defs.items[di];
+        int pc = 0;
+        if (d->param_names) {
+            while (d->param_names[pc]) {
+                pc++;
+            }
+        }
+        if (pc == 0 && d->signature) {
+            pc = count_params_from_signature(d->signature);
+        }
+        d->param_count = pc;
+    }
+
+    for (int ci = 0; ci < orig_calls_count; ci++) {
+        const CBMCall *c = &result->calls.items[ci];
+        if (!c->callee_name || c->start_line <= 0) {
+            continue;
+        }
+        // Innermost enclosing Function/Method def by line range (smallest span).
+        int best = -1;
+        int best_span = -1;
+        for (int di = 0; di < def_count; di++) {
+            const CBMDefinition *d = &result->defs.items[di];
+            if (!d->name || !d->label ||
+                (strcmp(d->label, "Function") != 0 && strcmp(d->label, "Method") != 0)) {
+                continue;
+            }
+            if ((int)d->start_line <= c->start_line && c->start_line <= (int)d->end_line) {
+                int span = (int)d->end_line - (int)d->start_line;
+                if (best < 0 || span < best_span) {
+                    best_span = span;
+                    best = di;
+                }
+            }
+        }
+        if (best < 0) {
+            continue;
+        }
+        CBMDefinition *d = &result->defs.items[best];
+        // callee_name may be bare ("recur") or qualified ("pkg.recur", "self.recur")
+        const char *dot = strrchr(c->callee_name, '.');
+        const char *callee_short = dot ? dot + 1 : c->callee_name;
+        bool in_loop = c->loop_depth > 0;
+
+        if (strcmp(callee_short, d->name) == 0) {
+            // Direct self-recursion. The call graph omits self-edges (pass_calls
+            // skips source==target), so detect it here; seeds "recursive".
+            d->is_recursive = true;
+            if (has_self) {
+                has_self[best] = true;
+            }
+            if (in_loop) {
+                d->recursion_in_loop = true; // recursion compounded by a loop
+            }
+            if (c->branch_depth > 0 && has_guarded) {
+                has_guarded[best] = true; // a self-call guarded by some conditional
+            }
+        }
+        if (in_loop && is_linear_scan_name(callee_short)) {
+            d->linear_scan_in_loop++; // hidden O(n^2): linear scan inside a loop
+        }
+        if (in_loop && is_alloc_name(callee_short)) {
+            d->alloc_in_loop++; // repeated allocation/append inside a loop
+        }
+    }
+
+    // Recursive with no self-call guarded by any conditional → no obvious base
+    // case on the recursive path: a stronger "potentially unbounded" signal.
+    for (int di = 0; di < def_count; di++) {
+        if (has_self && has_self[di] && !(has_guarded && has_guarded[di])) {
+            result->defs.items[di].unguarded_recursion = true;
+        }
+    }
+    free(has_self);
+    free(has_guarded);
+
     uint64_t t2 = now_ns();
 
     result->imports_count = result->imports.count;
 
@@ -195,6 +195,16 @@ typedef struct {
     const char *route_path;    // HTTP route path from decorator (e.g., "/api/users") or NULL
     const char *route_method;  // HTTP method from decorator (e.g., "POST") or NULL
     int complexity;            // cyclomatic complexity
+    int cognitive;             // cognitive complexity (nesting-weighted)
+    int loop_count;            // number of loop constructs in the body
+    int loop_depth;            // max nested-loop depth (bottleneck proxy)
+    bool is_recursive;         // body contains a direct self-call (seed for "recursive")
+    int param_count;           // number of parameters (large = complexity smell)
+    int max_access_depth;      // deepest chained member/subscript access (a.b.c.d)
+    int linear_scan_in_loop;   // count of linear-scan calls (find/contains/indexOf) inside loops
+    int alloc_in_loop;         // count of allocation/append calls inside loops
+    bool recursion_in_loop;    // a self-call occurs inside a loop body
+    bool unguarded_recursion;  // recursive with no self-call guarded by a conditional
     int lines;                 // body line count
     uint32_t *fingerprint;     // MinHash fingerprint (arena-allocated, K values) or NULL
     int fingerprint_k;         // number of hash values (CBM_MINHASH_K or 0)
@@ -223,6 +233,9 @@ typedef struct {
     const char *second_arg_name;        // second argument identifier (handler ref) or NULL
     CBMCallArg args[CBM_MAX_CALL_ARGS]; // first N arguments with expressions
     int arg_count;                      // number of captured arguments
+    int loop_depth;                     // enclosing loop nesting at the call site
+    int branch_depth;                   // enclosing branch nesting at the call site
+    int start_line;                     // 1-based source line of the call (for def range-match)
 } CBMCall;
 
 typedef struct {
@@ -532,6 +545,12 @@ uint64_t cbm_get_preprocess_ns(void);
 uint64_t cbm_get_files_preprocessed(void);
 void cbm_reset_profile(void);
 
+// Toggle C/C++ preprocessor Macro-node extraction (#375). The pipeline enables
+// it only for full/advanced index modes (it dominates extraction on macro-dense
+// codebases). Default ON. Set before extraction; read-only during.
+void cbm_set_macro_extraction(int enabled);
+int cbm_macro_extraction_enabled(void);
+
 // --- Internal helpers used by extractors ---
 
 // Growable array push functions (arena-allocated, no individual free needed).
 
@@ -727,6 +727,9 @@ void handle_calls(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec *spec, Walk
             CBMCall call = {0};
             call.callee_name = callee;
             call.enclosing_func_qn = state->enclosing_func_qn;
+            call.loop_depth = state->loop_depth;     // enclosing loop nesting at this call
+            call.branch_depth = state->branch_depth; // enclosing branch nesting at this call
+            call.start_line = (int)ts_node_start_point(node).row + TS_LINE_OFFSET;
 
             TSNode args = ts_node_child_by_field_name(node, TS_FIELD("arguments"));
             if (!ts_node_is_null(args)) {
 
@@ -1671,6 +1671,17 @@ static void resolve_cpp_trailing_return(CBMArena *a, TSNode func_node, const cha
     }
 }
 
+/* Compute the structural complexity metrics of `body` and copy them into `def`.
+ *
+ * def  - definition whose complexity, cognitive, loop_count, loop_depth and
+ *        max_access_depth fields are overwritten.
+ * body - syntax node handed to cbm_compute_complexity.
+ * spec - language spec; its branching_node_types list is passed through.
+ *
+ * `cx` is zero-initialised so every copied metric has a defined value even if
+ * cbm_compute_complexity leaves a field untouched (its implementation is not
+ * visible from this hunk). */
+static void set_def_complexity(CBMDefinition *def, TSNode body, const CBMLangSpec *spec) {
+    cbm_complexity_t cx = {0};
+    cbm_compute_complexity(body, spec->branching_node_types, &cx);
+    def->complexity = cx.cyclomatic;
+    def->cognitive = cx.cognitive;
+    def->loop_count = cx.loop_count;
+    def->loop_depth = cx.loop_depth;
+    def->max_access_depth = cx.max_access_depth;
+}
+
 static void extract_func_def(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec *spec) {
     CBMArena *a = ctx->arena;
 
@@ -1744,7 +1755,7 @@ static void extract_func_def(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec
 
     // Complexity
     if (spec->branching_node_types && spec->branching_node_types[0]) {
-        def.complexity = cbm_count_branching(node, spec->branching_node_types);
+        set_def_complexity(&def, node, spec);
     }
 
     // MinHash fingerprint
@@ -2277,7 +2288,7 @@ static void push_method_def(CBMExtractCtx *ctx, TSNode child, const char *class_
     def.docstring = extract_docstring(a, child, ctx->source, ctx->language);
 
     if (spec->branching_node_types && spec->branching_node_types[0]) {
-        def.complexity = cbm_count_branching(child, spec->branching_node_types);
+        set_def_complexity(&def, child, spec);
     }
 
     // MinHash fingerprint
@@ -2425,7 +2436,7 @@ static void extract_rust_impl(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec
         }
 
         if (spec->branching_node_types && spec->branching_node_types[0]) {
-            def.complexity = cbm_count_branching(child, spec->branching_node_types);
+            set_def_complexity(&def, child, spec);
         }
 
         // MinHash fingerprint
@@ -3829,7 +3840,11 @@ static void walk_defs(CBMExtractCtx *ctx, TSNode root, const CBMLangSpec *spec,
 
         if (is_c_preprocessor_lang(ctx->language) &&
             (strcmp(kind, "preproc_def") == 0 || strcmp(kind, "preproc_function_def") == 0)) {
-            extract_c_macro_def(ctx, node);
+            // Gated to full/advanced index modes — macros dominate extraction on
+            // macro-dense codebases (e.g. the Linux kernel). See #375.
+            if (cbm_macro_extraction_enabled()) {
+                extract_c_macro_def(ctx, node);
+            }
             continue; // the macro body is a preproc_arg — nothing more to extract
         }
 
 
@@ -36,6 +36,8 @@ static void recompute_state(WalkState *state, const char *module_qn) {
     state->enclosing_class_qn = NULL;
     state->inside_call = false;
     state->inside_import = false;
+    state->loop_depth = 0;
+    state->branch_depth = 0;
 
     for (int i = 0; i < state->scope_top; i++) {
         switch (state->scopes[i].kind) {
@@ -51,6 +53,12 @@ static void recompute_state(WalkState *state, const char *module_qn) {
         case SCOPE_IMPORT:
             state->inside_import = true;
             break;
+        case SCOPE_LOOP:
+            state->loop_depth++;
+            break;
+        case SCOPE_BRANCH:
+            state->branch_depth++;
+            break;
         default:
             break;
         }
@@ -686,6 +694,15 @@ static void push_boundary_scopes(CBMExtractCtx *ctx, TSNode node, const CBMLangS
     if (spec->import_node_types && cbm_kind_in_set(node, spec->import_node_types)) {
         push_scope(state, SCOPE_IMPORT, depth, NULL);
     }
+    /* Loop / branch nesting for bottleneck metrics. Loops are gated on named
+     * nodes so anonymous `for`/`while` keyword tokens don't count. A loop is NOT
+     * also counted as a branch (many specs list loops in branching_node_types,
+     * but a loop is not a base-case guard for the unguarded-recursion signal). */
+    if (ts_node_is_named(node) && cbm_is_loop_node_type(ts_node_type(node))) {
+        push_scope(state, SCOPE_LOOP, depth, NULL);
+    } else if (spec->branching_node_types && cbm_kind_in_set(node, spec->branching_node_types)) {
+        push_scope(state, SCOPE_BRANCH, depth, NULL);
+    }
 }
 
 void cbm_extract_unified(CBMExtractCtx *ctx) {