"""
diagnose.py — run this from the neural-lam root after indexing.
Windows-friendly replacement for grep-based inspection.
Usage:
python diagnose.py
"""
import sys
import os
import json

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from core.graph import KnowledgeGraph
from core.indexer import Indexer
from core.enricher import Enricher
from core.query import QueryEngine
from presets.neural_lam import apply
from output.llm_context import format_impact_as_context

NEURAL_LAM_PATH = "./neural_lam"
CACHE = ".codegraph/cache.json"

def reindex():
    print(f"Indexing {NEURAL_LAM_PATH} ...")
    graph = KnowledgeGraph()
    graph.CACHE_FILE = CACHE
    indexer = Indexer(graph, NEURAL_LAM_PATH)
    indexer.index_path(NEURAL_LAM_PATH)
    print(f" Raw: {graph.node_count()} nodes, {graph.edge_count()} edges")
    Enricher(graph).run()
    print(f" After enrichment: {graph.node_count()} nodes, {graph.edge_count()} edges")
    apply(graph)
    print(f" After preset: {graph.node_count()} nodes, {graph.edge_count()} edges")
    graph.save(CACHE)
    return graph

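# Hedged helper sketch, not part of the original diagnose flow: it assumes
# graph.save() writes plain JSON to CACHE, as the .json extension suggests.
# Handy for a quick sanity check of the cache without constructing a graph.
def peek_cache(path=CACHE):
    if not os.path.exists(path):
        print(f"No cache at {path}")
        return
    with open(path, "r", encoding="utf-8") as fh:
        raw = json.load(fh)
    if isinstance(raw, dict):
        print(f"Cache top-level keys: {sorted(raw)}")
    else:
        print(f"Cache is a {type(raw).__name__} with {len(raw)} entries")
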
def print_section(title):
    print(f"\n{'='*60}")
    print(f" {title}")
    print('='*60)

def run(graph):
    engine = QueryEngine(graph)

    # 1. Node type breakdown
    print_section("NODE TYPE BREAKDOWN")
    stats = graph.stats()
    for k, v in stats["node_types"].items():
        print(f" {k:25s} {v}")

    # 2. Key functions we care about
    print_section("KEY FUNCTIONS FOUND")
    targets = [
        "training_step", "validation_step", "forward",
        "__getitem__", "predict_step", "configure_optimizers",
        "encode", "decode", "process",
    ]
    for t in targets:
        matches = graph.resolve_node_id(t)
        for m in matches:
            d = graph.get_node(m)
            hook = " ⚡" if d.get("is_lightning_hook") else ""
            shapes = f" shapes={d['tensor_shapes']}" if d.get("tensor_shapes") else ""
            print(f" {m}{hook}{shapes}")

    # 3. All classes
    print_section("CLASSES")
    for nid, d in sorted(graph.g.nodes(data=True)):
        if d.get("node_type") == "class":
            bases = d.get("base_classes", [])
            print(f" {nid}")
            if bases:
                print(f" inherits: {', '.join(bases)}")

    # 4. Impact reports for the 3 most interesting nodes
    print_section("IMPACT: ARModel.training_step")
    r = engine.impact("training_step")
    if "error" not in r and "ambiguous" not in r:
        print(format_impact_as_context(r))
    else:
        print(json.dumps(r, indent=2))
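    # Note (inferred from the checks above, not from QueryEngine docs):
    # engine.impact() appears to return a dict, carrying an "error" or
    # "ambiguous" key when the name cannot be resolved to a single node;
    # otherwise it can be rendered directly via format_impact_as_context().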
print_section("IMPACT: BaseGraphModel (or GraphLAM) forward")
# forward is ambiguous — find all forward functions and pick the GNN one
forward_nodes = [
nid for nid, d in graph.g.nodes(data=True)
if d.get("name") == "forward" and d.get("node_type") == "function"
]
if not forward_nodes:
print("No forward() functions found in graph.")
elif len(forward_nodes) == 1:
r = engine.impact(forward_nodes[0])
print(format_impact_as_context(r))
else:
print(f"Multiple forward() nodes found ({len(forward_nodes)}):")
for n in forward_nodes:
print(f" {n}")
# Pick the most interesting one — prefer InteractionNet (the GNN core)
gnn_forward = next((n for n in forward_nodes if "interaction" in n.lower()), forward_nodes[0])
print(f"\nShowing impact for: {gnn_forward}")
r = engine.impact(gnn_forward)
print(format_impact_as_context(r))
print_section("IMPACT: WeatherDataset.__getitem__")
# Find the real WeatherDataset.__getitem__ node ID
getitem_candidates = [
nid for nid, d in graph.g.nodes(data=True)
if d.get("name") == "__getitem__"
and "WeatherDataset" in nid
and "Padded" not in nid # exclude the utility PaddedWeatherDataset
]
if getitem_candidates:
r = engine.impact(getitem_candidates[0])
print(format_impact_as_context(r))
else:
r = engine.impact("WeatherDataset.__getitem__")
print(format_impact_as_context(r) if "error" not in r else json.dumps(r, indent=2))
    # 5. Highest centrality nodes — the load-bearing ones
    print_section("HIGHEST IN-DEGREE NODES (most depended upon)")
    scored = []
    for nid in graph.g.nodes:
        d = graph.get_node(nid)
        if d.get("node_type") in ("function", "class"):
            scored.append((graph.in_degree(nid), nid))
    scored.sort(reverse=True)
    for score, nid in scored[:15]:
        print(f" [{score:3d} dependents] {nid}")

if __name__ == "__main__":
    if "--reindex" in sys.argv or not os.path.exists(CACHE):
        graph = reindex()
    else:
        graph = KnowledgeGraph()
        graph.CACHE_FILE = CACHE
        loaded = graph.load(CACHE)
        if not loaded:
            graph = reindex()
        else:
            print(f"Loaded from cache: {graph.node_count()} nodes, {graph.edge_count()} edges")
    run(graph)