-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathengine.py
More file actions
153 lines (123 loc) · 4.87 KB
/
engine.py
File metadata and controls
153 lines (123 loc) · 4.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
"""Discovery engine: scans a repository and produces a component inventory.
This replaces the LLM-driven discovery in the lead agent prompt.
Runs in seconds with zero LLM calls.
"""
import json
import logging
from pathlib import Path
from typing import List
from agent.schemas.core import Component, ComponentKind
from .languages import ALL_PLUGINS
from .validator import validate_discovery
logger = logging.getLogger(__name__)
def discover_components(
    repo_root: Path,
    output_dir: Path | None = None,
) -> List[Component]:
    """Scan a repository and discover all components.

    Every plugin in ``ALL_PLUGINS`` globs the repository for its manifest
    patterns, parses each non-excluded manifest, and contributes the
    resulting components. Components are de-duplicated by ``root_path``;
    the first component seen for a given root wins. Afterwards internal
    dependencies are resolved to component names, the repository shape is
    logged, and the result is validated. Validation errors are logged as
    warnings and do not abort discovery.

    Args:
        repo_root: Path to the repository root (resolved before use).
        output_dir: If provided, write components.json to this directory.

    Returns:
        List of discovered Component objects.

    Raises:
        FileNotFoundError: If ``repo_root`` is not an existing directory.
    """
    repo_root = repo_root.resolve()
    if not repo_root.is_dir():
        raise FileNotFoundError(f"Repository root not found: {repo_root}")

    all_components: List[Component] = []
    seen_roots: set[str] = set()

    for plugin in ALL_PLUGINS:
        # Counted explicitly per plugin: matching the plugin name against
        # component types is unreliable (e.g. "java" is a substring of
        # "javascript") and would count components, not manifests.
        manifests_parsed = 0
        # A manifest can match more than one pattern of the same plugin;
        # parse it only once.
        seen_manifests: set[Path] = set()

        for pattern in plugin.manifest_patterns:
            for manifest_path in sorted(repo_root.glob(pattern)):
                if manifest_path in seen_manifests:
                    continue
                seen_manifests.add(manifest_path)

                # Skip excluded paths
                if plugin.should_exclude(manifest_path):
                    continue

                # Best effort: one broken manifest must not abort the scan.
                try:
                    components = plugin.parse_manifest(manifest_path, repo_root)
                except Exception as e:
                    logger.warning(
                        "Failed to parse %s with %s plugin: %s",
                        manifest_path,
                        plugin.name,
                        e,
                    )
                    continue
                manifests_parsed += 1

                for comp in components:
                    # Deduplicate by root_path
                    if comp.root_path in seen_roots:
                        continue
                    seen_roots.add(comp.root_path)
                    all_components.append(comp)

        logger.info(
            "%s plugin: found %d manifests",
            plugin.name,
            manifests_parsed,
        )

    # Resolve cross-references: internal dependencies should reference
    # component names, not paths
    _resolve_internal_deps(all_components)

    # Detect repo shape
    shape = _detect_repo_shape(all_components)
    logger.info("Repo shape: %s (%d components)", shape, len(all_components))

    # Validate
    errors = validate_discovery(all_components, repo_root)
    for err in errors:
        logger.warning("Discovery validation: %s", err)

    # Write output
    if output_dir is not None:
        _write_output(all_components, output_dir)

    return all_components
def _resolve_internal_deps(components: List[Component]) -> None:
    """Ensure internal_dependencies reference component names, not paths.

    Each dependency entry is resolved in this order:

    1. An exact component name is kept as-is.
    2. An exact ``root_path`` is replaced by that component's name.
    3. Otherwise a substring match against component names is tried and
       accepted only when exactly one name matches.

    Entries that cannot be resolved are dropped and logged at DEBUG level.
    Empty entries are never substring-matched, because the empty string is
    contained in every name. Each resolved name is kept at most once, in
    first-seen order.

    Args:
        components: Components whose ``internal_dependencies`` are
            rewritten in place.
    """
    name_set = {c.name for c in components}
    root_to_name = {c.root_path: c.name for c in components}

    for comp in components:
        # Dict keys give de-duplication while preserving insertion order.
        resolved: dict[str, None] = {}
        for dep in comp.internal_dependencies:
            if dep in name_set:
                target = dep
            elif dep in root_to_name:
                target = root_to_name[dep]
            else:
                # Try partial match (e.g., "common" might match "my-common").
                # Empty strings are excluded on both sides: "" is a substring
                # of everything and would match indiscriminately.
                matches = (
                    [n for n in name_set if n and (dep in n or n in dep)]
                    if dep
                    else []
                )
                if len(matches) != 1:
                    logger.debug(
                        "Could not resolve internal dep '%s' for %s",
                        dep,
                        comp.name,
                    )
                    continue
                target = matches[0]
            resolved[target] = None
        comp.internal_dependencies = list(resolved)
def _detect_repo_shape(components: List[Component]) -> str:
    """Return a label describing how the repository is laid out.

    The label is ``"empty"`` for no components, ``"single-package"`` for
    exactly one, ``"polyglot-monorepo"`` when the components carry more than
    one distinct ``type`` value, and ``"monorepo"`` otherwise.
    """
    count = len(components)
    if count < 2:
        # Zero or one component: the type values are never inspected.
        return "single-package" if count else "empty"

    distinct_types = set()
    for component in components:
        distinct_types.add(component.type)

    return "polyglot-monorepo" if len(distinct_types) > 1 else "monorepo"
def _write_output(components: List[Component], output_dir: Path) -> None:
    """Serialize the components and summary counts to components.json.

    The output directory is created if missing. The JSON document has a
    ``components`` list (one ``to_dict()`` result per component) and a
    ``metadata`` object with the total count, a per-kind count (only kinds
    that occur, in ``ComponentKind`` iteration order), and a per-``type``
    count with keys in sorted order.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    serialized = [component.to_dict() for component in components]

    # Count per kind, omitting kinds that no component uses.
    kind_counts = {}
    for kind in ComponentKind:
        of_this_kind = [c for c in components if c.kind == kind]
        if of_this_kind:
            kind_counts[kind.value] = len(of_this_kind)

    # Count per type value, keyed in sorted order.
    language_counts = {}
    for lang in sorted({c.type for c in components}):
        language_counts[lang] = len([c for c in components if c.type == lang])

    payload = {
        "components": serialized,
        "metadata": {
            "total_components": len(components),
            "by_kind": kind_counts,
            "by_language": language_counts,
        },
    }

    target = output_dir / "components.json"
    with target.open("w") as handle:
        json.dump(payload, handle, indent=2)

    logger.info("Wrote %d components to %s", len(components), output_dir)