Skip to content

Commit 72e3de7

Browse files
author
Atlas
committed
fix: remove all hardcoded command fallbacks
- Add DiscoveryError exception for LLM-unavailable scenarios
- Remove pip install/npm test/pytest tests fallbacks in workspace.py
- Remove hardcoded pip install -e . in agentic.py
- Remove file count heuristics in pipeline.py
- Pipeline now fails gracefully instead of using hardcoded fallbacks
- Update test to expect None instead of 'easy' when no classifier
1 parent 15b31ae commit 72e3de7

5 files changed

Lines changed: 47 additions & 35 deletions

File tree

src/swe_forge/discovery/agentic.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,10 +108,9 @@ async def discover_install_commands(
108108
f.endswith("setup.py") or f == "setup.py" for f in filenames
109109
)
110110
if has_pyproject or has_setup:
111-
discovered.install_commands = ["pip install -e ."]
111+
# Don't hardcode commands - let LLM discover them
112112
discovered.discovery_source = "python-package"
113-
discovered.confidence = "high"
114-
return discovered
113+
# Continue to LLM discovery below instead of returning early
115114

116115
# Step 1: Read CI/CD for tested commands
117116
ci_commands = self._extract_ci_install_commands(files, language)

src/swe_forge/exceptions.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
"""Custom exceptions for swe-forge."""
2+
3+
4+
class DiscoveryError(Exception):
5+
"""Raised when command discovery fails and no LLM is available.
6+
7+
This exception is raised when:
8+
- No install commands were discovered via LLM
9+
- No test commands were discovered via LLM
10+
- LLM client is not configured for command discovery
11+
12+
The pipeline should fail gracefully rather than falling back to
13+
hardcoded commands.
14+
"""
15+
16+
pass

src/swe_forge/export/workspace.py

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,13 @@ def export_task_to_workspace(
4242
f"{docker_username}/swe-forge-tasks:{task.id}" if docker_username else None
4343
)
4444

45-
# Get install commands from config or defaults
45+
# Get install commands from config - NO FALLBACKS
4646
install_commands = task.install_config.get("install_commands", [])
4747
if not install_commands:
48-
if task.language == "python":
49-
install_commands = ["pip install -e .", "pip install pytest"]
50-
elif task.language in ("javascript", "typescript"):
51-
install_commands = ["npm install", "npm test"]
52-
elif task.language == "rust":
53-
install_commands = ["cargo build", "cargo test"]
48+
raise DiscoveryError(
49+
f"No install commands discovered for task {task.id}. "
50+
"Ensure LLM-based discovery is configured (OPENROUTER_API_KEY)."
51+
)
5452

5553
# Get test commands - fallback to test_patch extraction if empty
5654
fail_to_pass = list(task.fail_to_pass) if task.fail_to_pass else []
@@ -62,14 +60,12 @@ def export_task_to_workspace(
6260
if test_files:
6361
fail_to_pass = [f"pytest {f} -v" for f in test_files]
6462

65-
# Default test commands as last resort
63+
# No fallback test commands - require LLM discovery
6664
if not fail_to_pass:
67-
if task.language == "python":
68-
fail_to_pass = ["pytest tests/ -v"]
69-
elif task.language in ("javascript", "typescript"):
70-
fail_to_pass = ["npm test"]
71-
elif task.language == "rust":
72-
fail_to_pass = ["cargo test"]
65+
raise DiscoveryError(
66+
f"No test commands discovered for task {task.id}. "
67+
"Ensure TestGenerator is configured and ran successfully."
68+
)
7369

7470
# Build workspace data
7571
workspace_data: dict[str, Any] = {

src/swe_forge/swe/pipeline.py

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -301,12 +301,13 @@ async def _preclassify_stage(
301301
response: TriageResponse = await classifier.classify_triage(pr_info)
302302
difficulty = response.difficulty
303303
else:
304-
if enriched.files_changed <= 2:
305-
difficulty = "easy"
306-
elif enriched.files_changed <= 5:
307-
difficulty = "medium"
308-
else:
309-
difficulty = "hard"
304+
# NO HARDCODED HEURISTICS - skip if no classifier
305+
logger.warning(
306+
"No classifier for %s#%d, skipping pre-classification",
307+
enriched.repo,
308+
enriched.number,
309+
)
310+
return None
310311

311312
if difficulty == "easy":
312313
metrics.preclassify_easy += 1
@@ -395,15 +396,13 @@ async def _deep_stage(
395396
else:
396397
metrics.difficulty_hard += 1
397398
else:
398-
if enriched.files_changed <= 2:
399-
task.difficulty_score = 1
400-
metrics.difficulty_easy += 1
401-
elif enriched.files_changed <= 5:
402-
task.difficulty_score = 2
403-
metrics.difficulty_medium += 1
404-
else:
405-
task.difficulty_score = 3
406-
metrics.difficulty_hard += 1
399+
# NO HARDCODED HEURISTICS - skip difficulty scoring
400+
task.difficulty_score = 0
401+
logger.warning(
402+
"No difficulty classifier for %s#%d, skipping scoring",
403+
enriched.repo,
404+
enriched.number,
405+
)
407406

408407
await self._emit_event(
409408
event_queue,

tests/test_swe/test_pipeline.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -443,7 +443,7 @@ async def test_difficulty_classifier_integration(self, mock_gh_client):
443443

444444
@pytest.mark.asyncio
445445
async def test_difficulty_classifier_fallback(self, mock_gh_client):
446-
"""Verify heuristics used when llm_client=None."""
446+
"""Verify NO HARDCODED HEURISTICS when llm_client=None."""
447447
from swe_forge.swe.enricher import EnrichedPullRequest
448448

449449
config = SwePipelineConfig()
@@ -470,8 +470,10 @@ async def test_difficulty_classifier_fallback(self, mock_gh_client):
470470

471471
result = await pipeline._preclassify_stage(enriched, semaphore, metrics)
472472

473-
assert result == "easy"
474-
assert metrics.preclassify_easy == 1
473+
# NO HARDCODED HEURISTICS - returns None when no classifier
474+
assert result is None
475+
# No classification happened
476+
assert metrics.preclassify_easy == 0
475477

476478
@pytest.mark.asyncio
477479
async def test_difficulty_classifier_score_mapping(self, mock_gh_client):

0 commit comments

Comments
 (0)