Skip to content

Commit 987bd34

Browse files
committed
update trace so that it evaluates all commits
1 parent 1439727 commit 987bd34

File tree

2 files changed

+31
-43
lines changed

2 files changed

+31
-43
lines changed

agent/run_agent.py

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import json
1616
import subprocess
1717
from agent.agents import AiderAgents
18-
from typing import Optional, Type, cast
18+
from typing import Optional, Type, cast, Dict
1919
from types import TracebackType
2020
from agent.class_types import AgentConfig
2121
from commit0.harness.constants import SPLIT
@@ -46,19 +46,31 @@ def __exit__(
4646
os.chdir(self.cwd)
4747

4848

49-
def run_eval_after_each_commit(
50-
branch: str, backend: str, commit0_config_file: str
51-
) -> str:
49+
def run_eval_after_all_changes(
50+
repo: Repo, start_commit: str, branch: str, backend: str, commit0_config_file: str
51+
) -> Dict[str, str]:
5252
"""Run the eval command after each commit."""
5353
eval_cmd = f"python -m commit0 evaluate --branch {branch} --backend {backend} --commit0-config-file {commit0_config_file} --timeout 100"
54-
try:
55-
result = subprocess.run(
56-
eval_cmd, shell=True, capture_output=True, text=True, check=True
57-
)
58-
return result.stdout
59-
except subprocess.CalledProcessError as e:
60-
print(f"Error running eval command: {e}")
61-
return e.stdout if e.stdout else str(e)
54+
55+
results = {}
56+
commits = list(repo.iter_commits(f"{start_commit}..{branch}"))
57+
commits.reverse() # Go from oldest to newest
58+
59+
for commit in commits:
60+
repo.git.checkout(commit.hexsha, detach=True)
61+
try:
62+
result = subprocess.run(
63+
eval_cmd, shell=True, capture_output=True, text=True, check=True
64+
)
65+
results[commit.hexsha] = result.stdout
66+
except subprocess.CalledProcessError as e:
67+
print(f"Error running eval command for commit {commit.hexsha}: {e}")
68+
results[commit.hexsha] = e.stdout if e.stdout else str(e)
69+
70+
# Return to original branch
71+
repo.git.checkout(branch)
72+
73+
return results
6274

6375

6476
def run_agent_for_repo(
@@ -178,11 +190,6 @@ def run_agent_for_repo(
178190
test_log_dir,
179191
test_first=True,
180192
)
181-
if agent_config.record_test_for_each_commit:
182-
current_commit = local_repo.head.commit.hexsha
183-
eval_results[current_commit] = run_eval_after_each_commit(
184-
branch, backend, commit0_config_file
185-
)
186193

187194
# after running the agent, update the money display
188195
update_queue.put(
@@ -211,11 +218,6 @@ def run_agent_for_repo(
211218
lint_log_dir,
212219
lint_first=True,
213220
)
214-
if agent_config.record_test_for_each_commit:
215-
current_commit = local_repo.head.commit.hexsha
216-
eval_results[current_commit] = run_eval_after_each_commit(
217-
branch, backend, commit0_config_file
218-
)
219221

220222
# after running the agent, update the money display
221223
update_queue.put(
@@ -240,11 +242,6 @@ def run_agent_for_repo(
240242
repo_name, agent_config.use_lint_info, commit0_config_file
241243
)
242244
agent_return = agent.run(message, "", lint_cmd, [f], file_log_dir)
243-
if agent_config.record_test_for_each_commit:
244-
current_commit = local_repo.head.commit.hexsha
245-
eval_results[current_commit] = run_eval_after_each_commit(
246-
branch, backend, commit0_config_file
247-
)
248245

249246
update_queue.put(
250247
(
@@ -253,6 +250,9 @@ def run_agent_for_repo(
253250
)
254251
)
255252
if agent_config.record_test_for_each_commit:
253+
eval_results = run_eval_after_all_changes(
254+
local_repo, example["base_commit"], branch, backend, commit0_config_file
255+
)
256256
with open(experiment_log_dir / "eval_results.json", "w") as f:
257257
json.dump(eval_results, f)
258258

agent/run_agent_no_rich.py

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from commit0.cli import read_commit0_config_file
2525
from pathlib import Path
2626
from datetime import datetime
27-
from agent.run_agent import DirContext, run_eval_after_each_commit
27+
from agent.run_agent import DirContext, run_eval_after_all_changes
2828

2929

3030
def run_agent_for_repo(
@@ -138,11 +138,6 @@ def run_agent_for_repo(
138138
test_log_dir,
139139
test_first=True,
140140
)
141-
if agent_config.record_test_for_each_commit:
142-
current_commit = local_repo.head.commit.hexsha
143-
eval_results[current_commit] = run_eval_after_each_commit(
144-
branch, backend, commit0_config_file
145-
)
146141
elif agent_config.run_entire_dir_lint:
147142
# when unit test feedback is available, iterate over test files
148143
for lint_file in lint_files:
@@ -161,11 +156,6 @@ def run_agent_for_repo(
161156
lint_log_dir,
162157
lint_first=True,
163158
)
164-
if agent_config.record_test_for_each_commit:
165-
current_commit = local_repo.head.commit.hexsha
166-
eval_results[current_commit] = run_eval_after_each_commit(
167-
branch, backend, commit0_config_file
168-
)
169159
else:
170160
# when unit test feedback is not available, iterate over target files to edit
171161
message = get_message(agent_config, repo_path, test_files=test_files)
@@ -180,12 +170,10 @@ def run_agent_for_repo(
180170
repo_name, agent_config.use_lint_info, commit0_config_file
181171
)
182172
_ = agent.run(message, "", lint_cmd, [f], file_log_dir)
183-
if agent_config.record_test_for_each_commit:
184-
current_commit = local_repo.head.commit.hexsha
185-
eval_results[current_commit] = run_eval_after_each_commit(
186-
branch, backend, commit0_config_file
187-
)
188173
if agent_config.record_test_for_each_commit:
174+
eval_results = run_eval_after_all_changes(
175+
local_repo, example["base_commit"], branch, backend, commit0_config_file
176+
)
189177
with open(experiment_log_dir / "eval_results.json", "w") as f:
190178
json.dump(eval_results, f)
191179

0 commit comments

Comments
 (0)