1515import json
1616import subprocess
1717from agent .agents import AiderAgents
18- from typing import Optional , Type , cast
18+ from typing import Optional , Type , cast , Dict
1919from types import TracebackType
2020from agent .class_types import AgentConfig
2121from commit0 .harness .constants import SPLIT
@@ -46,19 +46,31 @@ def __exit__(
4646 os .chdir (self .cwd )
4747
4848
def run_eval_after_all_changes(
    repo: "Repo", start_commit: str, branch: str, backend: str, commit0_config_file: str
) -> Dict[str, str]:
    """Run the eval command once for every commit in ``start_commit..branch``.

    Each commit in the range is checked out (detached HEAD), oldest first,
    and the ``commit0 evaluate`` command is executed. The captured stdout of
    every run is collected per commit.

    Args:
        repo: Repository object providing ``iter_commits`` and ``git.checkout``.
        start_commit: Revision that marks the (exclusive) start of the range.
        branch: Branch whose commits are evaluated; it is checked out again
            before the function returns or raises.
        backend: Value forwarded to ``--backend``.
        commit0_config_file: Value forwarded to ``--commit0-config-file``.

    Returns:
        Mapping from commit hexsha to the eval command's stdout. When the
        command exits non-zero, the value is its stdout if any, otherwise the
        string form of the ``CalledProcessError``.

    NOTE(review): the command is built once with ``--branch {branch}``;
    whether the evaluator looks at the detached checkout or at the branch tip
    is not visible from here -- confirm that per-commit results really differ.
    """
    eval_cmd = f"python -m commit0 evaluate --branch {branch} --backend {backend} --commit0-config-file {commit0_config_file} --timeout 100"

    results: Dict[str, str] = {}
    commits = list(repo.iter_commits(f"{start_commit}..{branch}"))
    commits.reverse()  # Go from oldest to newest

    try:
        for commit in commits:
            repo.git.checkout(commit.hexsha, detach=True)
            try:
                result = subprocess.run(
                    eval_cmd, shell=True, capture_output=True, text=True, check=True
                )
                results[commit.hexsha] = result.stdout
            except subprocess.CalledProcessError as e:
                # A failing eval is recorded, not fatal: keep going with the
                # remaining commits.
                print(f"Error running eval command for commit {commit.hexsha}: {e}")
                results[commit.hexsha] = e.stdout if e.stdout else str(e)
    finally:
        # Always return to the original branch, even when a checkout or an
        # unexpected error aborts the loop, so the working tree is never left
        # on a detached HEAD.
        repo.git.checkout(branch)

    return results
6274
6375
6476def run_agent_for_repo (
@@ -178,11 +190,6 @@ def run_agent_for_repo(
178190 test_log_dir ,
179191 test_first = True ,
180192 )
181- if agent_config .record_test_for_each_commit :
182- current_commit = local_repo .head .commit .hexsha
183- eval_results [current_commit ] = run_eval_after_each_commit (
184- branch , backend , commit0_config_file
185- )
186193
187194 # after running the agent, update the money display
188195 update_queue .put (
@@ -211,11 +218,6 @@ def run_agent_for_repo(
211218 lint_log_dir ,
212219 lint_first = True ,
213220 )
214- if agent_config .record_test_for_each_commit :
215- current_commit = local_repo .head .commit .hexsha
216- eval_results [current_commit ] = run_eval_after_each_commit (
217- branch , backend , commit0_config_file
218- )
219221
220222 # after running the agent, update the money display
221223 update_queue .put (
@@ -240,11 +242,6 @@ def run_agent_for_repo(
240242 repo_name , agent_config .use_lint_info , commit0_config_file
241243 )
242244 agent_return = agent .run (message , "" , lint_cmd , [f ], file_log_dir )
243- if agent_config .record_test_for_each_commit :
244- current_commit = local_repo .head .commit .hexsha
245- eval_results [current_commit ] = run_eval_after_each_commit (
246- branch , backend , commit0_config_file
247- )
248245
249246 update_queue .put (
250247 (
@@ -253,6 +250,9 @@ def run_agent_for_repo(
253250 )
254251 )
255252 if agent_config .record_test_for_each_commit :
253+ eval_results = run_eval_after_all_changes (
254+ local_repo , example ["base_commit" ], branch , backend , commit0_config_file
255+ )
256256 with open (experiment_log_dir / "eval_results.json" , "w" ) as f :
257257 json .dump (eval_results , f )
258258
0 commit comments