Skip to content

Commit 9da9701

Browse files
committed
Update to evaluation logic
1 parent c17dcff commit 9da9701

File tree

1 file changed

+7
-38
lines changed

1 file changed

+7
-38
lines changed

evaluation/evaluation.py

+7 -38
Original file line number | Diff line number | Diff line change
@@ -166,54 +166,23 @@ def main(predictions_path, log_dir, swe_bench_tasks, testbed, skip_existing, tim
166166
scorecard["patch_lines_del"] = 0
167167
scorecards.append(scorecard)
168168

169-
# Calculate cumulative results
170-
get_ids_with_status = lambda x: [
171-
s[KEY_INSTANCE_ID] for s in scorecards if x in s["statuses"]
172-
]
173-
report = {
174-
"# Not Generated": len(get_ids_with_status("not_generated")),
175-
"# Generated": len(get_ids_with_status("generated")),
176-
"# Applied": len(get_ids_with_status("applied")),
177-
"# Resolved": len(get_ids_with_status("RESOLVED_FULL")),
178-
"# Install Fail": len(get_ids_with_status("install_fail")),
179-
}
180-
print(f"== Evaluation Report ==\n{report}")
181-
182-
report_exits = dict(
183-
Counter([s["exit_status"] if "exit_status" in s else "n/a" for s in scorecards])
184-
)
185-
186169
# Save to summary, scorecard json
187170
path_scorecards = os.path.join(directory, "scorecards.json")
188171
with open(path_scorecards, "w") as f:
189172
json.dump(scorecards, fp=f, indent=2)
190173
print(f"- Wrote per-instance scorecards to {path_scorecards}")
191174

192-
path_results = os.path.join(directory, "results.json")
193-
with open(path_results, "w") as f:
194-
json.dump(
195-
{
196-
"report": report,
197-
"report_exits": report_exits,
198-
"not_generated": get_ids_with_status("not_generated"),
199-
"generated": get_ids_with_status("generated"),
200-
"applied": get_ids_with_status("applied"),
201-
"resolved": get_ids_with_status("RESOLVED_FULL"),
202-
"install_fail": get_ids_with_status("install_fail"),
203-
},
204-
fp=f,
205-
indent=2,
206-
)
207-
print(f"- Wrote summary of run to {path_results}")
208-
209-
# Sanity check against get_model_report
210-
report = get_model_report(
211-
directory_name, pred_path_orig, swe_bench_tasks, log_dir
212-
)
175+
# Get results and write to file
213176
print(f"Reference Report:")
177+
report = get_model_report(directory_name, pred_path_orig, swe_bench_tasks, log_dir)
214178
for k, v in report.items():
215179
print(f"- {k}: {len(v)}")
216180

181+
path_results = os.path.join(directory, "results.json")
182+
with open(path_results, "w") as f:
183+
json.dump(report, f, indent=2)
184+
print(f"- Wrote summary of run to {path_results}")
185+
217186

218187
if __name__ == "__main__":
219188
# Parse arguments

0 commit comments

Comments
 (0)