Skip to content

Commit

Permalink
Merge pull request swe-bench#178 from princeton-nlp/add-schema-version
Browse files Browse the repository at this point in the history
Add schema version to report card
  • Loading branch information
john-b-yang authored Jul 12, 2024
2 parents 7a5729d + 44482e2 commit d99c1c4
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 1 deletion.
7 changes: 6 additions & 1 deletion swebench/harness/run_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ def make_run_report(
full_dataset: list,
client: docker.DockerClient,
run_id: str
):
) -> Path:
"""
Make a final evaluation and run report of the instances that have been run.
Also reports on images and containers that may still be running!
Expand All @@ -367,6 +367,9 @@ def make_run_report(
full_dataset (list): List of all instances
client (docker.DockerClient): Docker client
run_id (str): Run ID
Returns:
Path to report file
"""
# instantiate sets to store IDs of different outcomes
completed_ids = set()
Expand Down Expand Up @@ -453,6 +456,7 @@ def make_run_report(
"error_ids": list(sorted(error_ids)),
"unstopped_containers": list(sorted(unstopped_containers)),
"unremoved_images": list(sorted(unremoved_images)),
"schema_version": 2,
}
report_file = Path(
list(predictions.values())[0]["model_name_or_path"].replace("/", "__")
Expand All @@ -462,6 +466,7 @@ def make_run_report(
with open(report_file, "w") as f:
print(json.dumps(report, indent=4), file=f)
print(f"Report written to {report_file}")
return report_file


def get_gold_predictions(dataset_name: str, split: str):
Expand Down
24 changes: 24 additions & 0 deletions tests/test_evaluation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import collections
import json
import docker

from swebench.harness.run_evaluation import make_run_report

# Minimal stand-in for a SWE-bench dataset instance: every field that is not
# explicitly set below resolves to the placeholder string "test" via the
# defaultdict factory, so make_run_report can read arbitrary keys safely.
TEST_INSTANCE = collections.defaultdict(lambda: "test")
TEST_INSTANCE.update({
    "PASS_TO_PASS": '[]',
    "repo": 'pvlib/pvlib-python',
    "version": '0.1',
    "FAIL_TO_PASS": '[]',
})

def test_make_run_report(tmpdir) -> None:
    """Smoke-test make_run_report end to end.

    Verifies that the function writes a JSON report file, returns its path,
    and that the report carries the current ``schema_version`` (2).

    Requires a reachable Docker daemon (``docker.from_env()``).

    Args:
        tmpdir: pytest fixture providing a per-test temporary directory.
    """
    client = docker.from_env()
    try:
        # Run inside the temp dir so the report file does not pollute the repo.
        with tmpdir.as_cwd():
            output_path = make_run_report(
                {"test": {"instance_id": "test", "model_name_or_path": "test"}},
                [TEST_INSTANCE],
                client,
                "test"
            )
            assert output_path.is_file()
            report = json.loads(output_path.read_text())
            assert report["schema_version"] == 2
    finally:
        # Original leaked the client: close() releases the HTTP connection
        # pool to the Docker daemon even if an assertion above fails.
        client.close()

0 comments on commit d99c1c4

Please sign in to comment.