iskng
diff --git a/‎sweagent/environment/utils.py
Lines changed: 48 additions & 34 deletions b/‎sweagent/environment/utils.py
Lines changed: 48 additions & 34 deletions
diff --git a/‎tests/test_data/data_sources/debug_20240322.json
Lines changed: 1 addition & 0 deletions b/‎tests/test_data/data_sources/debug_20240322.json
Lines changed: 1 addition & 0 deletions
diff --git a/‎tests/test_data/data_sources/human_eval.json
Lines changed: 1 addition & 0 deletions b/‎tests/test_data/data_sources/human_eval.json
Lines changed: 1 addition & 0 deletions
diff --git a/‎tests/test_data/data_sources/swe-bench-dev-easy.json
Lines changed: 1 addition & 0 deletions b/‎tests/test_data/data_sources/swe-bench-dev-easy.json
Lines changed: 1 addition & 0 deletions
diff --git a/‎tests/test_data/data_sources/swe-bench-lite-test.json
Lines changed: 1 addition & 0 deletions b/‎tests/test_data/data_sources/swe-bench-lite-test.json
Lines changed: 1 addition & 0 deletions
diff --git a/‎tests/test_env.py
Lines changed: 3 additions & 0 deletions b/‎tests/test_env.py
Lines changed: 3 additions & 0 deletions
diff --git a/‎tests/test_utils.py
Lines changed: 7 additions & 0 deletions b/‎tests/test_utils.py
Lines changed: 7 additions & 0 deletions
@@ -12,7 +12,6 @@
 import tempfile
 import time
 import traceback
-import dataclasses
 
 from datasets import load_dataset, load_from_disk
 from ghapi.all import GhApi
@@ -390,30 +389,6 @@ def get_problem_statement_from_github_issue(owner: str, repo: str, issue_number:
     return f"{title}\n{body}\n"
 
 
-@dataclasses.dataclass
-class Instance:
-    repo: str
-    base_commit: str
-    version: str
-    problem_statement: str
-    instance_id: str
-    # todo: This field is only needed while swe_env is using some questionable logic
-    # to determine whether to clone from a mirror or not. This should be removed in the future.
-    # Values: 'swe-bench' (loaded from json/jsonl for swe-bench style inference),
-    # 'online' (loaded from github issue or similar) or 'local' (loaded from local file)
-    problem_statement_source: str = "swe-bench"
-    repo_type: str = "github"
-
-    def _validate(self):
-        if self.repo_type not in {"github", "local"}:
-            raise ValueError(f"Invalid repo type: {self.repo_type=}")
-        if self.repo_type == "github" and self.repo.count("/") != 1:
-            raise ValueError(f"Invalid repo format for {self.repo_type=}: {self.repo=}")
-
-    def __post_init__(self):
-        self._validate()
-
-
 class InstanceBuilder:
     def __init__(self, token: Optional[str] = None):
         """This helper class is used to build the data for an instance object, 
@@ -481,7 +456,41 @@ def set_repo_info(self, repo: str, base_commit: Optional[str] = None):
         else:
             raise ValueError(f"Could not determine repo path from {repo=}.")
 
-    def build(self) -> Instance: return Instance(**self.args)
+    def set_from_dict(self, instance_dict: Dict[str, Any]):
+        """Merge every key/value pair of ``instance_dict`` into ``self.args``.
+
+        Keys that already exist in ``self.args`` are overwritten.
+        """
+        self.args.update(instance_dict)
+    
+    def set_missing_fields(self):
+        """Insert default values for optional fields absent from ``self.args``.
+
+        Fields that are already present (whatever their value) are left untouched.
+        """
+        # TODO: ``problem_statement_source`` is only needed while swe_env is using
+        # some questionable logic to determine whether to clone from a mirror or
+        # not. This should be removed in the future.
+        # Values: 'swe-bench' (loaded from json/jsonl for swe-bench style inference),
+        # 'online' (loaded from github issue or similar) or 'local' (loaded from local file)
+        self.args.setdefault("problem_statement_source", "swe-bench")
+        self.args.setdefault("repo_type", "github")
+    
+    def validate(self):
+        """Check that ``self.args`` describes a complete, well-formed instance.
+
+        Raises:
+            ValueError: If any required field is missing, if ``repo_type`` is
+                neither "github" nor "local", or if ``repo_type`` is "github"
+                and ``repo`` does not contain exactly one "/".
+        """
+        required_fields = (
+            "problem_statement",
+            "instance_id",
+            "repo",
+            "repo_type",
+            "base_commit",
+            "version",
+            "problem_statement_source",
+        )
+        # Compute the missing set once; a non-empty result is the failure condition.
+        missing = set(required_fields) - set(self.args.keys())
+        if missing:
+            raise ValueError(f"Missing required fields: {missing=}")
+        # The f-strings below deliberately keep the full ``self.args[...]``
+        # expressions: with ``=`` the expression text is part of the message.
+        repo_type = self.args["repo_type"]
+        if repo_type not in {"github", "local"}:
+            raise ValueError(f"Invalid repo type: {self.args['repo_type']=}")
+        if repo_type == "github" and self.args["repo"].count("/") != 1:
+            raise ValueError(f"Invalid repo format for {self.args['repo_type']=}: {self.args['repo']=}")
+    
+    def build(self) -> Dict[str, Any]:
+        """Fill in defaults, validate, and return the instance dictionary.
+
+        Calls ``set_missing_fields`` and then ``validate``; any ``ValueError``
+        raised by ``validate`` propagates to the caller.
+
+        Returns:
+            ``self.args`` itself (the builder's own dictionary, not a copy).
+        """
+        self.set_missing_fields()
+        self.validate()
+        return self.args
 
 
 def get_instances(
@@ -501,17 +510,22 @@ def get_instances(
     Returns:
         List of instances as dictionaries
     """
-    def set_missing_keys(instances):
-        return [dataclasses.asdict(Instance(**inst)) for inst in instances]
+    def instance_from_dict(instances):
+        """Build one instance dictionary from a raw dictionary.
+
+        Despite the plural parameter name, ``instances`` is a single raw
+        instance dictionary; it is loaded into a fresh ``InstanceBuilder``
+        and the result of ``build()`` is returned.
+        """
+        builder = InstanceBuilder(token=token)
+        builder.set_from_dict(instances)
+        return builder.build()
+
+    def postproc_instance_list(instances):
+        """Pass each raw instance through ``instance_from_dict``; return a list."""
+        return list(map(instance_from_dict, instances))
 
 
     # If file_path is a directory, attempt load from disk
     if os.path.isdir(file_path):
         try:
             dataset_or_dict = load_from_disk(file_path)
             if isinstance(dataset_or_dict, dict):
-                return set_missing_keys(dataset_or_dict[split])
-            return set_missing_keys(dataset_or_dict)
+                return postproc_instance_list(dataset_or_dict[split])
+            return postproc_instance_list(dataset_or_dict)
         except FileNotFoundError:
             # Raised by load_from_disk if the directory is not a dataset directory
             pass
@@ -527,24 +541,24 @@ def set_missing_keys(instances):
         else:
             raise ValueError(f"Could not determine repo path from {file_path=}, {repo_path=}")
 
-        return [dataclasses.asdict(ib.build())]
+        return [ib.build()]
 
     if base_commit is not None:
         raise ValueError("base_commit must be None if data_path is not a github issue url")
 
     # If file_path is a file, load the file
     if file_path.endswith(".json"):
-        return set_missing_keys(json.load(open(file_path)))
+        return postproc_instance_list(json.load(open(file_path)))
     if file_path.endswith(".jsonl"):
-        return set_missing_keys([json.loads(x) for x in open(file_path, 'r').readlines()])
+        return postproc_instance_list([json.loads(x) for x in open(file_path, 'r').readlines()])
 
     if repo_path:
         msg = "repo_path must be empty if data_path is not a github url or local repo url"
         raise ValueError(msg)
 
     # Attempt load from HF datasets as a last resort
     try:
-        return set_missing_keys(load_dataset(file_path, split=split))
+        return postproc_instance_list(load_dataset(file_path, split=split))
     except:
         raise ValueError(
             f"Could not load instances from {file_path}. "
 
@@ -0,0 +1 @@
+[{"instance_id": "swe-bench__humaneval-30", "problem_statement": "I have a function that needs implementing, can you help?", "created_at": "2023110716", "version": "1.0", "test_patch": "diff --git a/test.py b/test.py\nnew file mode 100644\nindex 0000000..52ecda2\n--- /dev/null\n+++ b/test.py\n@@ -0,0 +1,13 @@\n+from main import get_positive\n+\n+\n+METADATA = {}\n+\n+\n+def check(candidate):\n+    assert candidate([-1, -2, 4, 5, 6]) == [4, 5, 6]\n+    assert candidate([5, 3, -5, 2, 3, 3, 9, 0, 123, 1, -10]) == [5, 3, 2, 3, 3, 9, 123, 1]\n+    assert candidate([-1, -2]) == []\n+    assert candidate([]) == []\n+\n+check(get_positive)\n", "base_commit": "0880311", "base_commit_with_tests": "b2e380b", "environment_setup_commit": null, "hints_text": null, "repo": "swe-bench/humaneval", "FAIL_TO_PASS": "", "PASS_TO_PASS": ""}, {"instance_id": "swe-bench__humaneval-85", "problem_statement": "I have a function that needs implementing, can you help?", "created_at": "2023110716", "version": "1.0", "test_patch": "diff --git a/test.py b/test.py\nnew file mode 100644\nindex 0000000..13d6e1f\n--- /dev/null\n+++ b/test.py\n@@ -0,0 +1,12 @@\n+from main import add\n+def check(candidate):\n+\n+    # Check some simple cases\n+    assert candidate([4, 88]) == 88\n+    assert candidate([4, 5, 6, 7, 2, 122]) == 122\n+    assert candidate([4, 0, 6, 7]) == 0\n+    assert candidate([4, 4, 6, 8]) == 12\n+\n+    # Check some edge cases that are easy to work out by hand.\n+    \n+check(add)\n", "base_commit": "2de55bc", "base_commit_with_tests": "c8c997b", "environment_setup_commit": null, "hints_text": null, "repo": "swe-bench/humaneval", "FAIL_TO_PASS": "", "PASS_TO_PASS": ""}, {"instance_id": "swe-bench__humaneval-22", "problem_statement": "I have a function that needs implementing, can you help?", "created_at": "2023110716", "version": "1.0", "test_patch": "diff --git a/test.py b/test.py\nnew file mode 100644\nindex 0000000..d881459\n--- /dev/null\n+++ b/test.py\n@@ -0,0 +1,14 @@\n+from 
main import filter_integers\n+\n+\n+METADATA = {\n+    'author': 'jt',\n+    'dataset': 'test'\n+}\n+\n+\n+def check(candidate):\n+    assert candidate([]) == []\n+    assert candidate([4, {}, [], 23.2, 9, 'adasd']) == [4, 9]\n+    assert candidate([3, 'c', 3, 3, 'a', 'b']) == [3, 3, 3]\n+check(filter_integers)\n", "base_commit": "f0dbe5e", "base_commit_with_tests": "55cc474", "environment_setup_commit": null, "hints_text": null, "repo": "swe-bench/humaneval", "FAIL_TO_PASS": "", "PASS_TO_PASS": ""}, {"instance_id": "swe-bench__humaneval-104", "problem_statement": "I have a function that needs implementing, can you help?", "created_at": "2023110716", "version": "1.0", "test_patch": "diff --git a/test.py b/test.py\nnew file mode 100644\nindex 0000000..617da5a\n--- /dev/null\n+++ b/test.py\n@@ -0,0 +1,13 @@\n+from main import unique_digits\n+def check(candidate):\n+\n+    # Check some simple cases\n+    assert candidate([15, 33, 1422, 1]) == [1, 15, 33]\n+    assert candidate([152, 323, 1422, 10]) == []\n+    assert candidate([12345, 2033, 111, 151]) == [111, 151]\n+    assert candidate([135, 103, 31]) == [31, 135]\n+\n+    # Check some edge cases that are easy to work out by hand.\n+    assert True\n+\n+check(unique_digits)\n", "base_commit": "b52ee85", "base_commit_with_tests": "4a92a50", "environment_setup_commit": null, "hints_text": null, "repo": "swe-bench/humaneval", "FAIL_TO_PASS": "", "PASS_TO_PASS": ""}, {"instance_id": "swe-bench__humaneval-0", "problem_statement": "I have a function that needs implementing, can you help?", "created_at": "2023110716", "version": "1.0", "test_patch": "diff --git a/test.py b/test.py\nnew file mode 100644\nindex 0000000..2d57340\n--- /dev/null\n+++ b/test.py\n@@ -0,0 +1,19 @@\n+from main import has_close_elements\n+\n+\n+METADATA = {\n+    'author': 'jt',\n+    'dataset': 'test'\n+}\n+\n+\n+def check(candidate):\n+    assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.3) == True\n+    assert candidate([1.0, 2.0, 3.9, 4.0, 
5.0, 2.2], 0.05) == False\n+    assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.95) == True\n+    assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.8) == False\n+    assert candidate([1.0, 2.0, 3.0, 4.0, 5.0, 2.0], 0.1) == True\n+    assert candidate([1.1, 2.2, 3.1, 4.1, 5.1], 1.0) == True\n+    assert candidate([1.1, 2.2, 3.1, 4.1, 5.1], 0.5) == False\n+\n+check(has_close_elements)\n", "base_commit": "afba737", "base_commit_with_tests": "c7e41b2", "environment_setup_commit": null, "hints_text": null, "repo": "swe-bench/humaneval", "FAIL_TO_PASS": "", "PASS_TO_PASS": ""}]
@@ -66,6 +66,9 @@ def test_execute_environment(tmp_path, test_env_args):
     test_env_args = dataclasses.replace(test_env_args, environment_setup=env_config_path)
     env = SWEEnv(test_env_args)
     env.reset()
+
+
+@pytest.mark.slow
 def test_open_pr(test_env_args):
     env = SWEEnv(test_env_args)
     env.reset()
 
@@ -140,3 +140,10 @@ def test_get_instance_gh_issue_gh_repo(tmp_path):
     assert "SyntaxError" in instance["problem_statement"]
     assert len(instance["base_commit"]) > 10
     assert instance["version"]
+
+
+def test_load_instances(test_data_path, caplog):
+    """Smoke test: each entry under ``data_sources`` loads without raising."""
+    data_source_dir = test_data_path / "data_sources"
+    for data_source in data_source_dir.iterdir():
+        get_instances(file_path=str(data_source))
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+[{"instance_id": "swe-bench__humaneval-30", "problem_statement": "I have a function that needs implementing, can you help?", "created_at": "2023110716", "version": "1.0", "test_patch": "diff --git a/test.py b/test.py\nnew file mode 100644\nindex 0000000..52ecda2\n--- /dev/null\n+++ b/test.py\n@@ -0,0 +1,13 @@\n+from main import get_positive\n+\n+\n+METADATA = {}\n+\n+\n+def check(candidate):\n+ assert candidate([-1, -2, 4, 5, 6]) == [4, 5, 6]\n+ assert candidate([5, 3, -5, 2, 3, 3, 9, 0, 123, 1, -10]) == [5, 3, 2, 3, 3, 9, 123, 1]\n+ assert candidate([-1, -2]) == []\n+ assert candidate([]) == []\n+\n+check(get_positive)\n", "base_commit": "0880311", "base_commit_with_tests": "b2e380b", "environment_setup_commit": null, "hints_text": null, "repo": "swe-bench/humaneval", "FAIL_TO_PASS": "", "PASS_TO_PASS": ""}, {"instance_id": "swe-bench__humaneval-85", "problem_statement": "I have a function that needs implementing, can you help?", "created_at": "2023110716", "version": "1.0", "test_patch": "diff --git a/test.py b/test.py\nnew file mode 100644\nindex 0000000..13d6e1f\n--- /dev/null\n+++ b/test.py\n@@ -0,0 +1,12 @@\n+from main import add\n+def check(candidate):\n+\n+ # Check some simple cases\n+ assert candidate([4, 88]) == 88\n+ assert candidate([4, 5, 6, 7, 2, 122]) == 122\n+ assert candidate([4, 0, 6, 7]) == 0\n+ assert candidate([4, 4, 6, 8]) == 12\n+\n+ # Check some edge cases that are easy to work out by hand.\n+ \n+check(add)\n", "base_commit": "2de55bc", "base_commit_with_tests": "c8c997b", "environment_setup_commit": null, "hints_text": null, "repo": "swe-bench/humaneval", "FAIL_TO_PASS": "", "PASS_TO_PASS": ""}, {"instance_id": "swe-bench__humaneval-22", "problem_statement": "I have a function that needs implementing, can you help?", "created_at": "2023110716", "version": "1.0", "test_patch": "diff --git a/test.py b/test.py\nnew file mode 100644\nindex 0000000..d881459\n--- /dev/null\n+++ b/test.py\n@@ -0,0 +1,14 @@\n+from main import 
filter_integers\n+\n+\n+METADATA = {\n+ 'author': 'jt',\n+ 'dataset': 'test'\n+}\n+\n+\n+def check(candidate):\n+ assert candidate([]) == []\n+ assert candidate([4, {}, [], 23.2, 9, 'adasd']) == [4, 9]\n+ assert candidate([3, 'c', 3, 3, 'a', 'b']) == [3, 3, 3]\n+check(filter_integers)\n", "base_commit": "f0dbe5e", "base_commit_with_tests": "55cc474", "environment_setup_commit": null, "hints_text": null, "repo": "swe-bench/humaneval", "FAIL_TO_PASS": "", "PASS_TO_PASS": ""}, {"instance_id": "swe-bench__humaneval-104", "problem_statement": "I have a function that needs implementing, can you help?", "created_at": "2023110716", "version": "1.0", "test_patch": "diff --git a/test.py b/test.py\nnew file mode 100644\nindex 0000000..617da5a\n--- /dev/null\n+++ b/test.py\n@@ -0,0 +1,13 @@\n+from main import unique_digits\n+def check(candidate):\n+\n+ # Check some simple cases\n+ assert candidate([15, 33, 1422, 1]) == [1, 15, 33]\n+ assert candidate([152, 323, 1422, 10]) == []\n+ assert candidate([12345, 2033, 111, 151]) == [111, 151]\n+ assert candidate([135, 103, 31]) == [31, 135]\n+\n+ # Check some edge cases that are easy to work out by hand.\n+ assert True\n+\n+check(unique_digits)\n", "base_commit": "b52ee85", "base_commit_with_tests": "4a92a50", "environment_setup_commit": null, "hints_text": null, "repo": "swe-bench/humaneval", "FAIL_TO_PASS": "", "PASS_TO_PASS": ""}, {"instance_id": "swe-bench__humaneval-0", "problem_statement": "I have a function that needs implementing, can you help?", "created_at": "2023110716", "version": "1.0", "test_patch": "diff --git a/test.py b/test.py\nnew file mode 100644\nindex 0000000..2d57340\n--- /dev/null\n+++ b/test.py\n@@ -0,0 +1,19 @@\n+from main import has_close_elements\n+\n+\n+METADATA = {\n+ 'author': 'jt',\n+ 'dataset': 'test'\n+}\n+\n+\n+def check(candidate):\n+ assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.3) == True\n+ assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.05) == False\n+ assert candidate([1.0, 2.0, 5.9, 
4.0, 5.0], 0.95) == True\n+ assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.8) == False\n+ assert candidate([1.0, 2.0, 3.0, 4.0, 5.0, 2.0], 0.1) == True\n+ assert candidate([1.1, 2.2, 3.1, 4.1, 5.1], 1.0) == True\n+ assert candidate([1.1, 2.2, 3.1, 4.1, 5.1], 0.5) == False\n+\n+check(has_close_elements)\n", "base_commit": "afba737", "base_commit_with_tests": "c7e41b2", "environment_setup_commit": null, "hints_text": null, "repo": "swe-bench/humaneval", "FAIL_TO_PASS": "", "PASS_TO_PASS": ""}]