19
19
from simple_parsing .helpers .serialization .serializable import FrozenSerializable
20
20
import yaml
21
21
from sweagent .environment .utils import (
22
+ copy_anything_to_container ,
22
23
copy_file_to_container ,
23
24
format_trajectory_markdown ,
24
25
get_container ,
25
26
get_gh_issue_data ,
26
27
get_instances ,
27
- is_from_github_url ,
28
28
parse_gh_issue_url ,
29
29
parse_gh_repo_url ,
30
30
read_with_timeout ,
53
53
class EnvironmentArguments (FrozenSerializable ):
54
54
"""Configure data sources and setup instructions for th environment in which we solve the tasks.
55
55
"""
56
+ # Source of issue statement/problem statement. To run over a batch of issues: Path to a data file
57
+ # (`json`, `jsonl`) or directory. To run over single issue: github issue url or path to markdown file
58
+ # with problem statement.
56
59
data_path : str
57
60
image_name : str
58
61
split : str = "dev"
@@ -62,11 +65,13 @@ class EnvironmentArguments(FrozenSerializable):
62
65
timeout : int = 35
63
66
verbose : bool = False
64
67
no_mirror : bool = False
65
- # Custom environment setup. Currently only used when data_path is a GitHub URL .
68
+ # Custom environment setup. Currently only used when data_path points to a single issue .
66
69
# This needs to be either a string pointing to a yaml file (with yaml, yml file extension)
67
70
# or a shell script (with sh extension).
68
71
# See https://github.com/princeton-nlp/SWE-agent/pull/153 for more information
69
72
environment_setup : Optional [str ] = None
73
+ # Only used when running on a single issue. Path to a local repository or a GitHub repository.
74
+ repo_path : str = ""
70
75
71
76
72
77
class SWEEnv (gym .Env ):
@@ -84,7 +89,6 @@ def __init__(self, args: EnvironmentArguments):
84
89
self .logger = logger
85
90
self .persistent = args .container_name is not None
86
91
self .returncode = None
87
- self .is_from_github_url = is_from_github_url (args .data_path )
88
92
if not self .args .verbose :
89
93
self .logger .disabled = True
90
94
@@ -107,7 +111,9 @@ def __init__(self, args: EnvironmentArguments):
107
111
108
112
# Load Task Instances
109
113
self .data_path = self .args .data_path
110
- self .data = get_instances (self .data_path , self .args .base_commit , self .args .split , token = self ._github_token )
114
+ self .data = get_instances (self .data_path , self .args .base_commit , self .args .split , token = self ._github_token , repo_path = self .args .repo_path )
115
+ #: Instance we're currently processing. Gets set in self.reset.
116
+ self .record = None
111
117
self .logger .info (f"💽 Loaded dataset from { self .data_path } " )
112
118
113
119
# Establish connection with execution container
@@ -119,7 +125,48 @@ def __init__(self, args: EnvironmentArguments):
119
125
self .idx = 0
120
126
self .clean_multi_line_functions = lambda x : x
121
127
122
- def reset (self , index : int = None , apply_test_patch : bool = False ) -> Tuple [str , dict ]:
128
+ @property
129
+ def _repo_name (self ) -> str :
130
+ """Name of the local copy of the repository"""
131
+ assert self .record is not None
132
+ return self .record ["repo" ].replace ("/" , "__" )
133
+
134
def _copy_repo(self) -> str:
    """Clone/copy repository/codebase in container.

    Behaviour depends on ``self.record`` (the instance being processed):

    * ``repo_type == "local"``: the path stored in ``record["repo"]``
      (without its ``local://`` prefix) is copied to ``/<repo name>`` in
      the container and the copy is chown'ed to ``root:root``.
    * ``repo_type == "github"``: the repository is fetched with
      ``git clone``. The ``github.com/swe-bench/<repo name>`` mirror is
      used unless ``args.no_mirror`` is set or the problem statement does
      not come from SWE-bench; otherwise ``github.com/<record["repo"]>``
      is cloned into a folder named ``<repo name>``.

    NOTE: the ``git clone`` commands use a relative destination, so the
    clone lands in whatever directory the container shell is currently in.

    Returns:
        folder name of clone
    """
    import shlex

    assert self.record is not None  # mypy
    repo_name = self._repo_name

    if self.record["repo_type"] == "local":
        # Use one absolute destination for both the copy and the chown so
        # the chown does not depend on the shell's current directory.
        destination = "/" + repo_name
        copy_anything_to_container(
            self.container_obj,
            self.record["repo"].removeprefix("local://"),
            destination,
        )
        # Quote the path: it is derived from a user-supplied local path and
        # may contain spaces or shell metacharacters.
        self.communicate_with_handling(
            input=f"chown -R root:root {shlex.quote(destination)}",
            error_msg="Failed to change permissions on copied repository",
        )
        return repo_name

    assert self.record["repo_type"] == "github"
    # When a token is configured it is embedded in the clone URL.
    token_prefix = f"{self._github_token}@" if self._github_token else ""

    # fixme: This if statement is brittle and should probably be replaced with better logic
    if not self.args.no_mirror and self.record["problem_statement_source"] == "swe-bench":
        self.logger.info(f"{repo_name} not found in container, cloning...")
        clone_cmd = f"git clone https://{token_prefix}github.com/swe-bench/{repo_name}.git"
        error_msg = "Failed to clone repository from mirror"
    else:
        self.logger.info("Trying to clone from non-mirror...")
        clone_cmd = f"git clone https://{token_prefix}github.com/{self.record['repo']}.git {repo_name}"
        error_msg = "Failed to clone repository from non-mirror"

    self.communicate_with_handling(
        input=clone_cmd,
        error_msg=error_msg,
        timeout_duration=LONG_TIMEOUT,
    )
    return repo_name
168
+
169
+ def reset (self , index : Optional [int ] = None , apply_test_patch : bool = False ) -> Tuple [Optional [str ], dict ]:
123
170
"""
124
171
Function to reset container between each task instance.
125
172
* Clones instance's repository
@@ -151,30 +198,13 @@ def reset(self, index: int = None, apply_test_patch: bool = False) -> Tuple[str,
151
198
# Clone repository if not already cloned
152
199
self .communicate (input = "cd /" )
153
200
folders = self .communicate (input = "ls" ).split ("\n " )
154
- repo_name = self .record ["repo" ].replace ("/" , "__" )
155
- if repo_name not in folders :
156
- token_prefix = ""
157
- if self ._github_token :
158
- token_prefix = f"{ self ._github_token } @"
159
- if not self .args .no_mirror and not self .is_from_github_url :
160
- self .logger .info (f"{ repo_name } not found in container, cloning..." )
161
- self .communicate_with_handling (
162
- input = f"git clone https://{ token_prefix } github.com/swe-bench/{ repo_name } .git" ,
163
- error_msg = "Failed to clone repository from mirror" ,
164
- timeout_duration = LONG_TIMEOUT ,
165
- )
166
- else :
167
- logger .info (f"Trying to clone from non-mirror..." )
168
- self .communicate_with_handling (
169
- input = f"git clone https://{ token_prefix } github.com/{ self .record ['repo' ]} .git { repo_name } " ,
170
- error_msg = "Failed to clone repository from non-mirror" ,
171
- timeout_duration = LONG_TIMEOUT ,
172
- )
201
+ if self ._repo_name not in folders :
202
+ self ._copy_repo ()
173
203
174
204
# Clean repository of any modifications + Checkout base commit
175
205
for cmd in [
176
206
"echo -n > /root/files_to_edit.txt" ,
177
- f"cd { repo_name } " ,
207
+ f"cd { self . _repo_name } " ,
178
208
"export ROOT=$(pwd -P)" ,
179
209
"git status" ,
180
210
"git restore ." ,
@@ -559,14 +589,15 @@ def install_env(self) -> None:
559
589
"""
560
590
Creates conda environment and installs third party dependencies to allow code execution
561
591
"""
562
- if self .is_from_github_url and self .args .environment_setup is None :
592
+ assert self .record is not None # mypy
593
+ if (self .record ["problem_statement_source" ] != "swe-bench" or \
594
+ self .record ["repo_type" ] == "local" ) and self .args .environment_setup is None :
563
595
logger .warning ((
564
596
"install_environment is set to True, but the data path is a GitHub URL "
565
597
"without an environment config file (environment_config key/flag). "
566
598
"Skipping conda environment installation."
567
599
))
568
600
return
569
- repo_name = self .record ["repo" ].replace ("/" , "__" )
570
601
if self .args .environment_setup is not None :
571
602
assert isinstance (self .args .environment_setup , (str , os .PathLike ))
572
603
if Path (self .args .environment_setup ).suffix in [".yml" , ".yaml" ]:
@@ -592,7 +623,7 @@ def install_env(self) -> None:
592
623
)
593
624
raise ValueError (msg ) from e
594
625
# Create environment if does not exist yet
595
- env_name = f"{ repo_name } __{ self .record ['version' ]} "
626
+ env_name = f"{ self . _repo_name } __{ self .record ['version' ]} "
596
627
env_check = self .communicate (
597
628
f"conda env list | grep { env_name } " , timeout_duration = LONG_TIMEOUT
598
629
)
@@ -676,7 +707,7 @@ def install_env(self) -> None:
676
707
pre_install_cmd ,
677
708
error_msg = "Pre-install commands failed to execute successfully" ,
678
709
)
679
- self .logger .info (f"Installing { repo_name } at base commit..." )
710
+ self .logger .info (f"Installing { self . _repo_name } at base commit..." )
680
711
if "install" in install_configs :
681
712
install_cmd = install_configs ["install" ]
682
713
self .communicate_with_handling (
0 commit comments