Add coverage collection and fix Modal stdout stream errors

wenting-zhao · wenting-zhao · commit f4ff2d019f3a · 2024-09-20T01:40:04.000Z
diff --git a/commit0/harness/execution_context.py b/commit0/harness/execution_context.py
@@ -32,14 +32,6 @@
 )
 
 
-def read_stream(stream: modal.io_streams.StreamReader) -> str:
-    """Read stream"""
-    strings = []
-    for line in stream:
-        strings.append(line)
-    return "\n".join(strings)
-
-
 class ExecutionBackend(StrEnum):
     LOCAL = auto()
     MODAL = auto()
@@ -54,6 +46,7 @@ def __init__(
         num_cpus: int,
         log_dir: Path,
         files_to_copy: Optional[Files] = None,
+        files_to_collect: Optional[Files] = None,
     ):
         """Create the remote execution context
 
@@ -65,25 +58,13 @@ def __init__(
         self.timeout = timeout
         self.num_cpus = num_cpus
         self.log_dir = log_dir
+        self.files_to_collect = files_to_collect
 
     @abstractmethod
     def exec_run_with_timeout(self, command: str) -> tuple[str, bool, float]:
         """Execute a test command"""
         raise NotImplementedError
 
-    def write_test_output(self, test_output: str, timed_out: bool) -> None:
-        """Write test output"""
-        test_output_path = self.log_dir / "test_output.txt"
-        with open(test_output_path, "w") as f:
-            f.write(test_output)
-            if timed_out:
-                f.write(f"\n\nTimeout error: {self.timeout} seconds exceeded.")
-                raise EvaluationError(
-                    self.spec.repo,
-                    f"Test timed out after {self.timeout} seconds.",
-                    self.logger,
-                )
-
     def __enter__(self):
         return self
 
@@ -106,8 +87,9 @@ def __init__(
         num_cpus: int,
         log_dir: Path,
         files_to_copy: Optional[Files] = None,
+        files_to_collect: Optional[Files] = None,
     ):
-        super().__init__(spec, logger, timeout, num_cpus, log_dir)
+        super().__init__(spec, logger, timeout, num_cpus, log_dir, files_to_copy=files_to_copy, files_to_collect=files_to_collect)
 
         self.client = docker.from_env()
         self.container = create_container(
@@ -126,17 +108,18 @@ def exec_run_with_timeout(self, command: str) -> tuple[str, bool, float]:
         """Exec"""
         output = exec_run_with_timeout(self.container, command, self.timeout)
 
-        # copy back report.json if there is any
-        report_file = Path(self.spec.repo_directory) / "report.json"
-        # Run the test command inside the container to check if the file exists
-        exit_code, test_output = self.container.exec_run(
-            f"test -e {report_file}", demux=True
-        )
-        # Check the exit code of the command
-        if exit_code == 0:
-            copy_from_container(
-                self.container, report_file, self.log_dir / "report.json"
+        for fname in self.files_to_collect:
+            # copy back each requested file if it exists in the container
+            file = Path(self.spec.repo_directory) / fname
+            # Run the test command inside the container to check if the file exists
+            exit_code, test_output = self.container.exec_run(
+                f"test -e {file}", demux=True
             )
+            # Check the exit code of the command
+            if exit_code == 0:
+                copy_from_container(
+                    self.container, file, self.log_dir / fname
+                )
         return output
 
     def __exit__(
@@ -158,8 +141,9 @@ def __init__(
         num_cpus: int,
         log_dir: Path,
         files_to_copy: Optional[Files] = None,
+        files_to_collect: Optional[Files] = None,
     ):
-        super().__init__(spec, logger, timeout, num_cpus, log_dir)
+        super().__init__(spec, logger, timeout, num_cpus, log_dir, files_to_copy=files_to_copy, files_to_collect=files_to_collect)
 
         self.app = modal.App()
 
@@ -176,13 +160,17 @@ def exec_run_with_timeout(self, command: str) -> tuple[str, bool, float]:
         """Execute command on modal sandbox"""
         start_time = time.time()
         with modal.Volume.ephemeral() as vol:
-            # copy back report.json if there is any
-            report_file = Path(self.spec.repo_directory) / "report.json"
+            cp_cmd = ""
+            for fname in self.files_to_collect:
+                remote_file = Path(self.spec.repo_directory) / fname
+                curr_cp_cmd = f" && cp {str(remote_file)} /vol/{fname} 2>/dev/null"
+                cp_cmd += curr_cp_cmd
 
+            command += cp_cmd
             self.sandbox = modal.Sandbox.create(
                 "bash",
                 "-c",
-                f"{command} && cp {str(report_file)} /vol/report.json",
+                command,
                 image=self.image,
                 cpu=self.num_cpus,
                 timeout=self.timeout,
@@ -191,26 +179,21 @@ def exec_run_with_timeout(self, command: str) -> tuple[str, bool, float]:
             )
             self.sandbox.wait()
 
-            # stdout has been redirected to stderr
-            stdout = read_stream(self.sandbox.stderr)
-
             return_code = self.sandbox.returncode
             # https://github.com/modal-labs/modal-client/blob/d577b2916b5c3bf4ebbcb58fadced84d85e1cf8c/modal/sandbox.py#L413
             if return_code == 124:
                 timed_out = True
             else:
                 timed_out = False
 
-            # copy over report.json from mount
-            with (self.log_dir / "report.json").open("wb") as f:
-                for data in vol.read_file("report.json"):
-                    f.write(data)
+            for fname in self.files_to_collect:
+                with (self.log_dir / fname).open("wb") as f:
+                    for data in vol.read_file(fname):
+                        f.write(data)
 
             self.sandbox.terminate()
-
             end_time = time.time()
-
-            return stdout, timed_out, end_time - start_time
+            return self.sandbox.stderr.read(), timed_out, end_time - start_time
 
     def __exit__(
         self,
diff --git a/commit0/harness/run_pytest_ids.py b/commit0/harness/run_pytest_ids.py
@@ -114,28 +114,22 @@ def main(
         eval_script={"src": eval_file, "dest": Path("/eval.sh")},
         patch={"src": patch_file, "dest": Path("/patch.diff")},
     )
+    files_to_collect = ["report.json", "coverage.json", "pytest_exit_code.txt", "test_output.txt"]
 
     try:
         with execution_context(
-            spec, logger, timeout, num_cpus, log_dir, files_to_copy
+            spec, logger, timeout, num_cpus, log_dir, files_to_copy, files_to_collect
         ) as context:
             output, timed_out, total_runtime = context.exec_run_with_timeout(
                 "/bin/bash /eval.sh"
             )
-            logger.info(output)
-            test_output = extract_test_output(
-                output, "--json-report --json-report-file=report.json"
-            )
-            context.write_test_output(test_output, timed_out)
-            if stdout:
-                print(test_output)
-            pytest_exit_code = extract_test_output(output, "echo ")
-            try:
-                pytest_exit_code = int(pytest_exit_code)
-            except Exception:
-                raise Exception(
-                    f"Fail to convert pytest_exit_code {pytest_exit_code} into an integer."
+            if timed_out:
+                raise EvaluationError(
+                    self.spec.repo,
+                    f"Test timed out after {timeout} seconds.",
+                    self.logger,
                 )
+        pytest_exit_code = Path(log_dir / "pytest_exit_code.txt").read_text().strip()
         sys.exit(pytest_exit_code)
     except EvaluationError as e:
         error_msg = (
diff --git a/commit0/harness/spec.py b/commit0/harness/spec.py
@@ -153,13 +153,11 @@ def make_eval_script_list(instance: RepoInstance, repo_directory: str) -> list[s
         f"git reset --hard {instance['base_commit']}",
         "git apply --allow-empty -v /patch.diff",
         "git status",
-        f"{instance['test']['test_cmd']} --json-report --json-report-file=report.json --continue-on-collection-errors {{test_ids}}",
-        "echo $?",
+        f"{instance['test']['test_cmd']} --json-report --json-report-file=report.json --continue-on-collection-errors --cov=. --cov-branch --cov-report json {{test_ids}} > test_output.txt 2>&1",
+        "echo $? > pytest_exit_code.txt",
         f"git reset --hard {instance['base_commit']}",
         "git status",
     ]
-    for i in range(len(eval_script_list)):
-        eval_script_list[i] = f"{eval_script_list[i]} 1>&2"
     return eval_script_list