@@ -39,11 +39,13 @@ def get_pytest_info(path_to_logs, repo_name, branch_name):
         }
         report_file_path = os.path.join(path_to_logs, pytest_hash, "report.json")
         if not os.path.exists(report_file_path):
-            if os.path.exists(os.path.join(path_to_logs, pytest_hash, "test_output.txt")):
+            if os.path.exists(
+                os.path.join(path_to_logs, pytest_hash, "test_output.txt")
+            ):
                 reason_for_failure = open(
                     os.path.join(path_to_logs, pytest_hash, "test_output.txt")
                 ).read()
-            else:
+            else:
                 reason_for_failure = "Unknown failure."
             pytest_info[testname]["failed_to_run"] = reason_for_failure
             return pytest_info
@@ -160,6 +162,7 @@ def get_blank_repo_metrics(
 
     return blank_repo_metrics
 
+
 leaderboard_header = """\n\n## Leaderboard ({split})
 | Name | Repos Resolved (/{num_repos}) | Total Tests Passed (/{total_num_tests}) Test Duration (s) | Date | Analysis | Github |
 |------|:-------------------------:|:--------------------:|:--------------------:|:----------:|----|----| """
@@ -174,20 +177,26 @@ def get_blank_repo_metrics(
 |:---------|:-----:|
 """
 
+
 def render_mds(overwrite_previous, subfolder="docs"):
     leaderboard = {}
 
-    split_to_total_tests = {"lite": 3628, "all": 140926}  # hard-coded to skip running it later
+    split_to_total_tests = {
+        "lite": 3628,
+        "all": 140926,
+    }  # hard-coded to skip running it later
     for split in tqdm.tqdm(["lite", "all"]):
         num_repos = len(SPLIT[split])
         # total_num_tests = 0
         # for repo_name in SPLIT[split]:
         #     repo_tests = subprocess.run(['commit0', 'get-tests', repo_name], capture_output=True, text=True).stdout.strip()
         #     total_num_tests += len(repo_tests.splitlines())
-        leaderboard[
-            split
-        ] = leaderboard_header.format(split=split, num_repos=num_repos, total_num_tests=split_to_total_tests[split])
-
+        leaderboard[split] = leaderboard_header.format(
+            split=split,
+            num_repos=num_repos,
+            total_num_tests=split_to_total_tests[split],
+        )
+
     for org_path in tqdm.tqdm(glob.glob(os.path.join(analysis_files_path, "*"))):
         org_name = os.path.basename(org_path)
         if org_name in {"blank", "repos", "submission_repos"}:
@@ -204,58 +213,73 @@ def render_mds(overwrite_previous, subfolder="docs"):
             display_name = submission_info["display_name"]
             submission_date = submission_info["submission_date"]
             branch_name = submission_info["branch"]
-            org_branch_filepath = os.path.join(subfolder, f"analysis_{org_name}_{branch_name}.md")
+            org_branch_filepath = os.path.join(
+                subfolder, f"analysis_{org_name}_{branch_name}.md"
+            )
             write_submission = True
-            if os.path.exists(org_branch_filepath) and not overwrite_previous: write_submission = False
+            if os.path.exists(org_branch_filepath) and not overwrite_previous:
+                write_submission = False
 
-            if write_submission: submission_page = submission_table_header.format(display_name=display_name, split=split)
+            if write_submission:
+                submission_page = submission_table_header.format(
+                    display_name=display_name, split=split
+                )
 
             for repo_name, repo_pytest_results in branch_metrics.items():
-                if repo_name == "submission_info": continue
-                if write_submission:
+                if repo_name == "submission_info":
+                    continue
+                if write_submission:
                     submission_repo_page = f"# **{display_name}**: {repo_name}"
-                    org_branch_repo_filepath = os.path.join(subfolder, f"analysis_{org_name}_{branch_name}_{repo_name}.md")
-                if isinstance(repo_pytest_results, str):
+                    org_branch_repo_filepath = os.path.join(
+                        subfolder, f"analysis_{org_name}_{branch_name}_{repo_name}.md"
+                    )
+                if isinstance(repo_pytest_results, str):
                     submission_repo_page = f"# **{display_name}**: {repo_name}\n\n## Failed to clone\n\n{repo_pytest_results}"
-                    org_branch_repo_filepath = os.path.join(subfolder, f"analysis_{org_name}_{branch_name}_{repo_name}.md")
-                    github_hyperlink = f"{project_page_link}/{repo_name}/tree/{branch_name}"
+                    org_branch_repo_filepath = os.path.join(
+                        subfolder, f"analysis_{org_name}_{branch_name}_{repo_name}.md"
+                    )
+                    github_hyperlink = (
+                        f"{project_page_link}/{repo_name}/tree/{branch_name}"
+                    )
                     if branch_name == "reference":
                         github_hyperlink = f"{project_page_link}/{repo_name}"
-                    submission_page = submission_table_header.format(display_name=display_name, split=split) + (
+                    submission_page = submission_table_header.format(
+                        display_name=display_name, split=split
+                    ) + (
                         f"| {repo_name} | No; Failed to clone. | - | - | "
                         f"[Analysis](/{f'analysis_{org_name}_{branch_name}_{repo_name}'}) | "
                         f"[Github]({github_hyperlink}) |"
                     )
-                    back_button = (
-                        f"[back to {display_name} summary](/{f'analysis_{org_name}_{branch_name}'})\n\n"
-                    )
-                    with open(
-                        org_branch_repo_filepath, "w"
-                    ) as wf:
+                    back_button = f"[back to {display_name} summary](/{f'analysis_{org_name}_{branch_name}'})\n\n"
+                    with open(org_branch_repo_filepath, "w") as wf:
                         wf.write(back_button + submission_repo_page)
                     continue
 
                 for pytest_group, pytest_info in repo_pytest_results.items():
                     pytest_group = os.path.basename(pytest_group.strip("/"))
-                    patch_diff = (
-                        f"""\n\n## Patch diff\n```diff\n{pytest_info['patch_diff']}```"""
-                    )
+                    patch_diff = f"""\n\n## Patch diff\n```diff\n{pytest_info['patch_diff']}```"""
                     if "failed_to_run" in pytest_info:
                         resolved = False
                         if write_submission:
-                            submission_repo_page += (f"\n## Failed to run pytests for test `{pytest_group}`\n"
-                                                     f"```\n{pytest_info['failed_to_run']}\n```")
+                            submission_repo_page += (
+                                f"\n## Failed to run pytests for test `{pytest_group}`\n"
+                                f"```\n{pytest_info['failed_to_run']}\n```"
+                            )
                         pytest_details = "Pytest failed"
                         duration = "Failed."
                     else:
                         resolved = ("failed" not in pytest_info["summary"]) or (
                             pytest_info["summary"]["failed"] == 0
                         )
                         if write_submission:
-                            submission_repo_page += pytest_summary_table_header.format(pytest_group=pytest_group)
+                            submission_repo_page += pytest_summary_table_header.format(
+                                pytest_group=pytest_group
+                            )
                             for category, count in pytest_info["summary"].items():
                                 if category not in {"duration"}:
-                                    submission_repo_page += f"""| {category} | {count} |\n"""
+                                    submission_repo_page += (
+                                        f"""| {category} | {count} |\n"""
+                                    )
                                 else:
                                     submission_repo_page += (
                                         f"""| {category} | {float(count):.2f}s |\n"""
@@ -275,21 +299,19 @@ def render_mds(overwrite_previous, subfolder="docs"):
                         pytest_details = f"{pytest_info['summary']['passed']} / {pytest_info['summary']['collected']}"
                         duration = f"{pytest_info['duration']:.2f}"
                     break
-                if write_submission:
-                    github_hyperlink = f"{project_page_link}/{repo_name}/tree/{branch_name}"
+                if write_submission:
+                    github_hyperlink = (
+                        f"{project_page_link}/{repo_name}/tree/{branch_name}"
+                    )
                     if branch_name == "reference":
-                        github_hyperlink = f"{project_page_link}/{repo_name}"
+                        github_hyperlink = f"{project_page_link}/{repo_name}"
                     submission_page += (
                         f"\n| {repo_name} | {'Yes' if resolved else 'No'} | {pytest_details} | "
                         f"{duration} | [Analysis](/{f'analysis_{org_name}_{branch_name}_{repo_name}'}) | "
                         f"[Github]({github_hyperlink}) |"
                     )
-                back_button = (
-                    f"[back to {display_name} summary](/{f'analysis_{org_name}_{branch_name}'})\n\n"
-                )
-                with open(
-                    org_branch_repo_filepath, "w"
-                ) as wf:
+                back_button = f"[back to {display_name} summary](/{f'analysis_{org_name}_{branch_name}'})\n\n"
+                with open(org_branch_repo_filepath, "w") as wf:
                     wf.write(back_button + submission_repo_page + patch_diff)
             if write_submission:
                 back_button = f"[back to all submissions](/{f'analysis'})\n\n"
@@ -307,7 +329,6 @@ def render_mds(overwrite_previous, subfolder="docs"):
                 f"{github_link} |"
             )
 
-
     leaderboard_filepath = os.path.join(subfolder, "analysis.md")
     with open(leaderboard_filepath, "w") as wf:
         wf.write(leaderboard["lite"] + leaderboard["all"])
@@ -319,13 +340,19 @@ def get_args():
         "--do_setup", action="store_true", help="Run commit0 setup with specified split"
     )
     parser.add_argument(
-        "--get_blank_details", action="store_true", help="Get difficulty metrics of blank repository"
+        "--get_blank_details",
+        action="store_true",
+        help="Get difficulty metrics of blank repository",
     )
     parser.add_argument(
-        "--get_reference_details", action="store_true", help="Get pytest results from reference"
+        "--get_reference_details",
+        action="store_true",
+        help="Get pytest results from reference",
     )
     parser.add_argument(
-        "--analyze_submissions", action="store_true", help="Get pytest results from submissions with split"
+        "--analyze_submissions",
+        action="store_true",
+        help="Get pytest results from submissions with split",
     )
     parser.add_argument("--render_webpages", action="store_true")
     parser.add_argument("--split", type=str, help="all or lite")
@@ -334,7 +361,9 @@ def get_args():
         "--tokenizer_name", type=str, default="meta-llama/Meta-Llama-3.1-8B-Instruct"
     )
     parser.add_argument(
-        "--overwrite_previous_eval", action="store_true", help="Overwrite cached pytest info"
+        "--overwrite_previous_eval",
+        action="store_true",
+        help="Overwrite cached pytest info"
         # TODO add finer granularity so can specify which ones to overwrite
     )
 
@@ -391,7 +420,9 @@ def main(args):
         commit0_dot_file_path = os.path.join(
             analysis_files_path, "repos", org_name, branch_name, ".commit0.yaml"
         )
-        submission_repos_path = os.path.join(analysis_files_path, "repos", org_name, branch_name)
+        submission_repos_path = os.path.join(
+            analysis_files_path, "repos", org_name, branch_name
+        )
         if args.do_setup:
             os.system(
                 f"commit0 setup {args.split} --base-dir {submission_repos_path} "
@@ -400,22 +431,24 @@ def main(args):
         submission_metrics_output_file = os.path.join(
             analysis_files_path, org_name, f"{branch_name}.json"
         )
-        submission_details = {"submission_info": {
-            "org_name": org_name,
-            "branch": branch_name,
-            "display_name": "Reference (Gold)",
-            "submission_date": "NA",
-            "split": args.split,
-            "project_page": "https://github.com/commit-0",
-        }}
+        submission_details = {
+            "submission_info": {
+                "org_name": org_name,
+                "branch": branch_name,
+                "display_name": "Reference (Gold)",
+                "submission_date": "NA",
+                "split": args.split,
+                "project_page": "https://github.com/commit-0",
+            }
+        }
 
         os.makedirs(os.path.join(analysis_files_path, org_name), exist_ok=True)
         need_re_eval = False
         for repo_log_path in glob.glob(f"{os.getcwd()}/logs/pytest/*"):
             if os.path.exists(os.path.join(repo_log_path, branch_name)):
                 if args.overwrite_previous_eval:
                     shutil.rmtree(os.path.join(repo_log_path, branch_name))
-                else:
+                else:
                     need_re_eval = True
         if args.overwrite_previous_eval or need_re_eval:
             os.system(
@@ -431,27 +464,39 @@ def main(args):
             path_to_logs = f"{os.getcwd()}/logs/pytest/{repo_name}/{branch_name}"
             pytest_results = get_pytest_info(path_to_logs, repo_name, branch_name)
             submission_details[repo_name] = pytest_results
-        json.dump(submission_details, open(submission_metrics_output_file, "w"), indent=4)
+        json.dump(
+            submission_details, open(submission_metrics_output_file, "w"), indent=4
+        )
         print(f"Saved pytest info to {submission_metrics_output_file}")
 
     if args.analyze_submissions:
         for submission in tqdm.tqdm(submission_dataset):
             submission_details = {"submission_info": submission}
             branch_name = submission["branch"]
             org_name = submission["org_name"]
-            split = submission['split']
-            if split != args.split: continue
+            split = submission["split"]
+            if split != args.split:
+                continue
             submission_metrics_output_file = os.path.join(
                 analysis_files_path, org_name, f"{branch_name}.json"
             )
-            if os.path.exists(submission_metrics_output_file) and not args.overwrite_previous_eval:
+            if (
+                os.path.exists(submission_metrics_output_file)
+                and not args.overwrite_previous_eval
+            ):
                 continue
-            submission_repos_path = os.path.join(analysis_files_path, "submission_repos", org_name, branch_name)
+            submission_repos_path = os.path.join(
+                analysis_files_path, "submission_repos", org_name, branch_name
+            )
             if os.path.exists(submission_repos_path):
                 shutil.rmtree(submission_repos_path)
             os.makedirs(os.path.join(analysis_files_path, org_name), exist_ok=True)
             commit0_dot_file_path = os.path.join(
-                analysis_files_path, "submission_repos", org_name, branch_name, ".commit0.yaml"
+                analysis_files_path,
+                "submission_repos",
+                org_name,
+                branch_name,
+                ".commit0.yaml",
             )
             for repo_log_path in glob.glob(f"{os.getcwd()}/logs/pytest/*"):
                 if os.path.exists(os.path.join(repo_log_path, branch_name)):
@@ -468,7 +513,8 @@ def main(args):
                     clone_repo(clone_url, clone_dir, branch_name, logger)
                 except Exception as e:
                     submission_details[repo_name] = f"Error cloning: {e}"
-                    if os.path.exists(clone_dir): shutil.rmtree(clone_dir)
+                    if os.path.exists(clone_dir):
+                        shutil.rmtree(clone_dir)
             # after successfully setup, write the commit0 dot file
             write_commit0_dot_file(
                 commit0_dot_file_path,
@@ -488,16 +534,19 @@ def main(args):
             repo_name = example["repo"].split("/")[-1]
             if split != "all" and repo_name not in SPLIT[split]:
                 continue
-            if repo_name in submission_details: # Failed to clone earlier, skip.
+            if repo_name in submission_details:  # Failed to clone earlier, skip.
                 continue
             path_to_logs = f"{os.getcwd()}/logs/pytest/{repo_name}/{branch_name}"
             pytest_results = get_pytest_info(path_to_logs, repo_name, branch_name)
             submission_details[repo_name] = pytest_results
-        json.dump(submission_details, open(submission_metrics_output_file, "w"), indent=4)
+        json.dump(
+            submission_details, open(submission_metrics_output_file, "w"), indent=4
+        )
         print(f"Saved pytest info to {submission_metrics_output_file}")
 
     if args.render_webpages:
         # Render only updated leaderboard and new submissions
         render_mds(args.overwrite_previous_eval)
 
+
 main(get_args())