|
import argparse
import os
import re
import shutil
import subprocess
import sys
import tempfile
from collections import defaultdict
| 7 | + |
# Long-form usage notes. This string is passed to argparse.ArgumentParser as
# the `epilog` (with RawTextHelpFormatter, so its line breaks are kept as-is).
help_text = """
To use this script, pass the URL of a GitHub Gist as an argument. The Gist should contain the
exported MarkDown output of a MRVA run.

The script clones the Gist to a temporary directory, and constructs a DCA source suite that covers the same repos/SHAs that had results in the Gist.

Additionally, you can limit the list of repos to just the ones for which number of results are within a given range, by passing the --min and --max arguments.
"""
| 16 | + |
def clone_gist(gist_url, repo_dir):
    """
    Clones the Gist at gist_url into repo_dir using the GitHub CLI (`gh gist clone`).

    On success the function simply returns. If the clone fails, or the `gh` executable
    cannot be started at all, an error is printed to stderr, repo_dir is removed, and
    the script exits with status 1.
    """
    try:
        subprocess.run(
            ["gh", "gist", "clone", gist_url, repo_dir],
            check=True,
            # Capture instead of discarding, so the reason can be reported on failure.
            stderr=subprocess.PIPE,
            text=True,
        )
        return
    except subprocess.CalledProcessError as e:
        reason = (e.stderr or "").strip()
    except OSError as e:
        # Raised (as FileNotFoundError/PermissionError) when `gh` cannot be executed.
        reason = f"Could not run the GitHub CLI ('gh'): {e}"

    print(f"Failed to clone the gist from {gist_url}", file=sys.stderr)
    if reason:
        print(reason, file=sys.stderr)
    # Portable replacement for `rm -rf`; the directory may already be gone.
    shutil.rmtree(repo_dir, ignore_errors=True)
    sys.exit(1)
| 28 | + |
def get_mrva_test_name(repo_dir):
    """
    Returns a kebab-case name for the MRVA test, based on the first header of the _summary.md file.

    Spaces in the quoted header name are replaced by dashes. If no header with a quoted,
    non-empty name is found, "unknown-name" is returned.
    """
    # Format of first header: ### Results for "name goes here"
    # In this case, the return value is "name-goes-here"
    header_re = re.compile(r'### Results for[^"]*"([^"]+)"')
    with open(os.path.join(repo_dir, "_summary.md"), "r", encoding="utf-8") as summary_file:
        for line in summary_file:
            m = header_re.match(line)
            if m:
                return m.group(1).replace(" ", "-")
    # No usable header (missing, or present without a quoted name).
    return "unknown-name"
| 42 | + |
def get_repo_alert_counts(repo_dir):
    """
    Parses the Summary table in the _summary.md file to produce a dict mapping repo NWOs to alert counts.

    Only table rows that appear after the "### Summary" header are considered. Counts may
    contain thousands separators (e.g. "1,234"). Returns an empty dict if there is no
    Summary section or it has no matching rows.
    """
    # Example line: | Nuitka/Nuitka | [45 result(s)](#file-result-01-Nuitka-Nuitka-md) |
    # Padding around the cell contents is tolerated and not included in the NWO.
    line_re = re.compile(r"\|\s*([^|]+?)\s*\|\s*\[([0-9,]+) result")
    alert_counts = {}
    with open(os.path.join(repo_dir, "_summary.md"), "r", encoding="utf-8") as summary_file:
        # Skip ahead to the Summary
        for line in summary_file:
            if line.startswith("### Summary"):
                break

        # The same iterator is reused, so this loop continues right after the header.
        for line in summary_file:
            m = line_re.match(line)
            if m:
                nwo, count = m.groups()
                alert_counts[nwo] = int(count.replace(",", ""))
    return alert_counts
| 63 | + |
def get_repo_nwo_shas(repo_dir):
    """
    Parses each non _summary.md file in the repo_dir to produce a dict mapping repo NWOs to their corresponding SHAs.

    Only the first matching link in each .md file is used. Files are visited in sorted
    order so that the result is deterministic if the same repo appears in several files
    (the last file in sorted order wins).
    """
    # We want to look for a match in the file of the form
    # github.com/Nuitka/Nuitka/blob/b289ee4f9d55172ed5165dab262d49bfa9cb2586/
    # and extract the NWO (as a single unit) and SHA
    nwo_sha_re = re.compile(r"github\.com/([^/]+/[^/]+)/blob/([0-9a-f]{40})/")

    repo_nwo_shas = {}
    for filename in sorted(os.listdir(repo_dir)):
        if not filename.endswith(".md") or filename == "_summary.md":
            continue
        # Undecodable bytes are replaced rather than raising: only the ASCII link
        # text matters here.
        with open(os.path.join(repo_dir, filename), "r", encoding="utf-8", errors="replace") as file:
            for line in file:
                m = nwo_sha_re.search(line)
                if m:
                    nwo, sha = m.groups()
                    repo_nwo_shas[nwo] = sha
                    break
    return repo_nwo_shas
| 84 | + |
| 85 | +if __name__ == "__main__": |
| 86 | + parser = argparse.ArgumentParser(description="Calculate MRVA totals from a GitHub Gist", epilog=help_text, formatter_class=argparse.RawTextHelpFormatter) |
| 87 | + parser.add_argument("gist_url", nargs='?', help="URL of the GitHub Gist") |
| 88 | + parser.add_argument("--keep-dir", action="store_true", help="Keep the temporary directory") |
| 89 | + parser.add_argument("--min", type=int, help="Minimum number of alerts in repo") |
| 90 | + parser.add_argument("--max", type=int, help="Maximum number of alerts in repo") |
| 91 | + parser.add_argument("--language", type=str, required=True, help="Language of the MRVA run") |
| 92 | + |
| 93 | + args = parser.parse_args() |
| 94 | + |
| 95 | + if not args.gist_url: |
| 96 | + parser.print_help() |
| 97 | + exit(1) |
| 98 | + |
| 99 | + repo_dir = tempfile.mkdtemp(dir=".") |
| 100 | + clone_gist(args.gist_url, repo_dir) |
| 101 | + |
| 102 | + repo_alerts = get_repo_alert_counts(repo_dir) |
| 103 | + repo_nwo_shas = get_repo_nwo_shas(repo_dir) |
| 104 | + |
| 105 | + min_count = args.min if args.min else min(repo_alerts.values()) |
| 106 | + max_count = args.max if args.max else max(repo_alerts.values()) |
| 107 | + |
| 108 | + filtered_alerts = { |
| 109 | + nwo: count for nwo, count in repo_alerts.items() if min_count <= count <= max_count |
| 110 | + } |
| 111 | + |
| 112 | + test_name = get_mrva_test_name(repo_dir) |
| 113 | + |
| 114 | + source_suite_name = f"{test_name}" |
| 115 | + if args.min: |
| 116 | + source_suite_name += f"-min-{args.min}" |
| 117 | + if args.max: |
| 118 | + source_suite_name += f"-max-{args.max}" |
| 119 | + source_suite_name += ".yml" |
| 120 | + |
| 121 | + with open(source_suite_name, "w") as source_suite_file: |
| 122 | + source_suite_file.write("# This file was generated by misc/scripts/mrva-to-dca-source-suite.py\n") |
| 123 | + source_suite_file.write(f"# Input Gist: {args.gist_url}\n\n") |
| 124 | + for nwo, count in filtered_alerts.items(): |
| 125 | + source_suite_file.write(f"- language: {args.language}\n") |
| 126 | + source_suite_file.write(f" sha: {repo_nwo_shas[nwo]}\n") |
| 127 | + source_suite_file.write(f" slug: {nwo} # Alert count: {count}\n") |
| 128 | + |
| 129 | + print(f"Source suite written to {source_suite_name}") |
| 130 | + |
| 131 | + if args.keep_dir: |
| 132 | + print(f"Temporary directory retained at: {repo_dir}") |
| 133 | + else: |
| 134 | + subprocess.run(["rm", "-rf", repo_dir]) |
0 commit comments