Skip to content

Commit 8808f0f

Browse files
committed
Misc: Add script for calculating MRVA totals
Use this script if you want to quickly calculate the totals of some query across all the repositories in an MRVA run. For an example of such a query, see e.g. `python/ql/src/Metrics/Internal/TypeAnnotations.ql`.

The script expects the query to produce an output table of the form

```
| header0 | header1 | header2 | header3 | ...
|----------|----------|----------|----------|----
| message1 | value11 | value12 | value13 | ...
| message2 | value21 | value22 | value23 | ...
...
```

where all of the `values` are numbers. For each `(message, header)` pair, it then calculates the total of all the values in that cell, across all of the repos in the MRVA run.

To use the script, simply pass it the URL of the exported Gist of the MRVA run. After calculating the totals, the script will then (optionally, but by default) add the totals to the `_summary.md` file, and push these changes to the Gist.
1 parent 7a589c4 commit 8808f0f

File tree

1 file changed

+131
-0
lines changed

1 file changed

+131
-0
lines changed

misc/scripts/calculate_mrva_totals.py

+131
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
import os
2+
import subprocess
3+
import tempfile
4+
import argparse
5+
from collections import defaultdict
6+
7+
# Long-form usage text; passed to argparse as the `epilog` of the command-line
# help in the `__main__` section of this script.
help_text = """
To use this script, pass the URL of a GitHub Gist as an argument. The Gist should contain the
exported MarkDown output of a MRVA run.

The script expects the query to produce an output table of the form
```
| header0 | header1 | header2 | header3 | ...
|----------|----------|----------|----------|----
| message1 | value11 | value12 | value13 | ...
| message2 | value21 | value22 | value23 | ...
...
```
The script will calculate the totals for each message and header, and put a table containing these
totals in the `_summary.md` file in the Gist. By default it will then commit and push these changes
to the Gist (having first displayed a diff of the changes).
"""

# First cell of the header row of the most recently parsed table. Assigned by
# parse_markdown_table and read by append_totals_to_summary, which uses it as
# the heading of the first column of the totals table.
first_header = ""
25+
26+
def split_line(line):
    """Break one Markdown table row into its cell texts.

    Pipe characters at either end of ``line`` are removed first, the
    remainder is split on ``|``, and each cell has its surrounding
    whitespace trimmed. ``"| a | b |"`` therefore gives ``["a", "b"]``.
    """
    inner = line.strip('|')
    raw_cells = inner.split('|')
    return list(map(str.strip, raw_cells))
28+
29+
def parse_markdown_table(stream):
    """Parse the Markdown table contained in ``stream``.

    Lines are read until the first one starting with ``|``; that line is the
    header row. Its first cell is stored in the module-level ``first_header``
    and the remaining cells become the column names. The line directly after
    the header (the separator row) is skipped. Every later line starting with
    ``|`` is treated as a data row: backticks are stripped from each cell, the
    first cell is the row's message, and the other cells are paired with the
    column names. Cells made up only of digits are converted to ``int``; all
    other cells are kept as strings.

    Args:
        stream: An iterable of text lines, e.g. an open text file.

    Returns:
        A dict mapping each message to a ``{column name: value}`` dict.
        An empty dict is returned when ``stream`` contains no table (in
        which case ``first_header`` is left untouched). Cells beyond the
        number of header columns are ignored.
    """
    global first_header
    iterator = (line.strip() for line in stream)

    # Skip irrelevant lines until we find the header line.
    headers = None
    for line in iterator:
        if line.startswith('|'):
            first_header, *headers = split_line(line)
            break

    if headers is None:
        # The stream holds no table at all, so there is nothing to report.
        return {}

    # Skip the separator line. The default keeps a header that happens to be
    # the very last line from raising StopIteration.
    next(iterator, None)

    data_dict = {}

    # Process the remaining lines; anything that is not a table row is ignored.
    for line in iterator:
        if not line.startswith('|'):
            continue
        message, *values = [value.strip('`') for value in split_line(line)]
        # zip() pairs cells with column names and stops at the shorter of the
        # two, so a row that is wider than the header cannot index past it.
        data_dict[message] = {
            header: int(value) if value.isdigit() else value
            for header, value in zip(headers, values)
        }

    return data_dict
54+
55+
def clone_gist(gist_url, repo_dir):
    """Clone the Gist at ``gist_url`` into ``repo_dir`` with the ``gh`` CLI.

    If cloning fails -- because ``gh`` exits with a non-zero status or cannot
    be started at all -- a message is printed, ``repo_dir`` is removed, and
    the process exits with status 1.

    Args:
        gist_url: URL (or ID) of the Gist, passed through to ``gh gist clone``.
        repo_dir: Directory to clone into.
    """
    # Imported here so the function is self-contained and does not rely on
    # the module's import block being extended.
    import shutil
    import sys

    try:
        subprocess.run(["gh", "gist", "clone", gist_url, repo_dir], check=True)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a `gh` executable that is missing or not runnable,
        # which would otherwise surface as a traceback and leave repo_dir behind.
        print(f"Failed to clone the gist from {gist_url}")
        # Portable replacement for `rm -rf`; a directory that is already gone
        # is not an error.
        shutil.rmtree(repo_dir, ignore_errors=True)
        sys.exit(1)
62+
63+
def process_gist_files(repo_dir):
    """Add up the integer table cells of every result file in ``repo_dir``.

    Each ``*.md`` file other than ``_summary.md`` is parsed with
    ``parse_markdown_table``. For every ``(message, header)`` cell whose
    value is an ``int``, the value is added to a running total; non-integer
    cells are ignored.

    Args:
        repo_dir: Directory containing the Markdown files of the cloned Gist.

    Returns:
        A nested ``defaultdict`` such that ``result[message][header]`` is the
        sum of that cell over all parsed files.
    """
    total_data = defaultdict(lambda: defaultdict(int))

    # os.listdir returns entries in arbitrary order; sorting makes the order
    # in which messages are first seen (and therefore the row order of the
    # totals) reproducible between runs.
    for filename in sorted(os.listdir(repo_dir)):
        if not filename.endswith(".md") or filename == "_summary.md":
            continue

        # Decode explicitly as UTF-8 instead of relying on the platform's
        # default encoding, which differs between systems.
        with open(os.path.join(repo_dir, filename), "r", encoding="utf-8") as file:
            data_dict = parse_markdown_table(file)

        for message, values in data_dict.items():
            for header, value in values.items():
                if isinstance(value, int):
                    total_data[message][header] += value

    return total_data
77+
78+
def append_totals_to_summary(total_data, repo_dir):
    """Insert a "Totals" table into ``_summary.md`` inside ``repo_dir``.

    The table has one row per message in ``total_data`` and one right-aligned
    column per header, with numbers formatted using thousands separators. The
    first column is headed by the module-level ``first_header``. The table is
    placed directly before the first ``### Summary`` heading of the file; if
    the file has no such heading, the table is added at the end instead.

    Args:
        total_data: Mapping of message -> mapping of header -> integer total.
        repo_dir: Directory containing ``_summary.md``.

    When ``total_data`` is empty there is nothing to tabulate: a notice is
    printed and ``_summary.md`` is left unchanged.
    """
    global first_header

    if not total_data:
        print("No totals were calculated; leaving _summary.md unchanged.")
        return

    summary_path = os.path.join(repo_dir, "_summary.md")
    with open(summary_path, "r", encoding="utf-8") as summary_file:
        content = summary_file.read()

    # Collect the columns of *all* messages, in first-seen order, so a column
    # that is absent from the first message is still reported.
    seen_columns = {}
    for values in total_data.values():
        seen_columns.update(dict.fromkeys(values))
    columns = list(seen_columns)
    headers = [first_header] + columns

    table_lines = [
        "| " + " | ".join(headers) + " |",
        # Right align all but the first column
        "| " + "|".join(["---"] + ["---:"] * len(columns)) + " |",
    ]
    for message, values in total_data.items():
        # .get() treats a column missing for this message as 0 and works for
        # plain dicts as well as defaultdicts, without mutating the input.
        row = [message] + [f"{values.get(header, 0):,}" for header in columns]
        table_lines.append("| " + " | ".join(row) + " |")
    totals_table = "\n\n### Totals\n\n" + "\n".join(table_lines) + "\n"

    marker = "### Summary"
    if marker in content:
        # Only the first heading is the insertion point; later occurrences of
        # the same text must not receive a second copy of the table.
        new_content = content.replace(marker, totals_table + "\n" + marker, 1)
    else:
        # Without the marker the totals would otherwise be silently dropped.
        new_content = content + totals_table

    with open(summary_path, "w", encoding="utf-8") as summary_file:
        summary_file.write(new_content)
96+
97+
def commit_and_push_changes(repo_dir):
    """Stage, commit and push ``_summary.md`` from the repository in ``repo_dir``.

    The three git commands run in order inside ``repo_dir``. Because each is
    run with ``check=True``, a command that exits with a non-zero status
    raises ``subprocess.CalledProcessError`` and the later ones are not run.
    """
    git_steps = (
        ["git", "add", "_summary.md"],
        ["git", "commit", "-m", "Update summary with totals"],
        ["git", "push"],
    )
    for step in git_steps:
        subprocess.run(step, cwd=repo_dir, check=True)
101+
102+
def show_git_diff(repo_dir):
    """Run ``git diff`` for ``_summary.md`` inside ``repo_dir``.

    git's output goes straight to this process's standard output. A non-zero
    exit status raises ``subprocess.CalledProcessError`` (``check=True``).
    """
    diff_command = ["git", "diff", "_summary.md"]
    subprocess.run(diff_command, check=True, cwd=repo_dir)
104+
105+
if __name__ == "__main__":
    # Script entry point: clone the Gist into a temporary directory, compute
    # the totals, add them to _summary.md, show the diff and, after
    # confirmation, commit and push the change.

    # Imported here so this section does not rely on the module's import
    # block being extended.
    import shutil
    import sys

    parser = argparse.ArgumentParser(description="Calculate MRVA totals from a GitHub Gist", epilog=help_text, formatter_class=argparse.RawTextHelpFormatter)
    # The URL is declared optional so that a missing argument prints the full
    # help text (including the epilog) rather than argparse's short usage error.
    parser.add_argument("gist_url", nargs='?', help="URL of the GitHub Gist")
    parser.add_argument("--keep-dir", action="store_true", help="Keep the temporary directory")

    args = parser.parse_args()

    if not args.gist_url:
        parser.print_help()
        sys.exit(1)

    repo_dir = tempfile.mkdtemp(dir=".")
    # On failure clone_gist removes repo_dir itself and exits, so it stays
    # outside the try/finally below.
    clone_gist(args.gist_url, repo_dir)

    try:
        total_data = process_gist_files(repo_dir)

        append_totals_to_summary(total_data, repo_dir)

        show_git_diff(repo_dir)

        # An empty answer counts as "yes".
        if input("Do you want to push the changes to the gist? (Y/n): ").strip().lower() in ['y', '']:
            commit_and_push_changes(repo_dir)
    finally:
        # Runs even when a step above raised, so the clone is not left
        # behind in the current directory unless the user asked to keep it.
        if args.keep_dir:
            print(f"Temporary directory retained at: {repo_dir}")
        else:
            shutil.rmtree(repo_dir, ignore_errors=True)

0 commit comments

Comments
 (0)