-
Notifications
You must be signed in to change notification settings - Fork 50
Expand file tree
/
Copy pathparse_build_for_errors_v2.py
More file actions
142 lines (109 loc) · 4.4 KB
/
parse_build_for_errors_v2.py
File metadata and controls
142 lines (109 loc) · 4.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env python3
"""
Post-process JB2-built output to strip student exercise error outputs.
JB1 equivalent: nmaci/scripts/parse_html_for_errors.py
JB2/MyST renders pages as a React SPA. The browser ignores the static HTML and
re-renders everything from window.__remixContext, which is populated from the
per-page .json files in book/_build/html/. Stripping the static HTML alone has
no effect — we must strip the error outputs from the .json mdast trees.
Error output structure in the page JSON (e.g. w1d2-tutorial2.json):
mdast.children[N].children[M] (type='outputs')
└── children[K] (type='output')
jupyter_data: {output_type: 'error', ename: 'NotImplementedError', ...}
We walk every .json file, find 'output' nodes whose jupyter_data.ename matches
our error list, remove them from their parent 'outputs' node, and also remove
the 'outputs' node entirely if it becomes empty.
Run as: python parse_build_for_errors_v2.py student
"""
import json
import os
import sys
# NOTE: this bare expression runs at import time and its value is discarded;
# its only effect is to raise IndexError when no command-line argument is given.
sys.argv[1] # "student" or "instructor" — accepted but not used (kept for compat)
# Exception class names (jupyter_data.ename) whose error outputs are stripped.
ERROR_NAMES = {"NotImplementedError", "NameError"}
# Directory that is walked for page JSON files; relative to the current
# working directory.
HTML_ROOT = "book/_build/html"
def main():
    """Strip error outputs from every page JSON file under HTML_ROOT.

    Walks HTML_ROOT, loads each ``*.json`` file except ``myst.xref.json``,
    runs ``strip_error_outputs`` on its ``mdast`` tree and rewrites the file
    (compact separators) only when at least one error output was removed.
    Progress lines and a final summary are printed to stdout.

    Files that cannot be decoded or parsed, and files whose top-level JSON
    value is not an object, are skipped rather than aborting the whole run.

    Exits with status 1 when HTML_ROOT is not an existing directory.
    """
    if not os.path.isdir(HTML_ROOT):
        print(
            f"ERROR: HTML output directory not found: {HTML_ROOT!r} (cwd={os.getcwd()!r})"
        )
        sys.exit(1)

    json_files = []
    for dirpath, _dirnames, filenames in os.walk(HTML_ROOT):
        for fname in filenames:
            # page data files: slug.json (not index.html)
            if fname.endswith(".json") and fname != "myst.xref.json":
                json_files.append(os.path.join(dirpath, fname))
    print(f"Found {len(json_files)} page JSON files under {HTML_ROOT}")

    total_removed = 0
    files_touched = 0
    for json_path in json_files:
        try:
            with open(json_path, encoding="utf-8") as f:
                data = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError) as exc:
            # Best-effort: one bad file must not stop the pass, but say so —
            # a skipped page keeps whatever error outputs it contains.
            print(f" Skipped unreadable JSON file {json_path}: {exc}")
            continue

        # Any .json file under the build directory is picked up, and its
        # top-level value may be a list or scalar, which has no .get().
        if not isinstance(data, dict):
            continue

        mdast = data.get("mdast")
        if not mdast:
            continue

        removed = strip_error_outputs(mdast)
        if removed:
            total_removed += removed
            files_touched += 1
            # Rewrite only files that actually changed.
            with open(json_path, "w", encoding="utf-8") as f:
                json.dump(data, f, separators=(",", ":"))
            print(f" Stripped {removed} error output(s) from {json_path}")

    print(
        f"Done. Removed {total_removed} error output(s) from {files_touched} file(s)."
    )
def strip_error_outputs(node):
    """Recursively walk an mdast tree and remove error output nodes in place.

    Every child of type 'outputs' is passed to ``filter_error_outputs``,
    which removes its matching error 'output' children. An 'outputs' node is
    dropped from its parent only when that filtering removed something and
    left it with no children. An 'outputs' node that was already empty is
    kept, so nodes that had no error are never removed as a side effect.
    All other dict children are walked recursively.

    Args:
        node: An mdast node (dict). Any non-dict value is ignored.

    Returns:
        The number of individual error 'output' nodes removed beneath ``node``.
    """
    if not isinstance(node, dict):
        return 0
    children = node.get("children")
    if not isinstance(children, list):
        return 0

    removed = 0
    new_children = []
    for child in children:
        if isinstance(child, dict) and child.get("type") == "outputs":
            # Filter out error outputs from this outputs node.
            has_remaining, n = filter_error_outputs(child)
            removed += n
            # Drop the node only if stripping is what emptied it (n > 0).
            if has_remaining or n == 0:
                new_children.append(child)
        else:
            removed += strip_error_outputs(child)
            new_children.append(child)
    node["children"] = new_children
    return removed
def filter_error_outputs(outputs_node):
    """Strip matching error outputs from an 'outputs' node, mutating it.

    A child is removed when it is a dict of type 'output' whose
    ``jupyter_data`` is a dict with ``output_type == 'error'`` and an
    ``ename`` listed in ERROR_NAMES. Everything else (non-dict children,
    other node types, other outputs) is kept in its original order.

    The node's 'children' key is always reassigned to the surviving list,
    even when the key was absent beforehand.

    Returns:
        A ``(has_remaining_children, count_removed)`` tuple.
    """

    def is_target_error(entry):
        # Only 'output' dict nodes are candidates for removal.
        if not isinstance(entry, dict) or entry.get("type") != "output":
            return False
        payload = entry.get("jupyter_data", {})
        return (
            isinstance(payload, dict)
            and payload.get("output_type") == "error"
            and payload.get("ename") in ERROR_NAMES
        )

    survivors = []
    dropped = 0
    for entry in outputs_node.get("children", []):
        if is_target_error(entry):
            dropped += 1
            continue
        survivors.append(entry)

    outputs_node["children"] = survivors
    return bool(survivors), dropped
# Run the stripping pass only when this file is executed as a script.
if __name__ == "__main__":
    main()