|
1 | 1 | #!/usr/bin/env python
|
2 | 2 | import json
|
3 |
| -import os |
4 | 3 | from pathlib import Path
|
5 | 4 |
|
6 | 5 | import click
|
7 | 6 | import jsonschema
|
| 7 | +import nbformat |
8 | 8 | import sqlfluff
|
9 |
| -from nbformat import write as write_notebook |
10 | 9 | from nbmerge import merge_notebooks
|
11 | 10 | from sqlfluff.core import FluffConfig
|
12 | 11 |
|
@@ -109,80 +108,72 @@ def __init__(self, filename):
|
109 | 108 | super().__init__(f"{filename} is invalid")
|
110 | 109 |
|
111 | 110 |
|
112 |
| -def yield_notebooks(): |
113 |
| - for entry in os.scandir(BASEDIR): |
114 |
| - if not entry.name.endswith(".ipynb"): |
115 |
| - continue |
def json_dump(path, notebook):
    """Write *notebook* to *path* as JSON with a 2-space indent and a trailing newline.

    :param path: a ``pathlib.Path`` (anything with a compatible ``open()``) to overwrite
    :param notebook: the JSON-serializable notebook content
    """
    # ensure_ascii=False emits non-ASCII characters verbatim, so the file encoding
    # must be pinned rather than left to the locale's default.
    with path.open("w", encoding="utf-8") as f:
        # Use indent=2 like Google Colab for small diffs.
        json.dump(notebook, f, ensure_ascii=False, indent=2)
        f.write("\n")
116 | 116 |
|
117 |
| - path = Path(entry.path) |
118 |
| - with path.open() as f: |
119 |
| - try: |
120 |
| - notebook = json.load(f) |
121 |
| - except json.decoder.JSONDecodeError as e: |
122 |
| - raise InvalidNotebookError(path) from e |
123 | 117 |
|
124 |
| - yield entry.name, path, notebook |
def json_load(path):
    """Read the file at *path* and return its parsed JSON content.

    :param path: a ``pathlib.Path`` (anything with a compatible ``open()``) to read
    :returns: the decoded JSON document
    :raises InvalidNotebookError: if the file content is not valid JSON
        (chained from the original ``JSONDecodeError``)
    """
    # Read as UTF-8 explicitly instead of relying on the locale's default encoding.
    with path.open(encoding="utf-8") as f:
        try:
            return json.load(f)
        except json.decoder.JSONDecodeError as e:
            raise InvalidNotebookError(path) from e
125 | 124 |
|
126 | 125 |
|
127 |
| -def yield_cells(notebook): |
128 |
| - for cell in notebook["cells"]: |
129 |
| - if cell["cell_type"] != "code": |
130 |
| - continue |
@click.command()
@click.argument("filename", nargs=-1, type=click.Path(exists=True, dir_okay=False, path_type=Path))
def pre_commit(filename):
    """Format SQL cells in Jupyter Notebooks and merge components to build notebooks."""
    # NOTE: the docstring above is also the command's --help text, so details live in comments.
    #
    # Steps:
    #   1. For each given file whose name starts with "component_", rewrite every "%%sql" code
    #      cell with sqlfluff's fixed SQL, and print any lint warnings that remain.
    #   2. Rebuild each notebook in NOTEBOOKS that lists one of those components.
    #   3. Abort (non-zero exit) if any lint warning remained after fixing.
    nonzero = False

    filenames = [path for path in filename if path.name.startswith("component_")]

    for path in filenames:
        notebook = json_load(path)

        for cell in notebook["cells"]:
            if cell["cell_type"] != "code":
                continue

            source = cell["source"]
            # An empty code cell has no first line to inspect; source[0] would raise IndexError.
            if not source or "%%sql" not in source[0]:
                continue

            # The first line holds the cell magic; the remaining lines are the SQL.
            fix = sqlfluff.fix("".join(source[1:]), config=FLUFF_CONFIG)
            cell["source"] = [source[0], "\n", *fix.splitlines(keepends=True)]

            # Lint the fixed SQL, to report only what could not be fixed automatically.
            warnings = sqlfluff.lint(fix, config=FLUFF_CONFIG)
            nonzero |= bool(warnings)

            for warning in warnings:
                click.secho(f"{warning['code']}:{warning['name']} {warning['description']}", fg="yellow")
                # Echo the SQL with the offending span in red.
                click.echo(fix[:warning['start_file_pos']], nl=False)
                click.secho(fix[warning['start_file_pos']:warning['end_file_pos']], fg="red", nl=False)
                click.echo(fix[warning['end_file_pos']:])

        json_dump(path, notebook)

    for slug, components in NOTEBOOKS.items():
        if not any(path.stem in components for path in filenames):
            continue

        # Merge before opening the output file: opening with "w" truncates it, so a failed
        # merge would otherwise leave an empty notebook behind.
        try:
            notebook = merge_notebooks(BASEDIR, [f"{c}.ipynb" for c in components], verbose=False)
            notebook["metadata"]["colab"]["name"] = slug
        except jsonschema.exceptions.ValidationError as e:
            raise InvalidNotebookError(f"{slug}.ipynb") from e

        template_path = Path(f"{slug}.ipynb")
        with template_path.open("w", encoding="utf8") as f:
            nbformat.write(notebook, f)

        # nbformat.write() uses indent=1. Rewrite with indent=2 like Google Colab.
        # https://github.com/jupyter/nbformat/blob/ba2c6f5/nbformat/v4/nbjson.py#L51
        json_dump(template_path, json_load(template_path))

    if nonzero:
        # NOTE(review): click reports Abort as "Aborted!" with exit status 1; the "error"
        # argument does not appear to be displayed. Confirm against the click version in use.
        raise click.Abort("error")
186 | 177 |
|
187 | 178 |
|
188 | 179 | if __name__ == "__main__":
|
|
0 commit comments