Skip to content

Commit 8f601d7

Browse files
committed
fix: Fix custom pre-commit hook
This command was being run by pre-commit, as, for example, `pre-commit run nbqa --all-files` or `--files ...` Given the configuration in .pre-commit-config.yaml, this runs: `nbqa manage FILE ...` nbqa creates a subprocess, replacing the original files with temporary files. (It then rewrites stdout and stderr to replace the temporary filenames with the original filenames, making this behavior hard to observe.) The temporary files are transformations of the original files, in which magic cells are removed. This makes sense for other linters, that operate on Python code. Since we want to operate on SQL magics, nbqa is an inappropriate entry point for a pre-commit hook.
1 parent f3576e3 commit 8f601d7

File tree

3 files changed

+60
-67
lines changed

3 files changed

+60
-67
lines changed

.github/workflows/lint.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ jobs:
2626
- uses: pre-commit/[email protected]
2727
continue-on-error: true
2828
with:
29-
extra_args: nbqa --files ${{ steps.changed-files.outputs.all_changed_files }}
29+
extra_args: local --files ${{ steps.changed-files.outputs.all_changed_files }}
3030
- if: ${{ env.PAT }}
3131
uses: stefanzweifel/git-auto-commit-action@v5
3232
with:

.pre-commit-config.yaml

+6-4
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,10 @@ repos:
2424
rev: 1.8.7
2525
hooks:
2626
- id: nbqa-black
27-
- id: nbqa
28-
entry: nbqa manage
29-
name: nbqa-manage
30-
alias: nbqa-manage
27+
- repo: local
28+
hooks:
29+
- id: local
30+
name: local
31+
language: python
32+
entry: ./manage.py
3133
additional_dependencies: [click, jsonschema, nbmerge, nbformat, sqlfluff]

manage.py

+53-62
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
#!/usr/bin/env python
22
import json
3-
import os
43
from pathlib import Path
54

65
import click
76
import jsonschema
7+
import nbformat
88
import sqlfluff
9-
from nbformat import write as write_notebook
109
from nbmerge import merge_notebooks
1110
from sqlfluff.core import FluffConfig
1211

@@ -109,80 +108,72 @@ def __init__(self, filename):
109108
super().__init__(f"{filename} is invalid")
110109

111110

112-
def yield_notebooks():
113-
for entry in os.scandir(BASEDIR):
114-
if not entry.name.endswith(".ipynb"):
115-
continue
111+
def json_dump(path, notebook):
112+
with path.open("w") as f:
113+
# Use indent=2 like Google Colab for small diffs.
114+
json.dump(notebook, f, ensure_ascii=False, indent=2)
115+
f.write("\n")
116116

117-
path = Path(entry.path)
118-
with path.open() as f:
119-
try:
120-
notebook = json.load(f)
121-
except json.decoder.JSONDecodeError as e:
122-
raise InvalidNotebookError(path) from e
123117

124-
yield entry.name, path, notebook
118+
def json_load(path):
119+
with path.open() as f:
120+
try:
121+
return json.load(f)
122+
except json.decoder.JSONDecodeError as e:
123+
raise InvalidNotebookError(path) from e
125124

126125

127-
def yield_cells(notebook):
128-
for cell in notebook["cells"]:
129-
if cell["cell_type"] != "code":
130-
continue
126+
@click.command()
127+
@click.argument("filename", nargs=-1, type=click.Path(exists=True, dir_okay=False, path_type=Path))
128+
def pre_commit(filename):
129+
"""Format SQL cells in Jupyter Notebooks and merge components to build notebooks."""
130+
nonzero = False
131131

132-
source = cell["source"]
133-
if "%%sql" not in source[0]:
134-
continue
132+
filenames = [path for path in filename if path.name.startswith("component_")]
135133

136-
sql = "".join(source[1:])
134+
for path in filenames:
135+
notebook = json_load(path)
137136

138-
fix = sqlfluff.fix(sql, config=FLUFF_CONFIG)
139-
for warning in sqlfluff.lint(fix, config=FLUFF_CONFIG):
140-
click.secho(f"{warning['code']}:{warning['name']} {warning['description']}", fg="yellow")
141-
click.echo(fix[:warning['start_file_pos']], nl=False)
142-
click.secho(fix[warning['start_file_pos']:warning['end_file_pos']], fg="red", nl=False)
143-
click.echo(fix[warning['end_file_pos']:])
137+
for cell in notebook["cells"]:
138+
if cell["cell_type"] != "code":
139+
continue
144140

145-
yield source, cell, sql, fix
141+
source = cell["source"]
142+
if "%%sql" not in source[0]:
143+
continue
146144

145+
fix = sqlfluff.fix("".join(source[1:]), config=FLUFF_CONFIG)
146+
cell["source"] = [source[0], "\n", *fix.splitlines(keepends=True)]
147147

148-
def build_notebook(slug):
149-
try:
150-
notebook = merge_notebooks(BASEDIR, [f"{c}.ipynb" for c in NOTEBOOKS[slug]], verbose=False)
151-
notebook["metadata"]["colab"]["name"] = slug
152-
except jsonschema.exceptions.ValidationError as e:
153-
raise InvalidNotebookError(f"{slug}.ipynb") from e
154-
else:
155-
return notebook
148+
warnings = sqlfluff.lint(fix, config=FLUFF_CONFIG)
149+
nonzero |= bool(warnings)
156150

151+
for warning in warnings:
152+
click.secho(f"{warning['code']}:{warning['name']} {warning['description']}", fg="yellow")
153+
click.echo(fix[:warning['start_file_pos']], nl=False)
154+
click.secho(fix[warning['start_file_pos']:warning['end_file_pos']], fg="red", nl=False)
155+
click.echo(fix[warning['end_file_pos']:])
157156

158-
def json_dump(path, notebook):
159-
with path.open("w") as f:
160-
# Use indent=2 like Google Colab for small diffs.
161-
json.dump(notebook, f, ensure_ascii=False, indent=2)
162-
f.write("\n")
157+
json_dump(path, notebook)
163158

159+
for slug, components in NOTEBOOKS.items():
160+
if any(path.stem in components for path in filenames):
161+
template_path = Path(f"{slug}.ipynb")
162+
with template_path.open("w", encoding="utf8") as f:
163+
try:
164+
notebook = merge_notebooks(BASEDIR, [f"{c}.ipynb" for c in NOTEBOOKS[slug]], verbose=False)
165+
notebook["metadata"]["colab"]["name"] = slug
166+
except jsonschema.exceptions.ValidationError as e:
167+
raise InvalidNotebookError(f"{slug}.ipynb") from e
168+
else:
169+
nbformat.write(notebook, f)
164170

165-
@click.command()
166-
@click.argument("filename", nargs=-1, type=click.Path(exists=True, dir_okay=False, path_type=Path))
167-
def pre_commit(filename):
168-
"""Format SQL cells in Jupyter Notebooks and merge components to build notebooks."""
169-
resolved = [path.resolve() for path in filename]
170-
171-
for _, filepath, notebook in yield_notebooks():
172-
if not resolved or filepath.resolve() in resolved:
173-
for source, cell, _, sql_formatted in yield_cells(notebook):
174-
cell["source"] = [source[0], "\n", *sql_formatted.splitlines(keepends=True)]
175-
176-
json_dump(filepath, notebook)
177-
178-
for slug in NOTEBOOKS:
179-
filepath = Path(f"{slug}.ipynb")
180-
with filepath.open("w", encoding="utf8") as f:
181-
write_notebook(build_notebook(slug), f)
182-
# nbformat uses indent=1.
183-
with filepath.open() as f:
184-
notebook = json.load(f)
185-
json_dump(filepath, notebook)
171+
# nbformat.write() uses indent=1. Rewrite with indent=2 like Google Colab.
172+
# https://github.com/jupyter/nbformat/blob/ba2c6f5/nbformat/v4/nbjson.py#L51
173+
json_dump(template_path, json_load(template_path))
174+
175+
if nonzero:
176+
raise click.Abort("error")
186177

187178

188179
if __name__ == "__main__":

0 commit comments

Comments
 (0)