diff --git a/TESTING.md b/TESTING.md index b4c60156f..21ea17539 100644 --- a/TESTING.md +++ b/TESTING.md @@ -78,7 +78,7 @@ To run only integration tests that are marked as `encrypted_only`, call: ```bash pytest graphdatascience/tests/integration --encrypted-only -```` +``` ### GDS library versions @@ -90,52 +90,38 @@ For this reason only tests compatible with the GDS library server version you ar ## Style guide -The code follows a rather opinionated style based on [pep8](https://www.python.org/dev/peps/pep-0008/). +The code and examples use [ruff](https://docs.astral.sh/ruff/) to format and lint. You can check all code using all the below mentioned code checking tools by running the `scripts/checkstyle` bash script. There's also a `scripts/makestyle` to do formatting. +Use `SKIP_NOTEBOOKS=true` to only format the code. - -### Linting - -To enforce pep8 conformity (with the exception of using max line length = 120) [flake8](https://flake8.pycqa.org/en/latest/) is used. -To run it to check the entire repository, simply call: - -```bash -flake8 -``` - -from the root. See `.flake8` for our custom flake8 settings. +See `pyproject.toml` for the configuration. -### Formatting +### Static typing -For general formatting we use [black](https://black.readthedocs.io/en/stable/) with default settings. -black can be run to format the entire repository by calling: +The code is annotated with type hints in order to provide documentation and allow for static type analysis with [mypy](http://mypy-lang.org/). +Please note that the `typing` library is used for annotation types in order to stay compatible with Python versions < 3.9. +To run static analysis on the entire repository with mypy, just run: ```bash -black . +mypy . ``` -from the root. See the `[tool.black]` section of `pyproject.toml` for our custom black settings. +from the root. See `mypy.ini` for our custom mypy settings. -Additionally [isort](https://pycqa.github.io/isort/) is used for consistent import sorting. 
-It can similarly be run to format all source code by calling: -```bash -isort . -``` +## Notebook examples -from the root. See `.isort.cfg` for our custom isort settings. +The notebooks under `/examples` can be run using `scripts/run_notebooks`. -### Static typing +### Cell Tags -The code is annotated with type hints in order to provide documentation and allow for static type analysis with [mypy](http://mypy-lang.org/). -Please note that the `typing` library is used for annotation types in order to stay compatible with Python versions < 3.9. -To run static analysis on the entire repository with mypy, just run: +*Verify version* +If you only want to let CI run the notebook given a certain condition, tag a given cell in the notebook with `verify-version`. +As the name suggests, the tag was introduced to only run for given GDS server versions. -```bash -mypy . -``` +*Teardown* -from the root. See `mypy.ini` for our custom mypy settings. +To make sure certain cells are always run even in case of failure, tag the cell with `teardown`. diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 000000000..3b5b6a78c --- /dev/null +++ b/examples/README.md @@ -0,0 +1,24 @@ +# Examples + +This folder contains example notebooks on how to use the `graphdatascience` python client. + + +## Custom cell tags for notebooks + +*Preserve cell outputs* + +By default, `makestyle` will remove all cell outputs. If you want to preserve some outputs, tag the cell with `preserve-output`. + + +## Update /tutorials in docs + +Every notebook is also available as `adoc` version living under `doc/pages/tutorials/`. +The latest published version can be viewed at https://neo4j.com/docs/graph-data-science-client/current/. 
+ +To update the adoc version, run + +```bash +./scripts/nb2doc/convert.sh +``` + +To see how to render the docs locally, refer to the doc [README](../doc/README) diff --git a/scripts/checkstyle b/scripts/checkstyle index c79ce87f4..4f793bd22 100755 --- a/scripts/checkstyle +++ b/scripts/checkstyle @@ -17,19 +17,12 @@ NOTEBOOKS="./examples/*.ipynb" # ./examples/dev/*.ipynb" for f in $NOTEBOOKS do NB=$(cat $f) - FORMATTED_NB=$(python -m jupyter nbconvert \ - --clear-output \ - --stdout \ - --ClearOutputPreprocessor.enabled=True \ - --ClearMetadataPreprocessor.enabled=True \ - --ClearMetadataPreprocessor.preserve_cell_metadata_mask='tags' \ - --log-level CRITICAL \ - $f) + FORMATTED_NB=$(python scripts/clean_notebooks.py -i "$f" -o stdout) if [[ "$FORMATTED_NB" != "$NB" ]]; then - echo "Notebook $f is not correctly formatted" - diff --color=always --suppress-common-lines --minimal --side-by-side $NB $FORMATTED_NB + echo "Notebook $f is not correctly formatted. See diff above for more details." + diff --color=always --suppress-common-lines --minimal --side-by-side <(echo "$NB") <(echo "$FORMATTED_NB") exit 1 fi done diff --git a/scripts/clean_notebooks.py b/scripts/clean_notebooks.py new file mode 100644 index 000000000..ccdbde067 --- /dev/null +++ b/scripts/clean_notebooks.py @@ -0,0 +1,75 @@ +# reasons for not using nbconvert cli tool: +# * cannot keep output based on a given tag + +import argparse +import logging +from enum import Enum +from pathlib import Path + +import nbconvert +from nbconvert.preprocessors import Preprocessor + +PRESERVE_CELL_OUTPUT_KEY = "preserve-output" +METADATA_TAG_KEY = "tags" + + +class OutputMode(Enum): + STDOUT = "stdout" + INPLACE = "inplace" + + +class CustomClearOutputPreprocessor(Preprocessor): + """ + Removes the output from all code cells in a notebook. + Option to keep cell output for cells with a given metadata tag + """ + + def preprocess_cell(self, cell, resources, cell_index): + """ + Apply a transformation on each cell. 
See base.py for details. + """ + if cell.cell_type == "code" and PRESERVE_CELL_OUTPUT_KEY not in cell["metadata"].get(METADATA_TAG_KEY, []): + cell.outputs = [] + cell.execution_count = None + return cell, resources + + +def main(input_path: Path, output_mode: str) -> None: + logger = logging.getLogger("NotebookCleaner") + logger.info(f"Cleaning notebooks from `{input_path}`, mode: `{output_mode}`") + + exporter = nbconvert.NotebookExporter() + + metadata_cleaner = nbconvert.preprocessors.ClearMetadataPreprocessor(preserve_cell_metadata_mask=METADATA_TAG_KEY) + output_cleaner = CustomClearOutputPreprocessor() + + exporter.register_preprocessor(metadata_cleaner, enabled=True) + exporter.register_preprocessor(output_cleaner, enabled=True) + + if input_path.is_file(): + notebooks = [input_path] + else: + notebooks = [f for f in input_path.iterdir() if f.is_file() and f.suffix == ".ipynb"] + + logger.info(f"Formatting {len(notebooks)} notebooks.") + + for notebook in notebooks: + output = exporter.from_filename(notebook) + + formatted_notebook = output[0] + + if output_mode == OutputMode.INPLACE: + with notebook.open(mode="w") as file: + file.write(formatted_notebook) + elif output_mode == OutputMode.STDOUT: + print(formatted_notebook) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("-o", "--output", choices=[e.value for e in OutputMode]) + parser.add_argument("-i", "--input", default="examples", help="path to the notebook file or folder") + + args = parser.parse_args() + + main(Path(args.input), OutputMode(args.output)) diff --git a/scripts/makestyle b/scripts/makestyle index 310ae717f..8fa21767d 100755 --- a/scripts/makestyle +++ b/scripts/makestyle @@ -13,13 +13,4 @@ if [ "${SKIP_NOTEBOOKS:-false}" == "true" ]; then exit 0 fi -echo "Cleaning notebooks" -python -m jupyter nbconvert \ ---clear-output \ ---inplace \ ---ClearOutputPreprocessor.enabled=True \ ---ClearMetadataPreprocessor.enabled=True \ - 
--ClearMetadataPreprocessor.preserve_cell_metadata_mask='tags' \ ---log-level CRITICAL \ -./examples/*.ipynb \ -./examples/dev/*.ipynb +python scripts/clean_notebooks.py -i examples/ -o inplace