From ef746b1b49cb4ada6b35270193fc04a8702adeab Mon Sep 17 00:00:00 2001 From: William Moore Date: Thu, 2 Dec 2021 12:47:58 +0000 Subject: [PATCH 01/26] Initial validate support, with --warnings option --- .isort.cfg | 2 +- ome_zarr/cli.py | 13 +++ ome_zarr/reader.py | 35 ++++++++ ome_zarr/schemas.py | 208 ++++++++++++++++++++++++++++++++++++++++++++ ome_zarr/utils.py | 20 +++++ 5 files changed, 277 insertions(+), 1 deletion(-) create mode 100644 ome_zarr/schemas.py diff --git a/.isort.cfg b/.isort.cfg index b3d36cbe..3088410b 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -1,5 +1,5 @@ [settings] -known_third_party = dask,numpy,pytest,scipy,setuptools,skimage,zarr +known_third_party = dask,jsonschema,numpy,pytest,scipy,setuptools,skimage,zarr multi_line_output = 3 include_trailing_comma = True force_grid_wrap = 0 diff --git a/ome_zarr/cli.py b/ome_zarr/cli.py index d32ed0a0..8de9ef20 100755 --- a/ome_zarr/cli.py +++ b/ome_zarr/cli.py @@ -9,6 +9,7 @@ from .scale import Scaler from .utils import download as zarr_download from .utils import info as zarr_info +from .utils import validate as zarr_validate def config_logging(loglevel: int, args: argparse.Namespace) -> None: @@ -29,6 +30,12 @@ def info(args: argparse.Namespace) -> None: list(zarr_info(args.path, stats=args.stats)) +def validate(args: argparse.Namespace) -> None: + """Wrap the :func:`~ome_zarr.utils.validate` method.""" + config_logging(logging.WARN, args) + list(zarr_validate(args.path, args.warnings)) + + def download(args: argparse.Namespace) -> None: """Wrap the :func:`~ome_zarr.utils.download` method.""" config_logging(logging.WARN, args) @@ -99,6 +106,12 @@ def main(args: List[str] = None) -> None: parser_info.add_argument("--stats", action="store_true") parser_info.set_defaults(func=info) + # validate + parser_validate = subparsers.add_parser("validate") + parser_validate.add_argument("path") + parser_validate.add_argument("--warnings", action="store_true") + parser_validate.set_defaults(func=validate) + 
# download parser_download = subparsers.add_parser("download") parser_download.add_argument("path") diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 13993479..6ec772fe 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -8,8 +8,11 @@ import dask.array as da import numpy as np from dask import delayed +from jsonschema import validate +from jsonschema.validators import validator_for from .io import ZarrLocation +from .schemas import get_schema, get_strict_schema from .types import JSONDict LOGGER = logging.getLogger("ome_zarr.reader") @@ -104,6 +107,12 @@ def load(self, spec_type: Type["Spec"]) -> Optional["Spec"]: return spec return None + def validate(self, warnings: bool) -> None: + # Validation for a node is delegated to each spec + # e.g. Labels may have spec for multiscales and labels + for spec in self.specs: + spec.validate(warnings) + def add( self, zarr: ZarrLocation, @@ -175,6 +184,10 @@ def __init__(self, node: Node) -> None: def lookup(self, key: str, default: Any) -> Any: return self.zarr.root_attrs.get(key, default) + def validate(self, warnings: bool = False) -> None: + # If not implemented, ignore for now + pass + class Labels(Spec): """Relatively small specification for the well-known "labels" group which only @@ -315,6 +328,28 @@ def array(self, resolution: str, version: str) -> da.core.Array: # data.shape is (t, c, z, y, x) by convention return self.zarr.load(resolution) + def validate(self, warnings: bool = False) -> None: + multiscales = self.lookup("multiscales", []) + version = multiscales[0].get("version", "0.1") + print("Validating Multiscales spec at", self.zarr) + print("Using Multiscales schema version", version) + image_schema = get_schema(version) + + # Always do a validation with the MUST rules + # Will throw ValidationException if it fails + json_data = self.zarr.root_attrs + validate(instance=json_data, schema=image_schema) + + # If we're also checking for SHOULD rules, + # we want to iterate all errors and show as 
"Warnings" + if warnings: + strict_schema = get_strict_schema(version) + cls = validator_for(strict_schema) + cls.check_schema(strict_schema) + validator = cls(strict_schema) + for error in validator.iter_errors(json_data): + print("WARNING", error.message) + class OMERO(Spec): @staticmethod diff --git a/ome_zarr/schemas.py b/ome_zarr/schemas.py new file mode 100644 index 00000000..f40f1127 --- /dev/null +++ b/ome_zarr/schemas.py @@ -0,0 +1,208 @@ +import copy +from typing import Dict + +# import requests + + +def get_schema(version: str) -> Dict: + + # Possible strategy for loading schemas, but probably want + # to package schema with the release. + # url = ( + # "https://raw.githubusercontent.com/ome/ngff/main/" + # "0.3/schemas/json_schema/image.schema" + # ) + # r = requests.get(url) + # return r.json() + + # For now, embed the schemas below and simply return the corrent one + + if version == "0.3": + return image_schema_3 + elif version == "0.1": + return image_schema_1 + else: + raise ValueError(f"Version {version} not supported") + + +def get_strict_schema(version: str) -> Dict: + + if version == "0.3": + return merge(copy.deepcopy(image_schema_3), image_strict_3) + elif version == "0.1": + return merge(copy.deepcopy(image_schema_1), image_strict_1) + else: + raise ValueError(f"Version {version} not supported") + + +def merge(destination: Dict, source: Dict) -> Dict: + """ + deep merge of source into destination dict + https://stackoverflow.com/questions/20656135/python-deep-merge-dictionary-data + """ + for key, value in source.items(): + if isinstance(value, dict): + node = destination.setdefault(key, {}) + merge(node, value) + else: + destination[key] = value + + return destination + + +image_strict_1 = { + "properties": { + "multiscales": { + "items": {"required": ["version", "name", "type", "metadata", "datasets"]} + } + } +} + +image_strict_3 = { + "properties": { + "multiscales": { + "items": { + "required": ["version", "name", "type", "axes", 
"metadata", "datasets"] + } + } + } +} + +image_schema_1 = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "http://localhost:8000/image.schema", + "title": "NGFF Image", + "description": "JSON from OME-NGFF .zattrs", + "type": "object", + "properties": { + "multiscales": { + "description": "The multiscale datasets for this image", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "datasets": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "properties": {"path": {"type": "string"}}, + "required": ["path"], + }, + }, + "version": {"type": "string", "enum": ["0.1"]}, + "metadata": { + "type": "object", + "properties": { + "method": {"type": "string"}, + "version": {"type": "string"}, + }, + }, + }, + "required": ["datasets"], + }, + "minItems": 1, + "uniqueItems": True, + }, + "omero": { + "type": "object", + "properties": { + "channels": { + "type": "array", + "items": { + "type": "object", + "properties": { + "window": { + "type": "object", + "properties": { + "end": {"type": "number"}, + "max": {"type": "number"}, + "min": {"type": "number"}, + "start": {"type": "number"}, + }, + "required": ["start", "min", "end", "max"], + }, + "label": {"type": "string"}, + "family": {"type": "string"}, + "color": {"type": "string"}, + "active": {"type": "boolean"}, + }, + "required": ["window", "color"], + }, + } + }, + "required": ["channels"], + }, + }, + "required": ["multiscales"], +} + + +image_schema_3 = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "http://localhost:8000/image.schema", + "title": "NGFF Image", + "description": "JSON from OME-NGFF .zattrs", + "type": "object", + "properties": { + "multiscales": { + "description": "The multiscale datasets for this image", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "datasets": { + "type": "array", + "minItems": 1, + "items": { + "type": 
"object", + "properties": {"path": {"type": "string"}}, + "required": ["path"], + }, + }, + "version": {"type": "string", "enum": ["0.3"]}, + "axes": { + "type": "array", + "minItems": 2, + "items": {"type": "string", "pattern": "^[xyzct]$"}, + }, + }, + "required": ["datasets", "axes"], + }, + "minItems": 1, + "uniqueItems": True, + }, + "omero": { + "type": "object", + "properties": { + "channels": { + "type": "array", + "items": { + "type": "object", + "properties": { + "window": { + "type": "object", + "properties": { + "end": {"type": "number"}, + "max": {"type": "number"}, + "min": {"type": "number"}, + "start": {"type": "number"}, + }, + "required": ["start", "min", "end", "max"], + }, + "label": {"type": "string"}, + "family": {"type": "string"}, + "color": {"type": "string"}, + "active": {"type": "boolean"}, + }, + "required": ["window", "color"], + }, + } + }, + "required": ["channels"], + }, + }, + "required": ["multiscales"], +} diff --git a/ome_zarr/utils.py b/ome_zarr/utils.py index 21aa92b9..7148e22b 100644 --- a/ome_zarr/utils.py +++ b/ome_zarr/utils.py @@ -46,6 +46,26 @@ def info(path: str, stats: bool = False) -> Iterator[Node]: yield node +def validate(path: str, warnings: bool) -> Iterator[Node]: + """ + Validate OME-NGFF data + + All :class:`Nodes ` that are found from the given path will + be visited recursively. + """ + zarr = parse_url(path) + assert zarr, f"not a zarr: {zarr}" + reader = Reader(zarr) + for node in reader(): + if not node.specs: + print(f"not an ome-zarr node: {node}") + continue + + if hasattr(node, "validate"): + node.validate(warnings) + yield node + + def download(input_path: str, output_dir: str = ".") -> None: """Download an OME-Zarr from the given path. 
From ace592acc80f1ebed3c7392e4d492acc918098ea Mon Sep 17 00:00:00 2001 From: William Moore Date: Thu, 2 Dec 2021 14:06:18 +0000 Subject: [PATCH 02/26] Add jsonschema to deps in setup.py --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index e9753ca0..2562add5 100644 --- a/setup.py +++ b/setup.py @@ -23,6 +23,7 @@ def read(fname): install_requires += (["requests"],) install_requires += (["scikit-image"],) install_requires += (["toolz"],) +install_requires += (["jsonschema"],) setup( From 7d26157a277231e1f5525521f177728a28835e94 Mon Sep 17 00:00:00 2001 From: William Moore Date: Fri, 3 Dec 2021 17:35:48 +0000 Subject: [PATCH 03/26] Copy schemas from ngff. Use LocalRefResolver to load schemas --- ome_zarr/reader.py | 6 +- ome_zarr/schemas.py | 231 +++--------------- schemas/.DS_Store | Bin 0 -> 6148 bytes schemas/0.1/schemas/.DS_Store | Bin 0 -> 6148 bytes schemas/0.1/schemas/json_schema/.DS_Store | Bin 0 -> 6148 bytes schemas/0.1/schemas/json_schema/image.schema | 112 +++++++++ schemas/0.1/schemas/json_schema/plate.schema | 112 +++++++++ .../schemas/json_schema/strict_image.schema | 18 ++ schemas/0.3/schemas/.DS_Store | Bin 0 -> 6148 bytes schemas/0.3/schemas/json_schema/.DS_Store | Bin 0 -> 6148 bytes schemas/0.3/schemas/json_schema/image.schema | 109 +++++++++ .../schemas/json_schema/strict_image.schema | 18 ++ 12 files changed, 406 insertions(+), 200 deletions(-) create mode 100644 schemas/.DS_Store create mode 100644 schemas/0.1/schemas/.DS_Store create mode 100644 schemas/0.1/schemas/json_schema/.DS_Store create mode 100644 schemas/0.1/schemas/json_schema/image.schema create mode 100644 schemas/0.1/schemas/json_schema/plate.schema create mode 100644 schemas/0.1/schemas/json_schema/strict_image.schema create mode 100644 schemas/0.3/schemas/.DS_Store create mode 100644 schemas/0.3/schemas/json_schema/.DS_Store create mode 100644 schemas/0.3/schemas/json_schema/image.schema create mode 100644 
schemas/0.3/schemas/json_schema/strict_image.schema diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 6ec772fe..336fd43b 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -12,7 +12,7 @@ from jsonschema.validators import validator_for from .io import ZarrLocation -from .schemas import get_schema, get_strict_schema +from .schemas import LocalRefResolver, get_schema, get_strict_schema from .types import JSONDict LOGGER = logging.getLogger("ome_zarr.reader") @@ -346,7 +346,9 @@ def validate(self, warnings: bool = False) -> None: strict_schema = get_strict_schema(version) cls = validator_for(strict_schema) cls.check_schema(strict_schema) - validator = cls(strict_schema) + # Use our local resolver subclass to resolve local documents + localResolver = LocalRefResolver.from_schema(strict_schema) + validator = cls(strict_schema, resolver=localResolver) for error in validator.iter_errors(json_data): print("WARNING", error.message) diff --git a/ome_zarr/schemas.py b/ome_zarr/schemas.py index f40f1127..f94e3eda 100644 --- a/ome_zarr/schemas.py +++ b/ome_zarr/schemas.py @@ -1,208 +1,43 @@ -import copy +import json +import os from typing import Dict -# import requests +from jsonschema import RefResolver -def get_schema(version: str) -> Dict: - - # Possible strategy for loading schemas, but probably want - # to package schema with the release. 
- # url = ( - # "https://raw.githubusercontent.com/ome/ngff/main/" - # "0.3/schemas/json_schema/image.schema" - # ) - # r = requests.get(url) - # return r.json() - - # For now, embed the schemas below and simply return the corrent one - - if version == "0.3": - return image_schema_3 - elif version == "0.1": - return image_schema_1 - else: - raise ValueError(f"Version {version} not supported") - - -def get_strict_schema(version: str) -> Dict: +class LocalRefResolver(RefResolver): + def resolve_remote(self, url: str) -> Dict: + # Use remote URL to generate local path + rel_path = url.replace("https://ngff.openmicroscopy.org/", "../schemas/") + curr_dir = os.path.dirname(__file__) + path = os.path.join(curr_dir, rel_path) + path = os.path.normpath(path) + # Load local document and cache it + document = load_json(path) + self.store[url] = document + return document - if version == "0.3": - return merge(copy.deepcopy(image_schema_3), image_strict_3) - elif version == "0.1": - return merge(copy.deepcopy(image_schema_1), image_strict_1) - else: - raise ValueError(f"Version {version} not supported") +def load_json(path: str) -> Dict: + with open(path) as f: + document = json.loads(f.read()) + return document -def merge(destination: Dict, source: Dict) -> Dict: - """ - deep merge of source into destination dict - https://stackoverflow.com/questions/20656135/python-deep-merge-dictionary-data - """ - for key, value in source.items(): - if isinstance(value, dict): - node = destination.setdefault(key, {}) - merge(node, value) - else: - destination[key] = value - return destination - - -image_strict_1 = { - "properties": { - "multiscales": { - "items": {"required": ["version", "name", "type", "metadata", "datasets"]} - } - } -} - -image_strict_3 = { - "properties": { - "multiscales": { - "items": { - "required": ["version", "name", "type", "axes", "metadata", "datasets"] - } - } - } -} - -image_schema_1 = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": 
"http://localhost:8000/image.schema", - "title": "NGFF Image", - "description": "JSON from OME-NGFF .zattrs", - "type": "object", - "properties": { - "multiscales": { - "description": "The multiscale datasets for this image", - "type": "array", - "items": { - "type": "object", - "properties": { - "name": {"type": "string"}, - "datasets": { - "type": "array", - "minItems": 1, - "items": { - "type": "object", - "properties": {"path": {"type": "string"}}, - "required": ["path"], - }, - }, - "version": {"type": "string", "enum": ["0.1"]}, - "metadata": { - "type": "object", - "properties": { - "method": {"type": "string"}, - "version": {"type": "string"}, - }, - }, - }, - "required": ["datasets"], - }, - "minItems": 1, - "uniqueItems": True, - }, - "omero": { - "type": "object", - "properties": { - "channels": { - "type": "array", - "items": { - "type": "object", - "properties": { - "window": { - "type": "object", - "properties": { - "end": {"type": "number"}, - "max": {"type": "number"}, - "min": {"type": "number"}, - "start": {"type": "number"}, - }, - "required": ["start", "min", "end", "max"], - }, - "label": {"type": "string"}, - "family": {"type": "string"}, - "color": {"type": "string"}, - "active": {"type": "boolean"}, - }, - "required": ["window", "color"], - }, - } - }, - "required": ["channels"], - }, - }, - "required": ["multiscales"], -} +def get_schema(version: str) -> Dict: + curr_dir = os.path.dirname(__file__) + # The paths here match the paths in the ngff repo (and public schemas) + path = os.path.join( + curr_dir, f"../schemas/{version}/schemas/json_schema/image.schema" + ) + path = os.path.normpath(path) + return load_json(path) -image_schema_3 = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "http://localhost:8000/image.schema", - "title": "NGFF Image", - "description": "JSON from OME-NGFF .zattrs", - "type": "object", - "properties": { - "multiscales": { - "description": "The multiscale datasets for this image", - "type": 
"array", - "items": { - "type": "object", - "properties": { - "name": {"type": "string"}, - "datasets": { - "type": "array", - "minItems": 1, - "items": { - "type": "object", - "properties": {"path": {"type": "string"}}, - "required": ["path"], - }, - }, - "version": {"type": "string", "enum": ["0.3"]}, - "axes": { - "type": "array", - "minItems": 2, - "items": {"type": "string", "pattern": "^[xyzct]$"}, - }, - }, - "required": ["datasets", "axes"], - }, - "minItems": 1, - "uniqueItems": True, - }, - "omero": { - "type": "object", - "properties": { - "channels": { - "type": "array", - "items": { - "type": "object", - "properties": { - "window": { - "type": "object", - "properties": { - "end": {"type": "number"}, - "max": {"type": "number"}, - "min": {"type": "number"}, - "start": {"type": "number"}, - }, - "required": ["start", "min", "end", "max"], - }, - "label": {"type": "string"}, - "family": {"type": "string"}, - "color": {"type": "string"}, - "active": {"type": "boolean"}, - }, - "required": ["window", "color"], - }, - } - }, - "required": ["channels"], - }, - }, - "required": ["multiscales"], -} +def get_strict_schema(version: str) -> Dict: + curr_dir = os.path.dirname(__file__) + path = os.path.join( + curr_dir, f"../schemas/{version}/schemas/json_schema/strict_image.schema" + ) + path = os.path.normpath(path) + return load_json(path) diff --git a/schemas/.DS_Store b/schemas/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..79f9a11c79c2de0669c3daf16b820ff0efed3911 GIT binary patch literal 6148 zcmeHKJ5Iwu5S<|@EYYN-+!G)oL~kM^4v-6=5DAJ{PLtjRh|_QiF2xb<`bpKrKI`uw`kpVJ52FL&zICTclvrU@wKw2_D z2FSoS19m?Ys9~Ep2KvQa#y)wn=7getI{d}cMcY7HGC&578JNX#Vg3IO`p5i#T*MU_AOk1GfX=&hx56i7 zZ>>JgdToI|K)(dJo(|)!80f7S3v0y}zw(MbBi<&CfnE;3mjn4DV7ySsz<)6C33&h_ A4FCWD literal 0 HcmV?d00001 diff --git a/schemas/0.1/schemas/.DS_Store b/schemas/0.1/schemas/.DS_Store new file mode 100644 index 
0000000000000000000000000000000000000000..d053bb1cc0388fa27ebfb1f84760ec85f17d67bc GIT binary patch literal 6148 zcmeHKJx{|h5PgOYmAZ6f^o~S;Ul3IR7SshXLxBPUL8~McYkoTKKKQ5zjHsYH%lGW_ z-PwLp>=;0rtZoi~F@PbfV3J0Uh`D!Y%bf+2=D0wEYusawr=CQcXp*&$@W6F1@Qlyv zpDo*~(^b>f^w`BYJ?5yX3%qc53znLe0u^=D zrkQ@4d+Kw4v${DU?S(Ltq8 z0OEjV6}IIqAu-vpbnFZ{LP-`%w9w>FjAY@oXZI@|J3|Xc(wUjZXMTIUB%Pi1tnNsq zVd!8W7+7au+qz@9|Bv{|^gi;}L!uW91Oxw!fi$SjstGscck7q!$z7XSFIh#zuagFa m?X{l(cH|t{(Lo!}(xzYO*cnO|v1>Rnegu?|P{F`2Fz^Z@H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 Date: Fri, 3 Dec 2021 17:55:42 +0000 Subject: [PATCH 04/26] Add validation tests to test_cli.py --- ome_zarr/data.py | 6 +++--- tests/test_cli.py | 9 +++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/ome_zarr/data.py b/ome_zarr/data.py index 8ae48324..0811b74b 100644 --- a/ome_zarr/data.py +++ b/ome_zarr/data.py @@ -147,19 +147,19 @@ def create_zarr( "channels": [ { "color": "FF0000", - "window": {"start": 0, "end": 255}, + "window": {"start": 0, "end": 255, "min": 0, "max": 255}, "label": "Red", "active": True, }, { "color": "00FF00", - "window": {"start": 0, "end": 255}, + "window": {"start": 0, "end": 255, "min": 0, "max": 255}, "label": "Green", "active": True, }, { "color": "0000FF", - "window": {"start": 0, "end": 255}, + "window": {"start": 0, "end": 255, "min": 0, "max": 255}, "label": "Blue", "active": True, }, diff --git a/tests/test_cli.py b/tests/test_cli.py index 3a0d91dd..0a4357f8 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -33,6 +33,15 @@ def test_astronaut_info(self): main(["create", "--method=astronaut", filename]) 
main(["info", filename]) + @pytest.mark.parametrize("warnings", [False, True]) + def test_astronaut_validation(self, warnings): + filename = str(self.path) + "-2" + main(["create", "--method=astronaut", filename]) + if warnings: + main(["validate", "--warnings", filename]) + else: + main(["validate", filename]) + def test_astronaut_download(self, tmpdir): out = str(tmpdir / "out") filename = str(self.path) + "-3" From 488869865e6d727cd67af172adadb423c3d4e106 Mon Sep 17 00:00:00 2001 From: William Moore Date: Fri, 3 Dec 2021 18:09:48 +0000 Subject: [PATCH 05/26] Add jsonschema to requirements-test.txt --- requirements/requirements-test.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt index 9cf84846..782a9f8b 100644 --- a/requirements/requirements-test.txt +++ b/requirements/requirements-test.txt @@ -1,3 +1,4 @@ pytest pytest-cov codecov +jsonschema From a20c8aaa26630cdeddbc2ab8e973ab3e99955e36 Mon Sep 17 00:00:00 2001 From: William Moore Date: Fri, 3 Dec 2021 22:47:00 +0000 Subject: [PATCH 06/26] Fix jsonschema dependency --- environment.yml | 1 + requirements/requirements-test.txt | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 2dbb7d64..e1ae4d52 100644 --- a/environment.yml +++ b/environment.yml @@ -6,6 +6,7 @@ channels: dependencies: - flake8 - ipython + - jsonschema - mypy - omero-py - pip diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt index 782a9f8b..9cf84846 100644 --- a/requirements/requirements-test.txt +++ b/requirements/requirements-test.txt @@ -1,4 +1,3 @@ pytest pytest-cov codecov -jsonschema From 7520081d3f2ad7dc33be25b04e455cc70934a25f Mon Sep 17 00:00:00 2001 From: William Moore Date: Thu, 9 Dec 2021 12:26:58 +0000 Subject: [PATCH 07/26] Remove .DS_Store files --- schemas/.DS_Store | Bin 6148 -> 0 bytes schemas/0.1/schemas/.DS_Store | Bin 6148 -> 0 bytes 
schemas/0.1/schemas/json_schema/.DS_Store | Bin 6148 -> 0 bytes schemas/0.3/schemas/.DS_Store | Bin 6148 -> 0 bytes schemas/0.3/schemas/json_schema/.DS_Store | Bin 6148 -> 0 bytes 5 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 schemas/.DS_Store delete mode 100644 schemas/0.1/schemas/.DS_Store delete mode 100644 schemas/0.1/schemas/json_schema/.DS_Store delete mode 100644 schemas/0.3/schemas/.DS_Store delete mode 100644 schemas/0.3/schemas/json_schema/.DS_Store diff --git a/schemas/.DS_Store b/schemas/.DS_Store deleted file mode 100644 index 79f9a11c79c2de0669c3daf16b820ff0efed3911..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKJ5Iwu5S<|@EYYN-+!G)oL~kM^4v-6=5DAJ{PLtjRh|_QiF2xb<`bpKrKI`uw`kpVJ52FL&zICTclvrU@wKw2_D z2FSoS19m?Ys9~Ep2KvQa#y)wn=7getI{d}cMcY7HGC&578JNX#Vg3IO`p5i#T*MU_AOk1GfX=&hx56i7 zZ>>JgdToI|K)(dJo(|)!80f7S3v0y}zw(MbBi<&CfnE;3mjn4DV7ySsz<)6C33&h_ A4FCWD diff --git a/schemas/0.1/schemas/.DS_Store b/schemas/0.1/schemas/.DS_Store deleted file mode 100644 index d053bb1cc0388fa27ebfb1f84760ec85f17d67bc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKJx{|h5PgOYmAZ6f^o~S;Ul3IR7SshXLxBPUL8~McYkoTKKKQ5zjHsYH%lGW_ z-PwLp>=;0rtZoi~F@PbfV3J0Uh`D!Y%bf+2=D0wEYusawr=CQcXp*&$@W6F1@Qlyv zpDo*~(^b>f^w`BYJ?5yX3%qc53znLe0u^=D zrkQ@4d+Kw4v${DU?S(Ltq8 z0OEjV6}IIqAu-vpbnFZ{LP-`%w9w>FjAY@oXZI@|J3|Xc(wUjZXMTIUB%Pi1tnNsq zVd!8W7+7au+qz@9|Bv{|^gi;}L!uW91Oxw!fi$SjstGscck7q!$z7XSFIh#zuagFa m?X{l(cH|t{(Lo!}(xzYO*cnO|v1>Rnegu?|P{F`2Fz^Z@H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 Date: Thu, 9 Dec 
2021 12:28:45 +0000 Subject: [PATCH 08/26] Add .DS_Store to .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index f4b3f55c..bceb52ed 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ var build dist/ target/ +*.DS_Store +*/.DS_Store From f0683a6bd7b59e25de321890dddab5fbc0b15cbc Mon Sep 17 00:00:00 2001 From: William Moore Date: Thu, 9 Dec 2021 12:40:59 +0000 Subject: [PATCH 09/26] Use latest schema version by default --- ome_zarr/reader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 336fd43b..f253c81d 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -11,6 +11,7 @@ from jsonschema import validate from jsonschema.validators import validator_for +from .format import CurrentFormat from .io import ZarrLocation from .schemas import LocalRefResolver, get_schema, get_strict_schema from .types import JSONDict @@ -330,7 +331,7 @@ def array(self, resolution: str, version: str) -> da.core.Array: def validate(self, warnings: bool = False) -> None: multiscales = self.lookup("multiscales", []) - version = multiscales[0].get("version", "0.1") + version = multiscales[0].get("version", CurrentFormat().version) print("Validating Multiscales spec at", self.zarr) print("Using Multiscales schema version", version) image_schema = get_schema(version) From d093cda22e658acaa4f7f96c3de0bd813d1d1788 Mon Sep 17 00:00:00 2001 From: William Moore Date: Thu, 9 Dec 2021 12:52:54 +0000 Subject: [PATCH 10/26] Use logging instead of print --- ome_zarr/reader.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index f253c81d..ca20c67a 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -332,8 +332,8 @@ def array(self, resolution: str, version: str) -> da.core.Array: def validate(self, warnings: bool = False) -> None: multiscales = self.lookup("multiscales", []) version = 
multiscales[0].get("version", CurrentFormat().version) - print("Validating Multiscales spec at", self.zarr) - print("Using Multiscales schema version", version) + LOGGER.info("Validating Multiscales spec at: %s" % self.zarr) + LOGGER.info("Using Multiscales schema version: %s" % version) image_schema = get_schema(version) # Always do a validation with the MUST rules @@ -351,7 +351,7 @@ def validate(self, warnings: bool = False) -> None: localResolver = LocalRefResolver.from_schema(strict_schema) validator = cls(strict_schema, resolver=localResolver) for error in validator.iter_errors(json_data): - print("WARNING", error.message) + LOGGER.warn(error.message) class OMERO(Spec): From 33298bd00308bd2d909f6a3824e026b617569abb Mon Sep 17 00:00:00 2001 From: William Moore Date: Thu, 9 Dec 2021 13:49:53 +0000 Subject: [PATCH 11/26] Move top-level /schemas to ome_zarr/schemas --- ome_zarr/schemas.py | 8 +++++--- .../schemas}/0.1/schemas/json_schema/image.schema | 0 .../schemas}/0.1/schemas/json_schema/plate.schema | 0 .../schemas}/0.1/schemas/json_schema/strict_image.schema | 0 .../schemas}/0.3/schemas/json_schema/image.schema | 0 .../schemas}/0.3/schemas/json_schema/strict_image.schema | 0 6 files changed, 5 insertions(+), 3 deletions(-) rename {schemas => ome_zarr/schemas}/0.1/schemas/json_schema/image.schema (100%) rename {schemas => ome_zarr/schemas}/0.1/schemas/json_schema/plate.schema (100%) rename {schemas => ome_zarr/schemas}/0.1/schemas/json_schema/strict_image.schema (100%) rename {schemas => ome_zarr/schemas}/0.3/schemas/json_schema/image.schema (100%) rename {schemas => ome_zarr/schemas}/0.3/schemas/json_schema/strict_image.schema (100%) diff --git a/ome_zarr/schemas.py b/ome_zarr/schemas.py index f94e3eda..c968f69a 100644 --- a/ome_zarr/schemas.py +++ b/ome_zarr/schemas.py @@ -4,11 +4,13 @@ from jsonschema import RefResolver +SCHEMAS_PATH = "schemas" + class LocalRefResolver(RefResolver): def resolve_remote(self, url: str) -> Dict: # Use remote URL to generate 
local path - rel_path = url.replace("https://ngff.openmicroscopy.org/", "../schemas/") + rel_path = url.replace("https://ngff.openmicroscopy.org", SCHEMAS_PATH) curr_dir = os.path.dirname(__file__) path = os.path.join(curr_dir, rel_path) path = os.path.normpath(path) @@ -28,7 +30,7 @@ def get_schema(version: str) -> Dict: curr_dir = os.path.dirname(__file__) # The paths here match the paths in the ngff repo (and public schemas) path = os.path.join( - curr_dir, f"../schemas/{version}/schemas/json_schema/image.schema" + curr_dir, f"{SCHEMAS_PATH}/{version}/schemas/json_schema/image.schema" ) path = os.path.normpath(path) return load_json(path) @@ -37,7 +39,7 @@ def get_schema(version: str) -> Dict: def get_strict_schema(version: str) -> Dict: curr_dir = os.path.dirname(__file__) path = os.path.join( - curr_dir, f"../schemas/{version}/schemas/json_schema/strict_image.schema" + curr_dir, f"{SCHEMAS_PATH}/{version}/schemas/json_schema/strict_image.schema" ) path = os.path.normpath(path) return load_json(path) diff --git a/schemas/0.1/schemas/json_schema/image.schema b/ome_zarr/schemas/0.1/schemas/json_schema/image.schema similarity index 100% rename from schemas/0.1/schemas/json_schema/image.schema rename to ome_zarr/schemas/0.1/schemas/json_schema/image.schema diff --git a/schemas/0.1/schemas/json_schema/plate.schema b/ome_zarr/schemas/0.1/schemas/json_schema/plate.schema similarity index 100% rename from schemas/0.1/schemas/json_schema/plate.schema rename to ome_zarr/schemas/0.1/schemas/json_schema/plate.schema diff --git a/schemas/0.1/schemas/json_schema/strict_image.schema b/ome_zarr/schemas/0.1/schemas/json_schema/strict_image.schema similarity index 100% rename from schemas/0.1/schemas/json_schema/strict_image.schema rename to ome_zarr/schemas/0.1/schemas/json_schema/strict_image.schema diff --git a/schemas/0.3/schemas/json_schema/image.schema b/ome_zarr/schemas/0.3/schemas/json_schema/image.schema similarity index 100% rename from 
schemas/0.3/schemas/json_schema/image.schema rename to ome_zarr/schemas/0.3/schemas/json_schema/image.schema diff --git a/schemas/0.3/schemas/json_schema/strict_image.schema b/ome_zarr/schemas/0.3/schemas/json_schema/strict_image.schema similarity index 100% rename from schemas/0.3/schemas/json_schema/strict_image.schema rename to ome_zarr/schemas/0.3/schemas/json_schema/strict_image.schema From 4c2802417f2ed3086f06ab3febac7932034590b1 Mon Sep 17 00:00:00 2001 From: William Moore Date: Thu, 9 Dec 2021 14:00:45 +0000 Subject: [PATCH 12/26] remove get_strict_schema(). use get_schema(strict) --- ome_zarr/reader.py | 4 ++-- ome_zarr/schemas.py | 14 +++----------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index ca20c67a..92f989a3 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -13,7 +13,7 @@ from .format import CurrentFormat from .io import ZarrLocation -from .schemas import LocalRefResolver, get_schema, get_strict_schema +from .schemas import LocalRefResolver, get_schema from .types import JSONDict LOGGER = logging.getLogger("ome_zarr.reader") @@ -344,7 +344,7 @@ def validate(self, warnings: bool = False) -> None: # If we're also checking for SHOULD rules, # we want to iterate all errors and show as "Warnings" if warnings: - strict_schema = get_strict_schema(version) + strict_schema = get_schema(version, strict=True) cls = validator_for(strict_schema) cls.check_schema(strict_schema) # Use our local resolver subclass to resolve local documents diff --git a/ome_zarr/schemas.py b/ome_zarr/schemas.py index c968f69a..05f8ab3e 100644 --- a/ome_zarr/schemas.py +++ b/ome_zarr/schemas.py @@ -26,20 +26,12 @@ def load_json(path: str) -> Dict: return document -def get_schema(version: str) -> Dict: +def get_schema(version: str, strict: bool = False) -> Dict: + schema_name = "strict_image.schema" if strict else "image.schema" curr_dir = os.path.dirname(__file__) # The paths here match the paths in the ngff repo 
(and public schemas) path = os.path.join( - curr_dir, f"{SCHEMAS_PATH}/{version}/schemas/json_schema/image.schema" - ) - path = os.path.normpath(path) - return load_json(path) - - -def get_strict_schema(version: str) -> Dict: - curr_dir = os.path.dirname(__file__) - path = os.path.join( - curr_dir, f"{SCHEMAS_PATH}/{version}/schemas/json_schema/strict_image.schema" + curr_dir, SCHEMAS_PATH, version, "schemas", "json_schema", schema_name ) path = os.path.normpath(path) return load_json(path) From b388a3a53b7dc32818082bb75e153e69b1616501 Mon Sep 17 00:00:00 2001 From: William Moore Date: Thu, 9 Dec 2021 14:14:54 +0000 Subject: [PATCH 13/26] fix duplicate validate() --- ome_zarr/reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 92f989a3..3ad2b3a7 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -8,7 +8,7 @@ import dask.array as da import numpy as np from dask import delayed -from jsonschema import validate +from jsonschema import validate as jsonschema_validate from jsonschema.validators import validator_for from .format import CurrentFormat @@ -339,7 +339,7 @@ def validate(self, warnings: bool = False) -> None: # Always do a validation with the MUST rules # Will throw ValidationException if it fails json_data = self.zarr.root_attrs - validate(instance=json_data, schema=image_schema) + jsonschema_validate(instance=json_data, schema=image_schema) # If we're also checking for SHOULD rules, # we want to iterate all errors and show as "Warnings" From 674ebb738f258e4b5c83b6378fb4796bc16ef5c1 Mon Sep 17 00:00:00 2001 From: William Moore Date: Tue, 14 Dec 2021 14:43:22 +0000 Subject: [PATCH 14/26] Import schemas from ngff --- .isort.cfg | 2 +- ome_zarr/reader.py | 2 +- ome_zarr/schemas.py | 37 ------------------------------------- setup.py | 1 + 4 files changed, 3 insertions(+), 39 deletions(-) delete mode 100644 ome_zarr/schemas.py diff --git a/.isort.cfg b/.isort.cfg index 
3088410b..be5355ca 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -1,5 +1,5 @@ [settings] -known_third_party = dask,jsonschema,numpy,pytest,scipy,setuptools,skimage,zarr +known_third_party = dask,jsonschema,ngff,numpy,pytest,scipy,setuptools,skimage,zarr multi_line_output = 3 include_trailing_comma = True force_grid_wrap = 0 diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 3ad2b3a7..7b152061 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -10,10 +10,10 @@ from dask import delayed from jsonschema import validate as jsonschema_validate from jsonschema.validators import validator_for +from ngff.schemas import LocalRefResolver, get_schema from .format import CurrentFormat from .io import ZarrLocation -from .schemas import LocalRefResolver, get_schema from .types import JSONDict LOGGER = logging.getLogger("ome_zarr.reader") diff --git a/ome_zarr/schemas.py b/ome_zarr/schemas.py deleted file mode 100644 index 05f8ab3e..00000000 --- a/ome_zarr/schemas.py +++ /dev/null @@ -1,37 +0,0 @@ -import json -import os -from typing import Dict - -from jsonschema import RefResolver - -SCHEMAS_PATH = "schemas" - - -class LocalRefResolver(RefResolver): - def resolve_remote(self, url: str) -> Dict: - # Use remote URL to generate local path - rel_path = url.replace("https://ngff.openmicroscopy.org", SCHEMAS_PATH) - curr_dir = os.path.dirname(__file__) - path = os.path.join(curr_dir, rel_path) - path = os.path.normpath(path) - # Load local document and cache it - document = load_json(path) - self.store[url] = document - return document - - -def load_json(path: str) -> Dict: - with open(path) as f: - document = json.loads(f.read()) - return document - - -def get_schema(version: str, strict: bool = False) -> Dict: - schema_name = "strict_image.schema" if strict else "image.schema" - curr_dir = os.path.dirname(__file__) - # The paths here match the paths in the ngff repo (and public schemas) - path = os.path.join( - curr_dir, SCHEMAS_PATH, version, "schemas", 
"json_schema", schema_name - ) - path = os.path.normpath(path) - return load_json(path) diff --git a/setup.py b/setup.py index 2562add5..5158a9e6 100644 --- a/setup.py +++ b/setup.py @@ -24,6 +24,7 @@ def read(fname): install_requires += (["scikit-image"],) install_requires += (["toolz"],) install_requires += (["jsonschema"],) +install_requires += (["ngff"],) setup( From 7b3ab42ec3756b55709597b6473148026ec8f1ad Mon Sep 17 00:00:00 2001 From: William Moore Date: Tue, 14 Dec 2021 14:44:53 +0000 Subject: [PATCH 15/26] Delete schemas --- .../0.1/schemas/json_schema/image.schema | 112 ------------------ .../0.1/schemas/json_schema/plate.schema | 112 ------------------ .../schemas/json_schema/strict_image.schema | 18 --- .../0.3/schemas/json_schema/image.schema | 109 ----------------- .../schemas/json_schema/strict_image.schema | 18 --- 5 files changed, 369 deletions(-) delete mode 100644 ome_zarr/schemas/0.1/schemas/json_schema/image.schema delete mode 100644 ome_zarr/schemas/0.1/schemas/json_schema/plate.schema delete mode 100644 ome_zarr/schemas/0.1/schemas/json_schema/strict_image.schema delete mode 100644 ome_zarr/schemas/0.3/schemas/json_schema/image.schema delete mode 100644 ome_zarr/schemas/0.3/schemas/json_schema/strict_image.schema diff --git a/ome_zarr/schemas/0.1/schemas/json_schema/image.schema b/ome_zarr/schemas/0.1/schemas/json_schema/image.schema deleted file mode 100644 index 23e24eab..00000000 --- a/ome_zarr/schemas/0.1/schemas/json_schema/image.schema +++ /dev/null @@ -1,112 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://ngff.openmicroscopy.org/0.1/schemas/json_schema/image.schema", - "title": "NGFF Image", - "description": "JSON from OME-NGFF .zattrs", - "type": "object", - "properties": { - "multiscales": { - "description": "The multiscale datasets for this image", - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "datasets": { - "type": "array", - 
"minItems": 1, - "items": { - "type": "object", - "properties": { - "path": { - "type": "string" - } - }, - "required": ["path"] - } - }, - "version": { - "type": "string", - "enum": [ - "0.1" - ] - }, - "metadata": { - "type": "object", - "properties": { - "method": { - "type": "string" - }, - "version": { - "type": "string" - } - } - } - }, - "required": [ - "datasets" - ] - }, - "minItems": 1, - "uniqueItems": true - }, - "omero": { - "type": "object", - "properties": { - "channels": { - "type": "array", - "items": { - "type": "object", - "properties": { - "window": { - "type": "object", - "properties": { - "end": { - "type": "number" - }, - "max": { - "type": "number" - }, - "min": { - "type": "number" - }, - "start": { - "type": "number" - } - }, - "required": [ - "start", - "min", - "end", - "max" - ] - }, - "label": { - "type": "string" - }, - "family": { - "type": "string" - }, - "color": { - "type": "string" - }, - "active": { - "type": "boolean" - } - }, - "required": [ - "window", - "color" - ] - } - } - }, - "required": [ - "channels" - ] - } - }, - "required": [ "multiscales" ] -} diff --git a/ome_zarr/schemas/0.1/schemas/json_schema/plate.schema b/ome_zarr/schemas/0.1/schemas/json_schema/plate.schema deleted file mode 100644 index 1861a85e..00000000 --- a/ome_zarr/schemas/0.1/schemas/json_schema/plate.schema +++ /dev/null @@ -1,112 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "http://localhost:8000/plate.schema", - "title": "NGFF Plate", - "description": "JSON from OME-NGFF Plate .zattrs", - "type": "object", - "properties": { - "plate": { - "type": "object", - "properties": { - "version": { - "type": "string", - "enum": [ - "0.1" - ] - }, - "name": { - "type": "string" - }, - "columns": { - "description": "Columns of the Plate grid", - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - } - }, - "required": [ - "name" - ] - }, - "minItems": 1, - "uniqueItems": true - 
}, - "rows": { - "description": "Rows of the Plate grid", - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - } - }, - "required": [ - "name" - ] - }, - "minItems": 1, - "uniqueItems": true - }, - "wells": { - "description": "Rows of the Plate grid", - "type": "array", - "items": { - "type": "object", - "properties": { - "path": { - "type": "string" - } - }, - "required": [ - "path" - ] - }, - "minItems": 1, - "uniqueItems": true - }, - "field_count": { - "description": "Maximum number of fields per view across all wells." - }, - "acquisitions": { - "description": "Rows of the Plate grid", - "type": "array", - "items": { - "type": "object", - "properties": { - "id": { - "type": "number" - }, - "maximumfieldcount": { - "type": "number" - }, - "name": { - "type": "string" - }, - "description": { - "type": "string" - }, - "starttime": { - "type": "number" - } - }, - "required": [ - "id" - ] - }, - "minItems": 1, - "uniqueItems": true - } - }, - "required": [ - "version", "columns", "rows", "wells" - ] - } - }, - "required": [ - "plate" - ] -} diff --git a/ome_zarr/schemas/0.1/schemas/json_schema/strict_image.schema b/ome_zarr/schemas/0.1/schemas/json_schema/strict_image.schema deleted file mode 100644 index cfb80880..00000000 --- a/ome_zarr/schemas/0.1/schemas/json_schema/strict_image.schema +++ /dev/null @@ -1,18 +0,0 @@ -{ - "allOf": [ - { - "$ref": "https://ngff.openmicroscopy.org/0.1/schemas/json_schema/image.schema" - }, - { - "properties": { - "multiscales": { - "items": { - "required": [ - "version", "metadata", "type", "name" - ] - } - } - } - } - ] -} diff --git a/ome_zarr/schemas/0.3/schemas/json_schema/image.schema b/ome_zarr/schemas/0.3/schemas/json_schema/image.schema deleted file mode 100644 index 1802487b..00000000 --- a/ome_zarr/schemas/0.3/schemas/json_schema/image.schema +++ /dev/null @@ -1,109 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": 
"https://ngff.openmicroscopy.org/0.3/schemas/json_schema/image.schema", - "title": "NGFF Image", - "description": "JSON from OME-NGFF .zattrs", - "type": "object", - "properties": { - "multiscales": { - "description": "The multiscale datasets for this image", - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "datasets": { - "type": "array", - "minItems": 1, - "items": { - "type": "object", - "properties": { - "path": { - "type": "string" - } - }, - "required": ["path"] - } - }, - "version": { - "type": "string", - "enum": [ - "0.3" - ] - }, - "axes": { - "type": "array", - "minItems": 2, - "items": { - "type": "string", - "pattern": "^[xyzct]$" - } - } - }, - "required": [ - "datasets", "axes" - ] - }, - "minItems": 1, - "uniqueItems": true - }, - "omero": { - "type": "object", - "properties": { - "channels": { - "type": "array", - "items": { - "type": "object", - "properties": { - "window": { - "type": "object", - "properties": { - "end": { - "type": "number" - }, - "max": { - "type": "number" - }, - "min": { - "type": "number" - }, - "start": { - "type": "number" - } - }, - "required": [ - "start", - "min", - "end", - "max" - ] - }, - "label": { - "type": "string" - }, - "family": { - "type": "string" - }, - "color": { - "type": "string" - }, - "active": { - "type": "boolean" - } - }, - "required": [ - "window", - "color" - ] - } - } - }, - "required": [ - "channels" - ] - } - }, - "required": [ "multiscales" ] -} diff --git a/ome_zarr/schemas/0.3/schemas/json_schema/strict_image.schema b/ome_zarr/schemas/0.3/schemas/json_schema/strict_image.schema deleted file mode 100644 index ea541c4a..00000000 --- a/ome_zarr/schemas/0.3/schemas/json_schema/strict_image.schema +++ /dev/null @@ -1,18 +0,0 @@ -{ - "allOf": [ - { - "$ref": "https://ngff.openmicroscopy.org/0.3/schemas/json_schema/image.schema" - }, - { - "properties": { - "multiscales": { - "items": { - "required": [ - "version", "metadata", "type", "name" 
- ] - } - } - } - } - ] -} From d30122915e3b428c6624d578da8ebfe887ee20af Mon Sep 17 00:00:00 2001 From: William Moore Date: Tue, 22 Mar 2022 16:35:52 +0000 Subject: [PATCH 16/26] Update to use 'ome_ngff' package --- .isort.cfg | 2 +- ome_zarr/reader.py | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.isort.cfg b/.isort.cfg index 52f1b8f3..778cc234 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -1,6 +1,6 @@ [settings] -known_third_party = dask,jsonschema,ngff,numcodecs,numpy,pytest,scipy,setuptools,skimage,zarr +known_third_party = dask,jsonschema,numcodecs,numpy,ome_ngff,pytest,scipy,setuptools,skimage,zarr multi_line_output = 3 include_trailing_comma = True force_grid_wrap = 0 diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 8591090b..69ac048b 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -10,7 +10,7 @@ from dask import delayed from jsonschema import validate as jsonschema_validate from jsonschema.validators import validator_for -from ngff.schemas import LocalRefResolver, get_schema +from ome_ngff.schemas import LocalRefResolver, get_schema from .axes import Axes from .format import CurrentFormat, format_from_version diff --git a/setup.py b/setup.py index 5806098c..b8034499 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ def read(fname): install_requires += (["scikit-image"],) install_requires += (["toolz"],) install_requires += (["jsonschema"],) -install_requires += (["ngff"],) +install_requires += (["ome_ngff"],) setup( From c37e116f4979ba29f197d3d4ca77e997dd894146 Mon Sep 17 00:00:00 2001 From: William Moore Date: Wed, 23 Mar 2022 15:59:04 +0000 Subject: [PATCH 17/26] visit(path: str, func: callable) --- ome_zarr/utils.py | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/ome_zarr/utils.py b/ome_zarr/utils.py index 7148e22b..71797987 100644 --- a/ome_zarr/utils.py +++ b/ome_zarr/utils.py @@ -3,7 +3,7 @@ import json import logging from pathlib 
import Path -from typing import Iterator, List +from typing import Callable, Iterator, List import dask import dask.array as da @@ -17,21 +17,26 @@ LOGGER = logging.getLogger("ome_zarr.utils") -def info(path: str, stats: bool = False) -> Iterator[Node]: - """Print information about an OME-Zarr fileset. - - All :class:`Nodes ` that are found from the given path will - be visited recursively. - """ +def visit(path: str, func: Callable) -> Iterator[Node]: + """Call func(node) for each node read from path.""" zarr = parse_url(path) assert zarr, f"not a zarr: {zarr}" reader = Reader(zarr) for node in reader(): - if not node.specs: print(f"not an ome-zarr node: {node}") continue + yield func(node) + + +def info(path: str, stats: bool = False) -> Iterator[Node]: + """Print information about an OME-Zarr fileset. + + All :class:`Nodes ` that are found from the given path will + be visited recursively. + """ + def func(node: Node) -> Node: print(node) print(" - metadata") for spec in node.specs: @@ -43,7 +48,9 @@ def info(path: str, stats: bool = False) -> Iterator[Node]: minmax = f" minmax={dask.compute(array.min(), array.max())}" print(f" - {array.shape}{minmax}") LOGGER.debug(node.data) - yield node + return node + + return visit(path, func) def validate(path: str, warnings: bool) -> Iterator[Node]: @@ -53,17 +60,13 @@ def validate(path: str, warnings: bool) -> Iterator[Node]: All :class:`Nodes ` that are found from the given path will be visited recursively. 
""" - zarr = parse_url(path) - assert zarr, f"not a zarr: {zarr}" - reader = Reader(zarr) - for node in reader(): - if not node.specs: - print(f"not an ome-zarr node: {node}") - continue + def func(node: Node) -> Node: if hasattr(node, "validate"): node.validate(warnings) - yield node + return node + + return visit(path, func) def download(input_path: str, output_dir: str = ".") -> None: From c1ed01653793e318583275dd388e07ecc7419f66 Mon Sep 17 00:00:00 2001 From: William Moore Date: Fri, 1 Jul 2022 16:35:29 +0100 Subject: [PATCH 18/26] Remove use of ngff repo. Use cached_path instead --- .isort.cfg | 2 +- ome_zarr/reader.py | 30 +++++++++++++++++++++--------- setup.py | 1 + 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/.isort.cfg b/.isort.cfg index 778cc234..9accadcc 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -1,6 +1,6 @@ [settings] -known_third_party = dask,jsonschema,numcodecs,numpy,ome_ngff,pytest,scipy,setuptools,skimage,zarr +known_third_party = cached_path,dask,jsonschema,numcodecs,numpy,pytest,scipy,setuptools,skimage,zarr multi_line_output = 3 include_trailing_comma = True force_grid_wrap = 0 diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 69ac048b..238a1e60 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -1,5 +1,6 @@ """Reading logic for ome-zarr.""" +import json import logging import math from abc import ABC @@ -7,10 +8,11 @@ import dask.array as da import numpy as np +from cached_path import cached_path from dask import delayed +from jsonschema import Draft202012Validator as Validator +from jsonschema import RefResolver from jsonschema import validate as jsonschema_validate -from jsonschema.validators import validator_for -from ome_ngff.schemas import LocalRefResolver, get_schema from .axes import Axes from .format import CurrentFormat, format_from_version @@ -20,6 +22,15 @@ LOGGER = logging.getLogger("ome_zarr.reader") +def get_schema(name: str, version: str, strict: bool = False) -> Dict: + pre = "strict_" if 
strict else "" + schema_url = f"https://ngff.openmicroscopy.org/{version}/schemas/{pre}{name}.schema" + local_path = cached_path(schema_url) + with open(local_path) as f: + sch_string = f.read() + return json.loads(sch_string) + + class Node: """Container for a representation of the binary data somewhere in the data hierarchy.""" @@ -342,7 +353,7 @@ def validate(self, warnings: bool = False) -> None: version = multiscales[0].get("version", CurrentFormat().version) LOGGER.info("Validating Multiscales spec at: %s" % self.zarr) LOGGER.info("Using Multiscales schema version: %s" % version) - image_schema = get_schema(version) + image_schema = get_schema("image", version) # Always do a validation with the MUST rules # Will throw ValidationException if it fails @@ -352,12 +363,13 @@ def validate(self, warnings: bool = False) -> None: # If we're also checking for SHOULD rules, # we want to iterate all errors and show as "Warnings" if warnings: - strict_schema = get_schema(version, strict=True) - cls = validator_for(strict_schema) - cls.check_schema(strict_schema) - # Use our local resolver subclass to resolve local documents - localResolver = LocalRefResolver.from_schema(strict_schema) - validator = cls(strict_schema, resolver=localResolver) + strict_schema = get_schema("image", version, strict=True) + schema_store = { + image_schema["$id"]: image_schema, + strict_schema["$id"]: strict_schema, + } + resolver = RefResolver.from_schema(strict_schema, store=schema_store) + validator = Validator(strict_schema, resolver=resolver) for error in validator.iter_errors(json_data): LOGGER.warn(error.message) diff --git a/setup.py b/setup.py index b8034499..09ead403 100644 --- a/setup.py +++ b/setup.py @@ -26,6 +26,7 @@ def read(fname): install_requires += (["toolz"],) install_requires += (["jsonschema"],) install_requires += (["ome_ngff"],) +install_requires += (["cached_path"],) setup( From e3abc336a39f94d1990044836834f7021ad9dc2c Mon Sep 17 00:00:00 2001 From: William Moore Date: 
Fri, 1 Jul 2022 16:42:33 +0100 Subject: [PATCH 19/26] Remove ome_ngff dependency --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index ed7c1c66..43e60235 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,6 @@ def read(fname): install_requires += (["scikit-image"],) install_requires += (["toolz"],) install_requires += (["jsonschema"],) -install_requires += (["ome_ngff"],) install_requires += (["cached_path"],) From 5fef946481a9828373e08c069d837d6a28c9a697 Mon Sep 17 00:00:00 2001 From: William Moore Date: Mon, 4 Jul 2022 10:58:59 +0100 Subject: [PATCH 20/26] Add cached_path to dependencies in environment.yml --- environment.yml | 1 + ome_zarr/reader.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/environment.yml b/environment.yml index e1ae4d52..01b20d24 100644 --- a/environment.yml +++ b/environment.yml @@ -4,6 +4,7 @@ channels: - conda-forge - defaults dependencies: + - cached_path - flake8 - ipython - jsonschema diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index c0aa3ed5..8264a6b9 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -364,6 +364,8 @@ def validate(self, warnings: bool = False) -> None: # we want to iterate all errors and show as "Warnings" if warnings: strict_schema = get_schema("image", version, strict=True) + # we only need this store to allow use of cached schemas + # (and potential off-line use) schema_store = { image_schema["$id"]: image_schema, strict_schema["$id"]: strict_schema, From 3ad5315829f94ca7d516aac9f2c352c8b7cbd334 Mon Sep 17 00:00:00 2001 From: William Moore Date: Mon, 4 Jul 2022 23:33:59 +0100 Subject: [PATCH 21/26] Rename --warnings to --strict. 
Support Plate and Well --- ome_zarr/cli.py | 5 +-- ome_zarr/reader.py | 76 ++++++++++++++++++++++++++++++---------------- ome_zarr/utils.py | 4 +-- 3 files changed, 54 insertions(+), 31 deletions(-) diff --git a/ome_zarr/cli.py b/ome_zarr/cli.py index 14aa1e1d..adb4749a 100755 --- a/ome_zarr/cli.py +++ b/ome_zarr/cli.py @@ -33,7 +33,7 @@ def info(args: argparse.Namespace) -> None: def validate(args: argparse.Namespace) -> None: """Wrap the :func:`~ome_zarr.utils.validate` method.""" config_logging(logging.WARN, args) - list(zarr_validate(args.path, args.warnings)) + list(zarr_validate(args.path, args.strict)) def download(args: argparse.Namespace) -> None: @@ -109,7 +109,8 @@ def main(args: List[str] = None) -> None: # validate parser_validate = subparsers.add_parser("validate") parser_validate.add_argument("path") - parser_validate.add_argument("--warnings", action="store_true") + parser_validate.add_argument("--strict", action="store_true", + help="validate using a strict schema") parser_validate.set_defaults(func=validate) # download diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 8264a6b9..f5133151 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -25,6 +25,10 @@ def get_schema(name: str, version: str, strict: bool = False) -> Dict: pre = "strict_" if strict else "" schema_url = f"https://ngff.openmicroscopy.org/{version}/schemas/{pre}{name}.schema" + + # plate 404 at URL above + if name in ("plate", "well"): + schema_url = f"https://raw.githubusercontent.com/ome/ngff/main/{version}/schemas/{pre}{name}.schema" local_path = cached_path(schema_url) with open(local_path) as f: sch_string = f.read() @@ -197,9 +201,40 @@ def __init__(self, node: Node) -> None: def lookup(self, key: str, default: Any) -> Any: return self.zarr.root_attrs.get(key, default) - def validate(self, warnings: bool = False) -> None: - # If not implemented, ignore for now - pass + def get_schema(self, strict: Optional[bool]=False) -> Optional[Dict]: + # If not implemented 
then validate will be no-op + return None + + def validate(self, strict: bool = False) -> None: + # multiscales = self.lookup("multiscales", []) + # version = multiscales[0].get("version", CurrentFormat().version) + schema = self.get_schema() + if schema is None: + LOGGER.info("No schema for %s" % self.zarr) + return + LOGGER.info("Validating Multiscales spec at: %s" % self.zarr) + + # Always do a validation with the MUST rules + # Will throw ValidationException if it fails + json_data = self.zarr.root_attrs + jsonschema_validate(instance=json_data, schema=schema) + + # If we're also checking for SHOULD rules, + # we want to iterate all errors and show as Warnings + if strict: + strict_schema = self.get_schema(strict=True) + if strict_schema is None: + return + # we only need this store to allow use of cached schemas + # (and potential off-line use) + schema_store = { + schema["$id"]: schema, + strict_schema["$id"]: strict_schema, + } + resolver = RefResolver.from_schema(strict_schema, store=schema_store) + validator = Validator(strict_schema, resolver=resolver) + for error in validator.iter_errors(json_data): + LOGGER.warn(error.message) class Labels(Spec): @@ -348,32 +383,10 @@ def array(self, resolution: str, version: str) -> da.core.Array: # data.shape is (t, c, z, y, x) by convention return self.zarr.load(resolution) - def validate(self, warnings: bool = False) -> None: + def get_schema(self, strict: Optional[bool]=False) -> Optional[Dict]: multiscales = self.lookup("multiscales", []) version = multiscales[0].get("version", CurrentFormat().version) - LOGGER.info("Validating Multiscales spec at: %s" % self.zarr) - LOGGER.info("Using Multiscales schema version: %s" % version) - image_schema = get_schema("image", version) - - # Always do a validation with the MUST rules - # Will throw ValidationException if it fails - json_data = self.zarr.root_attrs - jsonschema_validate(instance=json_data, schema=image_schema) - - # If we're also checking for SHOULD rules, - # 
we want to iterate all errors and show as "Warnings" - if warnings: - strict_schema = get_schema("image", version, strict=True) - # we only need this store to allow use of cached schemas - # (and potential off-line use) - schema_store = { - image_schema["$id"]: image_schema, - strict_schema["$id"]: strict_schema, - } - resolver = RefResolver.from_schema(strict_schema, store=schema_store) - validator = Validator(strict_schema, resolver=resolver) - for error in validator.iter_errors(json_data): - LOGGER.warn(error.message) + return get_schema("image", version, strict) class OMERO(Spec): @@ -516,6 +529,11 @@ def get_lazy_well(level: int, tile_shape: tuple) -> da.Array: node.data = pyramid node.metadata = image_node.metadata + def get_schema(self, strict: Optional[bool]=False) -> Optional[Dict]: + well = self.lookup("well", {}) + version = well.get("version", CurrentFormat().version) + return get_schema("well", version, strict) + class Plate(Spec): @staticmethod @@ -613,6 +631,10 @@ def get_tile(tile_name: str) -> np.ndarray: lazy_rows.append(da.concatenate(lazy_row, axis=len(self.axes) - 1)) return da.concatenate(lazy_rows, axis=len(self.axes) - 2) + def get_schema(self, strict: Optional[bool]=False) -> Optional[Dict]: + plate = self.lookup("plate", {}) + version = plate.get("version", CurrentFormat().version) + return get_schema("plate", version, strict) class PlateLabels(Plate): def get_tile_path(self, level: int, row: int, col: int) -> str: # pragma: no cover diff --git a/ome_zarr/utils.py b/ome_zarr/utils.py index 71797987..5dc41656 100644 --- a/ome_zarr/utils.py +++ b/ome_zarr/utils.py @@ -53,7 +53,7 @@ def func(node: Node) -> Node: return visit(path, func) -def validate(path: str, warnings: bool) -> Iterator[Node]: +def validate(path: str, strict: bool) -> Iterator[Node]: """ Validate OME-NGFF data @@ -63,7 +63,7 @@ def validate(path: str, warnings: bool) -> Iterator[Node]: def func(node: Node) -> Node: if hasattr(node, "validate"): - node.validate(warnings) + 
node.validate(strict) return node return visit(path, func) From 5fcfe9426ca3d04e7effd0b95dd4dfa173c9976b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 4 Jul 2022 22:34:37 +0000 Subject: [PATCH 22/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ome_zarr/cli.py | 5 +++-- ome_zarr/reader.py | 9 +++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/ome_zarr/cli.py b/ome_zarr/cli.py index adb4749a..39fe9328 100755 --- a/ome_zarr/cli.py +++ b/ome_zarr/cli.py @@ -109,8 +109,9 @@ def main(args: List[str] = None) -> None: # validate parser_validate = subparsers.add_parser("validate") parser_validate.add_argument("path") - parser_validate.add_argument("--strict", action="store_true", - help="validate using a strict schema") + parser_validate.add_argument( + "--strict", action="store_true", help="validate using a strict schema" + ) parser_validate.set_defaults(func=validate) # download diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index f5133151..0e0c25f6 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -201,7 +201,7 @@ def __init__(self, node: Node) -> None: def lookup(self, key: str, default: Any) -> Any: return self.zarr.root_attrs.get(key, default) - def get_schema(self, strict: Optional[bool]=False) -> Optional[Dict]: + def get_schema(self, strict: Optional[bool] = False) -> Optional[Dict]: # If not implemented then validate will be no-op return None @@ -383,7 +383,7 @@ def array(self, resolution: str, version: str) -> da.core.Array: # data.shape is (t, c, z, y, x) by convention return self.zarr.load(resolution) - def get_schema(self, strict: Optional[bool]=False) -> Optional[Dict]: + def get_schema(self, strict: Optional[bool] = False) -> Optional[Dict]: multiscales = self.lookup("multiscales", []) version = multiscales[0].get("version", CurrentFormat().version) return get_schema("image", version, 
strict) @@ -529,7 +529,7 @@ def get_lazy_well(level: int, tile_shape: tuple) -> da.Array: node.data = pyramid node.metadata = image_node.metadata - def get_schema(self, strict: Optional[bool]=False) -> Optional[Dict]: + def get_schema(self, strict: Optional[bool] = False) -> Optional[Dict]: well = self.lookup("well", {}) version = well.get("version", CurrentFormat().version) return get_schema("well", version, strict) @@ -631,11 +631,12 @@ def get_tile(tile_name: str) -> np.ndarray: lazy_rows.append(da.concatenate(lazy_row, axis=len(self.axes) - 1)) return da.concatenate(lazy_rows, axis=len(self.axes) - 2) - def get_schema(self, strict: Optional[bool]=False) -> Optional[Dict]: + def get_schema(self, strict: Optional[bool] = False) -> Optional[Dict]: plate = self.lookup("plate", {}) version = plate.get("version", CurrentFormat().version) return get_schema("plate", version, strict) + class PlateLabels(Plate): def get_tile_path(self, level: int, row: int, col: int) -> str: # pragma: no cover """251.zarr/A/1/0/labels/0/3/""" From 6774922707ecd7f835be300d730cff8e55f76900 Mon Sep 17 00:00:00 2001 From: William Moore Date: Mon, 4 Jul 2022 23:48:26 +0100 Subject: [PATCH 23/26] Update test to use --strict --- ome_zarr/reader.py | 13 ++++++++----- tests/test_cli.py | 8 ++++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 0e0c25f6..0c0e4bb4 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -28,7 +28,10 @@ def get_schema(name: str, version: str, strict: bool = False) -> Dict: # plate 404 at URL above if name in ("plate", "well"): - schema_url = f"https://raw.githubusercontent.com/ome/ngff/main/{version}/schemas/{pre}{name}.schema" + schema_url = ( + f"https://raw.githubusercontent.com/ome/ngff/main/" + f"{version}/schemas/{pre}{name}.schema" + ) local_path = cached_path(schema_url) with open(local_path) as f: sch_string = f.read() @@ -201,7 +204,7 @@ def __init__(self, node: Node) -> None: def 
lookup(self, key: str, default: Any) -> Any: return self.zarr.root_attrs.get(key, default) - def get_schema(self, strict: Optional[bool] = False) -> Optional[Dict]: + def get_schema(self, strict: bool = False) -> Optional[Dict]: # If not implemented then validate will be no-op return None @@ -383,7 +386,7 @@ def array(self, resolution: str, version: str) -> da.core.Array: # data.shape is (t, c, z, y, x) by convention return self.zarr.load(resolution) - def get_schema(self, strict: Optional[bool] = False) -> Optional[Dict]: + def get_schema(self, strict: bool = False) -> Optional[Dict]: multiscales = self.lookup("multiscales", []) version = multiscales[0].get("version", CurrentFormat().version) return get_schema("image", version, strict) @@ -529,7 +532,7 @@ def get_lazy_well(level: int, tile_shape: tuple) -> da.Array: node.data = pyramid node.metadata = image_node.metadata - def get_schema(self, strict: Optional[bool] = False) -> Optional[Dict]: + def get_schema(self, strict: bool = False) -> Optional[Dict]: well = self.lookup("well", {}) version = well.get("version", CurrentFormat().version) return get_schema("well", version, strict) @@ -631,7 +634,7 @@ def get_tile(tile_name: str) -> np.ndarray: lazy_rows.append(da.concatenate(lazy_row, axis=len(self.axes) - 1)) return da.concatenate(lazy_rows, axis=len(self.axes) - 2) - def get_schema(self, strict: Optional[bool] = False) -> Optional[Dict]: + def get_schema(self, strict: bool = False) -> Optional[Dict]: plate = self.lookup("plate", {}) version = plate.get("version", CurrentFormat().version) return get_schema("plate", version, strict) diff --git a/tests/test_cli.py b/tests/test_cli.py index 0a4357f8..dc8f99f9 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -33,12 +33,12 @@ def test_astronaut_info(self): main(["create", "--method=astronaut", filename]) main(["info", filename]) - @pytest.mark.parametrize("warnings", [False, True]) - def test_astronaut_validation(self, warnings): + 
@pytest.mark.parametrize("strict", [False, True]) + def test_astronaut_validation(self, strict): filename = str(self.path) + "-2" main(["create", "--method=astronaut", filename]) - if warnings: - main(["validate", "--warnings", filename]) + if strict: + main(["validate", "--strict", filename]) else: main(["validate", filename]) From 2bbce18e46c9d2c3f51357b0dd7c0d87984262ab Mon Sep 17 00:00:00 2001 From: William Moore Date: Tue, 5 Jul 2022 13:26:56 +0100 Subject: [PATCH 24/26] Support --clear_cache argument for validate --- ome_zarr/cli.py | 7 ++++++- ome_zarr/reader.py | 11 ++--------- ome_zarr/utils.py | 8 +++++++- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/ome_zarr/cli.py b/ome_zarr/cli.py index 39fe9328..6f2e6f0a 100755 --- a/ome_zarr/cli.py +++ b/ome_zarr/cli.py @@ -33,7 +33,7 @@ def info(args: argparse.Namespace) -> None: def validate(args: argparse.Namespace) -> None: """Wrap the :func:`~ome_zarr.utils.validate` method.""" config_logging(logging.WARN, args) - list(zarr_validate(args.path, args.strict)) + list(zarr_validate(args.path, args.strict, args.clear_cache)) def download(args: argparse.Namespace) -> None: @@ -112,6 +112,11 @@ def main(args: List[str] = None) -> None: parser_validate.add_argument( "--strict", action="store_true", help="validate using a strict schema" ) + parser_validate.add_argument( + "--clear_cache", + action="store_true", + help="Remove any cached schemas to force reload", + ) parser_validate.set_defaults(func=validate) # download diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 0c0e4bb4..4be1f885 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -25,13 +25,6 @@ def get_schema(name: str, version: str, strict: bool = False) -> Dict: pre = "strict_" if strict else "" schema_url = f"https://ngff.openmicroscopy.org/{version}/schemas/{pre}{name}.schema" - - # plate 404 at URL above - if name in ("plate", "well"): - schema_url = ( - f"https://raw.githubusercontent.com/ome/ngff/main/" - 
f"{version}/schemas/{pre}{name}.schema" - ) local_path = cached_path(schema_url) with open(local_path) as f: sch_string = f.read() @@ -127,11 +120,11 @@ def load(self, spec_type: Type["Spec"]) -> Optional["Spec"]: return spec return None - def validate(self, warnings: bool) -> None: + def validate(self, strict: bool) -> None: # Validation for a node is delegated to each spec # e.g. Labels may have spec for multiscales and labels for spec in self.specs: - spec.validate(warnings) + spec.validate(strict) def add( self, diff --git a/ome_zarr/utils.py b/ome_zarr/utils.py index 5dc41656..5985e2a0 100644 --- a/ome_zarr/utils.py +++ b/ome_zarr/utils.py @@ -2,12 +2,14 @@ import json import logging +import shutil from pathlib import Path from typing import Callable, Iterator, List import dask import dask.array as da import zarr +from cached_path import get_cache_dir from dask.diagnostics import ProgressBar from .io import parse_url @@ -53,7 +55,7 @@ def func(node: Node) -> Node: return visit(path, func) -def validate(path: str, strict: bool) -> Iterator[Node]: +def validate(path: str, strict: bool, clear_cache: bool = False) -> Iterator[Node]: """ Validate OME-NGFF data @@ -61,6 +63,10 @@ def validate(path: str, strict: bool) -> Iterator[Node]: be visited recursively. """ + if clear_cache: + dir_path = get_cache_dir() + shutil.rmtree(dir_path, ignore_errors=True) + def func(node: Node) -> Node: if hasattr(node, "validate"): node.validate(strict) From 96961bd0e50b1a7a459ad463892b408e94d6dd7d Mon Sep 17 00:00:00 2001 From: William Moore Date: Tue, 5 Jul 2022 15:51:52 +0100 Subject: [PATCH 25/26] Replace spec.get_schema() with SCHEMA_NAME --- ome_zarr/reader.py | 53 +++++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 4be1f885..c9daac35 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -182,6 +182,9 @@ class Spec(ABC): Multiple subclasses may apply. 
""" + SCHEMA_NAME: str + version: str + @staticmethod def matches(zarr: ZarrLocation) -> bool: raise NotImplementedError() @@ -197,18 +200,12 @@ def __init__(self, node: Node) -> None: def lookup(self, key: str, default: Any) -> Any: return self.zarr.root_attrs.get(key, default) - def get_schema(self, strict: bool = False) -> Optional[Dict]: - # If not implemented then validate will be no-op - return None - def validate(self, strict: bool = False) -> None: - # multiscales = self.lookup("multiscales", []) - # version = multiscales[0].get("version", CurrentFormat().version) - schema = self.get_schema() - if schema is None: + if not hasattr(self, "SCHEMA_NAME"): LOGGER.info("No schema for %s" % self.zarr) return LOGGER.info("Validating Multiscales spec at: %s" % self.zarr) + schema = get_schema(self.SCHEMA_NAME, self.version) # Always do a validation with the MUST rules # Will throw ValidationException if it fails @@ -218,7 +215,7 @@ def validate(self, strict: bool = False) -> None: # If we're also checking for SHOULD rules, # we want to iterate all errors and show as Warnings if strict: - strict_schema = self.get_schema(strict=True) + strict_schema = get_schema(self.SCHEMA_NAME, self.version, strict=True) if strict_schema is None: return # we only need this store to allow use of cached schemas @@ -321,6 +318,9 @@ def __init__(self, node: Node) -> None: class Multiscales(Spec): + + SCHEMA_NAME = "image" + @staticmethod def matches(zarr: ZarrLocation) -> bool: """is multiscales metadata present?""" @@ -334,12 +334,10 @@ def __init__(self, node: Node) -> None: try: multiscales = self.lookup("multiscales", []) - version = multiscales[0].get( - "version", "0.1" - ) # should this be matched with Format.version? 
+ self.version = multiscales[0].get("version", CurrentFormat().version) datasets = multiscales[0]["datasets"] axes = multiscales[0].get("axes") - fmt = format_from_version(version) + fmt = format_from_version(self.version) # Raises ValueError if not valid axes_obj = Axes(axes, fmt) node.metadata["axes"] = axes_obj.to_list() @@ -354,7 +352,7 @@ def __init__(self, node: Node) -> None: return # EARLY EXIT for resolution in self.datasets: - data: da.core.Array = self.array(resolution, version) + data: da.core.Array = self.array(resolution, self.version) chunk_sizes = [ str(c[0]) + (" (+ %s)" % c[-1] if c[-1] != c[0] else "") for c in data.chunks @@ -379,11 +377,6 @@ def array(self, resolution: str, version: str) -> da.core.Array: # data.shape is (t, c, z, y, x) by convention return self.zarr.load(resolution) - def get_schema(self, strict: bool = False) -> Optional[Dict]: - multiscales = self.lookup("multiscales", []) - version = multiscales[0].get("version", CurrentFormat().version) - return get_schema("image", version, strict) - class OMERO(Spec): @staticmethod @@ -455,6 +448,9 @@ def __init__(self, node: Node) -> None: class Well(Spec): + + SCHEMA_NAME = "well" + @staticmethod def matches(zarr: ZarrLocation) -> bool: return bool("well" in zarr.root_attrs) @@ -462,6 +458,7 @@ def matches(zarr: ZarrLocation) -> bool: def __init__(self, node: Node) -> None: super().__init__(node) self.well_data = self.lookup("well", {}) + self.version = self.well_data.get("version", CurrentFormat().version) LOGGER.info("well_data: %s", self.well_data) image_paths = [image["path"] for image in self.well_data.get("images")] @@ -525,13 +522,11 @@ def get_lazy_well(level: int, tile_shape: tuple) -> da.Array: node.data = pyramid node.metadata = image_node.metadata - def get_schema(self, strict: bool = False) -> Optional[Dict]: - well = self.lookup("well", {}) - version = well.get("version", CurrentFormat().version) - return get_schema("well", version, strict) - class Plate(Spec): + + 
SCHEMA_NAME = "plate" + @staticmethod def matches(zarr: ZarrLocation) -> bool: return bool("plate" in zarr.root_attrs) @@ -539,6 +534,9 @@ def matches(zarr: ZarrLocation) -> bool: def __init__(self, node: Node) -> None: super().__init__(node) LOGGER.debug(f"Plate created with ZarrLocation fmt:{ self.zarr.fmt}") + self.plate_data = self.lookup("plate", {}) + self.version = self.plate_data.get("version", CurrentFormat().version) + LOGGER.info("plate_data: %s", self.plate_data) self.get_pyramid_lazy(node) def get_pyramid_lazy(self, node: Node) -> None: @@ -546,8 +544,6 @@ def get_pyramid_lazy(self, node: Node) -> None: Return a pyramid of dask data, where the highest resolution is the stitched full-resolution images. """ - self.plate_data = self.lookup("plate", {}) - LOGGER.info("plate_data: %s", self.plate_data) self.rows = self.plate_data.get("rows") self.columns = self.plate_data.get("columns") self.first_field = "0" @@ -627,11 +623,6 @@ def get_tile(tile_name: str) -> np.ndarray: lazy_rows.append(da.concatenate(lazy_row, axis=len(self.axes) - 1)) return da.concatenate(lazy_rows, axis=len(self.axes) - 2) - def get_schema(self, strict: bool = False) -> Optional[Dict]: - plate = self.lookup("plate", {}) - version = plate.get("version", CurrentFormat().version) - return get_schema("plate", version, strict) - class PlateLabels(Plate): def get_tile_path(self, level: int, row: int, col: int) -> str: # pragma: no cover From c03b0a054285abb5f65cc8014d3fa78ff0c8182d Mon Sep 17 00:00:00 2001 From: William Moore Date: Tue, 5 Jul 2022 16:43:43 +0100 Subject: [PATCH 26/26] Use format.get_metadata_version(data) to get version Need to create a format instance (of the correct version) since this isn't a static method --- ome_zarr/format.py | 4 ++-- ome_zarr/reader.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ome_zarr/format.py b/ome_zarr/format.py index 5bca76de..959cd47e 100644 --- a/ome_zarr/format.py +++ b/ome_zarr/format.py @@ -64,7 +64,7 @@
def init_store(self, path: str, mode: str = "r") -> FSStore: def init_channels(self) -> None: # pragma: no cover raise NotImplementedError() - def _get_metadata_version(self, metadata: dict) -> Optional[str]: + def get_metadata_version(self, metadata: dict) -> Optional[str]: """ Checks the metadata dict for a version @@ -127,7 +127,7 @@ def version(self) -> str: return "0.1" def matches(self, metadata: dict) -> bool: - version = self._get_metadata_version(metadata) + version = self.get_metadata_version(metadata) LOGGER.debug(f"{self.version} matches {version}?") return version == self.version diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index c9daac35..381b98b6 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -15,7 +15,7 @@ from jsonschema import validate as jsonschema_validate from .axes import Axes -from .format import CurrentFormat, format_from_version +from .format import CurrentFormat, detect_format, format_from_version from .io import ZarrLocation from .types import JSONDict @@ -192,6 +192,9 @@ def matches(zarr: ZarrLocation) -> bool: def __init__(self, node: Node) -> None: self.node = node self.zarr = node.zarr + fmt = detect_format(self.zarr.root_attrs, CurrentFormat()) + version = fmt.get_metadata_version(self.zarr.root_attrs) + self.version = version if version is not None else fmt.version LOGGER.debug(f"treating {self.zarr} as {self.__class__.__name__}") for k, v in self.zarr.root_attrs.items(): LOGGER.info("root_attr: %s", k) @@ -334,7 +337,6 @@ def __init__(self, node: Node) -> None: try: multiscales = self.lookup("multiscales", []) - self.version = multiscales[0].get("version", CurrentFormat().version) datasets = multiscales[0]["datasets"] axes = multiscales[0].get("axes") fmt = format_from_version(self.version) @@ -458,7 +460,6 @@ def matches(zarr: ZarrLocation) -> bool: def __init__(self, node: Node) -> None: super().__init__(node) self.well_data = self.lookup("well", {}) - self.version = self.well_data.get("version", 
CurrentFormat().version) LOGGER.info("well_data: %s", self.well_data) image_paths = [image["path"] for image in self.well_data.get("images")] @@ -535,7 +536,6 @@ def __init__(self, node: Node) -> None: super().__init__(node) LOGGER.debug(f"Plate created with ZarrLocation fmt:{ self.zarr.fmt}") self.plate_data = self.lookup("plate", {}) - self.version = self.plate_data.get("version", CurrentFormat().version) LOGGER.info("plate_data: %s", self.plate_data) self.get_pyramid_lazy(node)