From 64bbbd8fce888cb45ed74763e21381831b2f51a9 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Mon, 15 Nov 2021 21:53:45 -0500 Subject: [PATCH 001/109] add v3 store classes Define the StoreV3 class and create v3 versions of most existing stores Add a test_storage_v3.py with test classes inheriting from their v2 counterparts. Only a subset of methods involving differences in v3 behavior were overridden. --- zarr/_storage/store.py | 251 +++++++++- zarr/meta.py | 235 +++++++++- zarr/storage.py | 731 +++++++++++++++++++++++++---- zarr/tests/test_storage.py | 18 +- zarr/tests/test_storage_v3.py | 847 ++++++++++++++++++++++++++++++++++ zarr/tests/util.py | 6 +- 6 files changed, 1996 insertions(+), 92 deletions(-) create mode 100644 zarr/tests/test_storage_v3.py diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 6f5bf78e28..0ff9e0c043 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -1,7 +1,10 @@ +import json +import sys from collections.abc import MutableMapping +from string import ascii_letters, digits from typing import Any, List, Optional, Union -from zarr.meta import Metadata2 +from zarr.meta import Metadata2, Metadata3, _default_entry_point_metadata_v3 from zarr.util import normalize_storage_path # v2 store keys @@ -131,6 +134,169 @@ def rmdir(self, path: str = "") -> None: _rmdir_from_keys(self, path) +class StoreV3(BaseStore): + _store_version = 3 + _metadata_class = Metadata3 + + @staticmethod + def _valid_key(key: str) -> bool: + """ + Verify that a key conforms to the specification. + + A key is any string containing only character in the range a-z, A-Z, + 0-9, or in the set /.-_ it will return True if that's the case, False + otherwise. + + In addition, in spec v3, keys can only start with the prefix meta/, + data/ or be exactly zarr.json and should not end with /. This should + not be exposed to the user, and is a store implementation detail, so + this method will raise a ValueError in that case. + """ + if sys.version_info > (3, 7): + if not key.isascii(): + return False + if set(key) - set(ascii_letters + digits + "/.-_"): + return False + + if ( + not key.startswith("data/") + and (not key.startswith("meta/")) + and (not key == "zarr.json") + ): + raise ValueError("keys starts with unexpected value: `{}`".format(key)) + + if key.endswith('/'): + raise ValueError("keys may not end in /") + + return True + + def list_prefix(self, prefix): + if prefix.startswith('/'): + raise ValueError("prefix must not begin with /") + # TODO: force prefix to end with /? + return [k for k in self.list() if k.startswith(prefix)] + + def erase(self, key): + self.__delitem__(key) + + def erase_prefix(self, prefix): + assert prefix.endswith("/") + + if prefix == "/": + all_keys = self.list() + else: + all_keys = self.list_prefix(prefix) + for key in all_keys: + self.erase(key) + + def list_dir(self, prefix): + """ + Note: carefully test this with trailing/leading slashes + """ + if prefix: # allow prefix = "" ? + assert prefix.endswith("/") + + all_keys = self.list_prefix(prefix) + len_prefix = len(prefix) + keys = [] + prefixes = [] + for k in all_keys: + trail = k[len_prefix:] + if "/" not in trail: + keys.append(prefix + trail) + else: + prefixes.append(prefix + trail.split("/", maxsplit=1)[0] + "/") + return keys, list(set(prefixes)) + + def list(self): + if hasattr(self, 'keys'): + return list(self.keys()) + raise NotImplementedError( + "The list method has not been implemented for this store type." + ) + + # TODO: Remove listdir? 
+    # This method exists only to match the current V2 stores.
+    # The v3 spec mentions: list, list_dir, list_prefix
+    def listdir(self, path: str = ""):
+        if path and not path.endswith("/"):
+            path = path + "/"
+        keys, prefixes = self.list_dir(path)
+        prefixes = [p[len(path):].rstrip("/") for p in prefixes]
+        keys = [k[len(path):] for k in keys]
+        return keys + prefixes

+    # TODO: rmdir here is identical to the rmdir on Store so could potentially
+    # move to BaseStore instead.
+    def rmdir(self, path: str = "") -> None:
+        if not self.is_erasable():
+            raise NotImplementedError(
+                f'{type(self)} is not erasable, cannot call "rmdir"'
+            )  # pragma: no cover
+        path = normalize_storage_path(path)
+        _rmdir_from_keys(self, path)

+    def __contains__(self, key):
+        # TODO: re-enable this check?
+        # if not key.startswith(("meta/", "data/")):
+        #     raise ValueError(
+        #         f'Key must start with either "meta/" or "data/". '
+        #         f'Got {key}'
+        #     )
+        return key in self.list()

+    def clear(self):
+        """Remove all items from store."""
+        self.erase_prefix("/")

+    def __eq__(self, other):
+        from zarr.storage import KVStoreV3  # avoid circular import
+        if isinstance(other, KVStoreV3):
+            return self._mutable_mapping == other._mutable_mapping
+        else:
+            return NotImplemented

+    @staticmethod
+    def _ensure_store(store):
+        """
+        We want to make sure internally that zarr stores are always a class
+        with a specific interface derived from ``Store``, which is slightly
+        different from ``MutableMapping``.
+
+        We'll do this conversion in a few places automatically.
+        """
+        from zarr.storage import KVStoreV3  # avoid circular import
+        if store is None:
+            return None
+        elif isinstance(store, StoreV3):
+            return store
+        elif isinstance(store, MutableMapping):
+            return KVStoreV3(store)
+        else:
+            for attr in [
+                "keys",
+                "values",
+                "get",
+                "__setitem__",
+                "__getitem__",
+                "__delitem__",
+                "__contains__",
+            ]:
+                if not hasattr(store, attr):
+                    break
+            else:
+                return KVStoreV3(store)

+        raise ValueError(
+            "v3 stores must be subclasses of StoreV3; if your store exposes "
+            "the MutableMapping interface, wrap it in "
+            f"zarr.storage.KVStoreV3. Got {store}"
+        )


+# allow MutableMapping for backwards compatibility
+StoreLike = Union[BaseStore, MutableMapping]


 def _path_to_prefix(path: Optional[str]) -> str:
     # assume path already normalized
     if path:
@@ -140,17 +306,49 @@ def _path_to_prefix(path: Optional[str]) -> str:
     return prefix


+# TODO: Should this return default metadata or raise an error if zarr.json
+# is absent?
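For context on the entry-point document that `_get_hierarchy_metadata` below reads: a minimal sketch of the v3 key layout these helpers rely on, using the `KVStoreV3` wrapper defined later in this patch (the `foo`/`baz` keys are illustrative only):

```python
from zarr.storage import KVStoreV3

# Any MutableMapping can back a v3 store; StoreV3._ensure_store performs
# this wrapping automatically.
store = KVStoreV3(dict())

# Per _valid_key, every key is either exactly "zarr.json" or lives under
# "meta/" or "data/".
store['zarr.json'] = store._metadata_class.encode_hierarchy_metadata(None)
store['meta/root/foo.group.json'] = b'{"attributes": {}}'
store['data/root/foo/baz/0'] = b'\x00\x01'

print(store.list_prefix('meta/'))    # ['meta/root/foo.group.json']
print(store.list_dir('data/root/'))  # ([], ['data/root/foo/'])
```

`_get_hierarchy_metadata` then reads `metadata_key_suffix` from that `zarr.json` document in order to locate the `.array.json`/`.group.json` metadata keys.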
+def _get_hierarchy_metadata(store=None): + meta = _default_entry_point_metadata_v3 + if store is not None: + version = getattr(store, '_store_version', 2) + if version < 3: + raise ValueError("zarr.json hierarchy metadata not stored for " + f"zarr v{version} stores") + if 'zarr.json' in store: + meta = store._metadata_class.decode_hierarchy_metadata(store['zarr.json']) + return meta + + def _rename_from_keys(store: BaseStore, src_path: str, dst_path: str) -> None: # assume path already normalized src_prefix = _path_to_prefix(src_path) dst_prefix = _path_to_prefix(dst_path) - for key in list(store.keys()): - if key.startswith(src_prefix): - new_key = dst_prefix + key.lstrip(src_prefix) - store[new_key] = store.pop(key) - - -def _rmdir_from_keys(store: Union[BaseStore, MutableMapping], path: Optional[str] = None) -> None: + version = getattr(store, '_store_version', 2) + if version == 2: + root_prefixes = [''] + elif version == 3: + root_prefixes = ['meta/root/', 'data/root/'] + for root_prefix in root_prefixes: + _src_prefix = root_prefix + src_prefix + _dst_prefix = root_prefix + dst_prefix + for key in list(store.keys()): + if key.startswith(_src_prefix): + new_key = _dst_prefix + key.lstrip(_src_prefix) + store[new_key] = store.pop(key) + if version == 3: + sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] + _src_array_json = 'meta/root/' + src_prefix[:-1] + '.array' + sfx + if _src_array_json in store: + new_key = 'meta/root/' + dst_prefix[:-1] + '.array' + sfx + store[new_key] = store.pop(_src_array_json) + _src_group_json = 'meta/root/' + src_prefix[:-1] + '.group' + sfx + if _src_group_json in store: + new_key = 'meta/root/' + dst_prefix[:-1] + '.group' + sfx + store[new_key] = store.pop(_src_group_json) + + +def _rmdir_from_keys(store: StoreLike, path: Optional[str] = None) -> None: # assume path already normalized prefix = _path_to_prefix(path) for key in list(store.keys()): @@ -168,3 +366,40 @@ def _listdir_from_keys(store: BaseStore, path: Optional[str] = None) -> List[str child = suffix.split('/')[0] children.add(child) return sorted(children) + + +def _prefix_to_array_key(store: StoreLike, prefix: str) -> str: + if getattr(store, "_store_version", 2) == 3: + if prefix: + sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] + key = "meta/root/" + prefix.rstrip("/") + ".array" + sfx + else: + raise ValueError("prefix must be supplied to get a v3 array key") + else: + key = prefix + array_meta_key + return key + + +def _prefix_to_group_key(store: StoreLike, prefix: str) -> str: + if getattr(store, "_store_version", 2) == 3: + if prefix: + sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] + key = "meta/root/" + prefix.rstrip('/') + ".group" + sfx + else: + raise ValueError("prefix must be supplied to get a v3 group key") + else: + key = prefix + group_meta_key + return key + + +def _prefix_to_attrs_key(store: StoreLike, prefix: str) -> str: + if getattr(store, "_store_version", 2) == 3: + # for v3, attributes are stored in the array metadata + sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] + if prefix: + key = "meta/root/" + prefix.rstrip('/') + ".array" + sfx + else: + raise ValueError("prefix must be supplied to get a v3 array key") + else: + key = prefix + attrs_key + return key diff --git a/zarr/meta.py b/zarr/meta.py index c292b09a14..07fbdcb7d4 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -1,4 +1,6 @@ import base64 +import itertools +import os from collections.abc import Mapping import numpy as np @@ -9,6 +11,37 @@ from typing 
import cast, Union, Any, List, Mapping as MappingType ZARR_FORMAT = 2 +ZARR_FORMAT_v3 = 3 + +FLOAT_FILLS = {"NaN": np.nan, "Infinity": np.PINF, "-Infinity": np.NINF} + + +_v3_core_type = set( + "".join(d) + for d in itertools.product("<>", ("u", "i", "f"), ("2", "4", "8")) +) +_v3_core_type = {"bool", "i1", "u1"} | _v3_core_type + +ZARR_V3_CORE_DTYPES_ONLY = int(os.environ.get("ZARR_V3_CORE_DTYPES_ONLY", False)) +ZARR_V3_ALLOW_COMPLEX = int(os.environ.get("ZARR_V3_ALLOW_COMPLEX", + not ZARR_V3_CORE_DTYPES_ONLY)) +ZARR_V3_ALLOW_DATETIME = int(os.environ.get("ZARR_V3_ALLOW_DATETIME", + not ZARR_V3_CORE_DTYPES_ONLY)) +ZARR_V3_ALLOW_STRUCTURED = int(os.environ.get("ZARR_V3_ALLOW_STRUCTURED", + not ZARR_V3_CORE_DTYPES_ONLY)) +ZARR_V3_ALLOW_OBJECTARRAY = int(os.environ.get("ZARR_V3_ALLOW_OBJECTARRAY", + not ZARR_V3_CORE_DTYPES_ONLY)) +ZARR_V3_ALLOW_BYTES_ARRAY = int(os.environ.get("ZARR_V3_ALLOW_BYTES_ARRAY", + not ZARR_V3_CORE_DTYPES_ONLY)) +ZARR_V3_ALLOW_UNICODE_ARRAY = int(os.environ.get("ZARR_V3_ALLOW_UNICODE_ARRAY", + not ZARR_V3_CORE_DTYPES_ONLY)) + +_default_entry_point_metadata_v3 = { + 'zarr_format': "https://purl.org/zarr/spec/protocol/core/3.0", + 'metadata_encoding': "https://purl.org/zarr/spec/protocol/core/3.0", + 'metadata_key_suffix': '.json', + "extensions": [], +} class Metadata2: @@ -228,7 +261,207 @@ def encode_fill_value(cls, v: Any, dtype: np.dtype, object_codec: Any = None) -> return v -# expose class methods for backwards compatibility +class Metadata3(Metadata2): + ZARR_FORMAT = ZARR_FORMAT_v3 + + @classmethod + def decode_dtype(cls, d): + d = cls._decode_dtype_descr(d) + dtype = np.dtype(d) + if dtype.kind == 'c': + if not ZARR_V3_ALLOW_COMPLEX: + raise ValueError("complex-valued arrays not supported") + elif dtype.kind in 'mM': + if not ZARR_V3_ALLOW_DATETIME: + raise ValueError( + "datetime64 and timedelta64 arrays not supported" + ) + elif dtype.kind == 'O': + if not ZARR_V3_ALLOW_OBJECTARRAY: + raise ValueError("object arrays not supported") + elif dtype.kind == 'V': + if not ZARR_V3_ALLOW_STRUCTURED: + raise ValueError("structured arrays not supported") + elif dtype.kind == 'U': + if not ZARR_V3_ALLOW_UNICODE_ARRAY: + raise ValueError("unicode arrays not supported") + elif dtype.kind == 'S': + if not ZARR_V3_ALLOW_BYTES_ARRAY: + raise ValueError("bytes arrays not supported") + else: + assert d in _v3_core_type + return dtype + + @classmethod + def encode_dtype(cls, d): + s = Metadata2.encode_dtype(d) + if s == "|b1": + return "bool" + elif s == "|u1": + return "u1" + elif s == "|i1": + return "i1" + dtype = np.dtype(d) + if dtype.kind == "c": + if not ZARR_V3_ALLOW_COMPLEX: + raise ValueError( + "complex-valued arrays not part of the base v3 spec" + ) + elif dtype.kind in "mM": + if not ZARR_V3_ALLOW_DATETIME: + raise ValueError( + "datetime64 and timedelta64 not part of the base v3 " + "spec" + ) + elif dtype.kind == "O": + if not ZARR_V3_ALLOW_OBJECTARRAY: + raise ValueError( + "object dtypes are not part of the base v3 spec" + ) + elif dtype.kind == "V": + if not ZARR_V3_ALLOW_STRUCTURED: + raise ValueError( + "structured arrays are not part of the base v3 spec" + ) + elif dtype.kind == 'U': + if not ZARR_V3_ALLOW_UNICODE_ARRAY: + raise ValueError("unicode dtypes are not part of the base v3 " + "spec") + elif dtype.kind == 'S': + if not ZARR_V3_ALLOW_BYTES_ARRAY: + raise ValueError("bytes dtypes are not part of the base v3 " + "spec") + else: + assert s in _v3_core_type + return s + + @classmethod + def decode_group_metadata(cls, s: Union[MappingType, str]) -> 
MappingType[str, Any]:
+        meta = cls.parse_metadata(s)
+        # # check metadata format version
+        # zarr_format = meta.get("zarr_format", None)
+        # if zarr_format != cls.ZARR_FORMAT:
+        #     raise MetadataError("unsupported zarr format: %s" % zarr_format)
+
+        assert 'attributes' in meta
+        # meta = dict(attributes=meta['attributes'])
+        return meta
+
+        # return json.loads(s)
+
+    @classmethod
+    def encode_group_metadata(cls, meta=None) -> bytes:
+        # The ZARR_FORMAT should not be in the group metadata, but in the
+        # entry point metadata instead
+        # meta = dict(zarr_format=cls.ZARR_FORMAT)
+        if meta is None:
+            meta = {'attributes': {}}
+        meta = dict(attributes=meta.get("attributes", {}))
+        return json_dumps(meta)
+
+    @classmethod
+    def encode_hierarchy_metadata(cls, meta=None) -> bytes:
+        if meta is None:
+            meta = _default_entry_point_metadata_v3
+        elif set(meta.keys()) != {
+            "zarr_format",
+            "metadata_encoding",
+            "metadata_key_suffix",
+            "extensions",
+        }:
+            raise ValueError(f"Unexpected keys in metadata. meta={meta}")
+        return json_dumps(meta)
+
+    @classmethod
+    def decode_hierarchy_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]:
+        meta = cls.parse_metadata(s)
+        # check metadata format
+        # zarr_format = meta.get("zarr_format", None)
+        # if zarr_format != "https://purl.org/zarr/spec/protocol/core/3.0":
+        #     raise MetadataError("unsupported zarr format: %s" % zarr_format)
+        if set(meta.keys()) != {
+            "zarr_format",
+            "metadata_encoding",
+            "metadata_key_suffix",
+            "extensions",
+        }:
+            raise ValueError(f"Unexpected keys in metadata. meta={meta}")
+        return meta
+
+    @classmethod
+    def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]:
+        meta = cls.parse_metadata(s)
+
+        # check metadata format
+        zarr_format = meta.get("zarr_format", None)
+        if zarr_format != cls.ZARR_FORMAT:
+            raise MetadataError("unsupported zarr format: %s" % zarr_format)
+
+        # extract array metadata fields
+        try:
+            dtype = cls.decode_dtype(meta["data_type"])
+            if dtype.hasobject:
+                import numcodecs
+                object_codec = numcodecs.get_codec(meta['attributes']['filters'][0])
+            else:
+                object_codec = None
+            fill_value = cls.decode_fill_value(meta["fill_value"], dtype, object_codec)
+            # TODO: remove dimension_separator?
+ meta = dict( + zarr_format=meta["zarr_format"], + shape=tuple(meta["shape"]), + chunk_grid=dict( + type=meta["chunk_grid"]["type"], + chunk_shape=tuple(meta["chunk_grid"]["chunk_shape"]), + separator=meta["chunk_grid"]["separator"], + ), + data_type=dtype, + compressor=meta["compressor"], + fill_value=fill_value, + chunk_memory_layout=meta["chunk_memory_layout"], + dimension_separator=meta.get("dimension_separator", "/"), + attributes=meta["attributes"], + ) + # dimension_separator = meta.get("dimension_separator", None) + # if dimension_separator: + # meta["dimension_separator"] = dimension_separator + except Exception as e: + raise MetadataError("error decoding metadata: %s" % e) + else: + return meta + + @classmethod + def encode_array_metadata(cls, meta: MappingType[str, Any]) -> bytes: + dtype = meta["data_type"] + sdshape = () + if dtype.subdtype is not None: + dtype, sdshape = dtype.subdtype + dimension_separator = meta.get("dimension_separator") + if dtype.hasobject: + import numcodecs + object_codec = numcodecs.get_codec(meta['attributes']['filters'][0]) + else: + object_codec = None + meta = dict( + zarr_format=cls.ZARR_FORMAT, + shape=meta["shape"] + sdshape, + chunk_grid=dict( + type=meta["chunk_grid"]["type"], + chunk_shape=tuple(meta["chunk_grid"]["chunk_shape"]), + separator=meta["chunk_grid"]["separator"], + ), + data_type=cls.encode_dtype(dtype), + compressor=meta["compressor"], + fill_value=encode_fill_value(meta["fill_value"], dtype, object_codec), + chunk_memory_layout=meta["chunk_memory_layout"], + attributes=meta.get("attributes", {}), + ) + if dimension_separator: + meta["dimension_separator"] = dimension_separator + return json_dumps(meta) + + parse_metadata = Metadata2.parse_metadata decode_array_metadata = Metadata2.decode_array_metadata encode_array_metadata = Metadata2.encode_array_metadata diff --git a/zarr/storage.py b/zarr/storage.py index 7170eeaf23..00ca4591b4 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -57,15 +57,19 @@ normalize_shape, normalize_storage_path, retry_call) from zarr._storage.absstore import ABSStore # noqa: F401 -from zarr._storage.store import (_listdir_from_keys, - _path_to_prefix, +from zarr._storage.store import (_get_hierarchy_metadata, + _listdir_from_keys, _rename_from_keys, _rmdir_from_keys, + _path_to_prefix, + _prefix_to_array_key, + _prefix_to_group_key, array_meta_key, group_meta_key, attrs_key, BaseStore, - Store) + Store, + StoreV3) __doctest_requires__ = { ('RedisStore', 'RedisStore.*'): ['redis'], @@ -92,40 +96,95 @@ def contains_array(store: StoreLike, path: Path = None) -> bool: """Return True if the store contains an array at the given logical path.""" path = normalize_storage_path(path) prefix = _path_to_prefix(path) - key = prefix + array_meta_key + key = _prefix_to_array_key(store, prefix) return key in store -def contains_group(store: StoreLike, path: Path = None) -> bool: +def contains_group(store: StoreLike, path: Path = None, explicit_only=True) -> bool: """Return True if the store contains a group at the given logical path.""" path = normalize_storage_path(path) prefix = _path_to_prefix(path) - key = prefix + group_meta_key - return key in store + key = _prefix_to_group_key(store, prefix) + store_version = getattr(store, '_store_version', 2) + if store_version == 2 or explicit_only: + return key in store + else: + if key in store: + return True + # for v3, need to also handle implicit groups + sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] + implicit_prefix = key.replace('.group' + sfx, '') 
+        if not implicit_prefix.endswith('/'):
+            implicit_prefix += '/'
+        if store.list_prefix(implicit_prefix):  # type: ignore
+            return True
+        return False


-def normalize_store_arg(store: Any, clobber=False, storage_options=None, mode="w") -> BaseStore:
+def normalize_store_arg(store, clobber=False, storage_options=None, mode="w",
+                        *, zarr_version=None) -> Store:
+    if zarr_version is None:
+        # default to v2 store for backward compatibility
+        zarr_version = getattr(store, '_store_version', 2)
+    if zarr_version not in [2, 3]:
+        raise ValueError("zarr_version must be 2 or 3")
     if store is None:
-        return BaseStore._ensure_store(dict())
-    elif isinstance(store, os.PathLike):
+        if zarr_version == 2:
+            store = KVStore(dict())
+        else:
+            store = KVStoreV3(dict())
+            # add default zarr.json metadata
+            store['zarr.json'] = store._metadata_class.encode_hierarchy_metadata(None)
+        return store
+    if isinstance(store, os.PathLike):
         store = os.fspath(store)
     if isinstance(store, str):
         mode = mode if clobber else "r"
-        if "://" in store or "::" in store:
-            return FSStore(store, mode=mode, **(storage_options or {}))
-        elif storage_options:
-            raise ValueError("storage_options passed with non-fsspec path")
-        if store.endswith('.zip'):
-            return ZipStore(store, mode=mode)
-        elif store.endswith('.n5'):
-            from zarr.n5 import N5Store
-            return N5Store(store)
-        else:
-            return DirectoryStore(store)
-    else:
-        if not isinstance(store, BaseStore) and isinstance(store, MutableMapping):
-            store = BaseStore._ensure_store(store)
-        return store
+        if zarr_version == 2:
+            if "://" in store or "::" in store:
+                return FSStore(store, mode=mode, **(storage_options or {}))
+            elif storage_options:
+                raise ValueError("storage_options passed with non-fsspec path")
+            if store.endswith('.zip'):
+                return ZipStore(store, mode=mode)
+            elif store.endswith('.n5'):
+                from zarr.n5 import N5Store
+                return N5Store(store)
+            else:
+                return DirectoryStore(store)
+        elif zarr_version == 3:
+            if "://" in store or "::" in store:
+                store = FSStoreV3(store, mode=mode, **(storage_options or {}))
+            elif storage_options:
+                raise ValueError("storage_options passed with non-fsspec path")
+            elif store.endswith('.zip'):
+                store = ZipStoreV3(store, mode=mode)
+            elif store.endswith('.n5'):
+                raise NotImplementedError("N5Store not yet implemented for V3")
+                # return N5StoreV3(store)
+            else:
+                store = DirectoryStoreV3(store)
+            # add default zarr.json metadata
+            store['zarr.json'] = store._metadata_class.encode_hierarchy_metadata(None)
+            return store
+    elif zarr_version == 2:
+        store = Store._ensure_store(store)
+        if getattr(store, '_store_version', 2) != 2:
+            raise ValueError(
+                "provided store does not match the specified zarr version.")
+        # if not isinstance(store, Store) and isinstance(store, MutableMapping):
+        #     store = KVStore(store)
+    elif zarr_version == 3:
+        store = StoreV3._ensure_store(store)
+        if getattr(store, '_store_version', 2) != 3:
+            raise ValueError(
+                "provided store does not match the specified zarr version.")
+        # if not isinstance(store, StoreV3) and isinstance(store, MutableMapping):
+        #     store = KVStoreV3(store)
+        if 'zarr.json' not in store:
+            # add default zarr.json metadata
+            store['zarr.json'] = store._metadata_class.encode_hierarchy_metadata(None)
+    return store


 def rmdir(store: StoreLike, path: Path = None):
@@ -133,15 +192,36 @@ def rmdir(store: StoreLike, path: Path = None):
     this will be called, otherwise will fall back to implementation via the
     `Store` interface."""
     path = normalize_storage_path(path)
-    if hasattr(store, "rmdir") and 
store.is_erasable(): # type: ignore - # pass through - store.rmdir(path) # type: ignore + if getattr(store, '_store_version', 2) == 2: + if hasattr(store, "rmdir") and store.is_erasable(): # type: ignore + # pass through + store.rmdir(path) # type: ignore + else: + # slow version, delete one key at a time + _rmdir_from_keys(store, path) else: - # slow version, delete one key at a time - _rmdir_from_keys(store, path) + # TODO: check behavior for v3 and fix in the Store class, deferring to + # those by default + + # remove metadata folder + meta_dir = 'meta/root/' + path + _rmdir_from_keys(store, meta_dir) + + # remove data folder + data_dir = 'data/root/' + path + _rmdir_from_keys(store, data_dir) + # remove metadata files + sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] + array_meta_file = meta_dir + '.array' + sfx + if array_meta_file in store: + store.erase(array_meta_file) # type: ignore + group_meta_file = meta_dir + '.group' + sfx + if group_meta_file in store: + store.erase(group_meta_file) # type: ignore -def rename(store: BaseStore, src_path: Path, dst_path: Path): + +def rename(store: Store, src_path: Path, dst_path: Path): """Rename all items under the given path. If `store` provides a `rename` method, this will be called, otherwise will fall back to implementation via the `Store` interface.""" @@ -163,6 +243,27 @@ def listdir(store: BaseStore, path: Path = None): if hasattr(store, 'listdir'): # pass through return store.listdir(path) # type: ignore + elif getattr(store, "_store_version", None) == 3: + meta_prefix = 'meta/root/' + dir_path = meta_prefix + path + path_start = len(meta_prefix) + meta_keys = [] + include_meta_keys = False + if include_meta_keys: + sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] + group_meta_key = dir_path + '.group' + sfx + if group_meta_key in store: + meta_keys.append(group_meta_key[path_start:]) + array_meta_key = dir_path + '.array' + sfx + if array_meta_key in store: + meta_keys.append(array_meta_key[path_start:]) + if not dir_path.endswith('/'): + dir_path += '/' + keys, prefixes = store.list_dir(dir_path) # type: ignore + keys = [k[path_start:] for k in keys] + prefixes = [p[path_start:] for p in prefixes] + return meta_keys + keys + prefixes + else: # slow version, iterate through all keys warnings.warn( @@ -173,33 +274,45 @@ def listdir(store: BaseStore, path: Path = None): return _listdir_from_keys(store, path) +def _getsize(store: BaseStore, path: Path = None) -> int: + # compute from size of values + if path and path in store: + v = store[path] + size = buffer_size(v) + else: + path = '' if path is None else normalize_storage_path(path) + size = 0 + store_version = getattr(store, '_store_version', 2) + if store_version == 3: + members = store.list_prefix('data/root/' + path) # type: ignore + members += store.list_prefix('meta/root/' + path) # type: ignore + # members += ['zarr.json'] + else: + members = listdir(store, path) + prefix = _path_to_prefix(path) + members = [prefix + k for k in members] + for k in members: + try: + v = store[k] + except KeyError: + pass + else: + try: + size += buffer_size(v) + except TypeError: + return -1 + return size + + def getsize(store: BaseStore, path: Path = None) -> int: """Compute size of stored items for a given path. 
If `store` provides a `getsize` method, this will be called, otherwise will return -1.""" - path = normalize_storage_path(path) if hasattr(store, 'getsize'): # pass through + path = normalize_storage_path(path) return store.getsize(path) # type: ignore elif isinstance(store, MutableMapping): - # compute from size of values - if path in store: - v = store[path] - size = buffer_size(v) - else: - members = listdir(store, path) - prefix = _path_to_prefix(path) - size = 0 - for k in members: - try: - v = store[prefix + k] - except KeyError: - pass - else: - try: - size += buffer_size(v) - except TypeError: - return -1 - return size + return _getsize(store, path) else: return -1 @@ -346,7 +459,14 @@ def init_array( path = normalize_storage_path(path) # ensure parent group initialized - _require_parent_group(path, store=store, chunk_store=chunk_store, overwrite=overwrite) + store_version = getattr(store, "_store_version", 2) + if store_version < 3: + _require_parent_group(path, store=store, chunk_store=chunk_store, + overwrite=overwrite) + + if store_version == 3 and 'zarr.json' not in store: + # initialize with default zarr.json entry level metadata + store['zarr.json'] = store._metadata_class.encode_hierarchy_metadata(None) # type: ignore _init_array_metadata(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, fill_value=fill_value, @@ -372,16 +492,50 @@ def _init_array_metadata( dimension_separator=None, ): + store_version = getattr(store, '_store_version', 2) + + path = normalize_storage_path(path) + # guard conditions if overwrite: - # attempt to delete any pre-existing items in store - rmdir(store, path) - if chunk_store is not None: - rmdir(chunk_store, path) - elif contains_array(store, path): - raise ContainsArrayError(path) - elif contains_group(store, path): - raise ContainsGroupError(path) + if store_version == 2: + # attempt to delete any pre-existing array in store + rmdir(store, path) + if chunk_store is not None: + rmdir(chunk_store, path) + else: + group_meta_key = _prefix_to_group_key(store, _path_to_prefix(path)) + array_meta_key = _prefix_to_array_key(store, _path_to_prefix(path)) + data_prefix = 'data/root/' + _path_to_prefix(path) + + # attempt to delete any pre-existing array in store + if array_meta_key in store: + store.erase(array_meta_key) # type: ignore + if group_meta_key in store: + store.erase(group_meta_key) # type: ignore + store.erase_prefix(data_prefix) # type: ignore + if chunk_store is not None: + chunk_store.erase_prefix(data_prefix) # type: ignore + + if '/' in path: + # path is a subfolder of an existing array, remove that array + parent_path = '/'.join(path.split('/')[:-1]) + sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] + array_key = 'meta/root/' + parent_path + '.array' + sfx + if array_key in store: + store.erase(array_key) # type: ignore + + if not overwrite: + if contains_array(store, path): + raise ContainsArrayError(path) + elif contains_group(store, path, explicit_only=False): + raise ContainsGroupError(path) + elif store_version == 3: + if '/' in path: + # cannot create an array within an existing array path + parent_path = '/'.join(path.split('/')[:-1]) + if contains_array(store, parent_path): + raise ContainsArrayError(path) # normalize metadata dtype, object_codec = normalize_dtype(dtype, object_codec) @@ -392,7 +546,7 @@ def _init_array_metadata( fill_value = normalize_fill_value(fill_value, dtype) # optional array metadata - if dimension_separator is None: + if dimension_separator is None and store_version == 
2: dimension_separator = getattr(store, "_dimension_separator", None) dimension_separator = normalize_dimension_separator(dimension_separator) @@ -416,6 +570,8 @@ def _init_array_metadata( # obtain filters config if filters: + # TODO: filters was removed from the metadata in v3 + # raise error here if store_version > 2? filters_config = [f.get_config() for f in filters] else: filters_config = [] @@ -441,11 +597,30 @@ def _init_array_metadata( filters_config = None # type: ignore # initialize metadata - meta = dict(shape=shape, chunks=chunks, dtype=dtype, - compressor=compressor_config, fill_value=fill_value, - order=order, filters=filters_config, + # TODO: don't store redundant dimension_separator for v3? + meta = dict(shape=shape, compressor=compressor_config, + fill_value=fill_value, dimension_separator=dimension_separator) - key = _path_to_prefix(path) + array_meta_key + if store_version < 3: + meta.update(dict(chunks=chunks, dtype=dtype, order=order, + filters=filters_config)) + else: + if dimension_separator is None: + dimension_separator = "/" + if filters_config: + attributes = {'filters': filters_config} + else: + attributes = {} + meta.update( + dict(chunk_grid=dict(type="regular", + chunk_shape=chunks, + separator=dimension_separator), + chunk_memory_layout=order, + data_type=dtype, + attributes=attributes) + ) + + key = _prefix_to_array_key(store, _path_to_prefix(path)) if hasattr(store, '_metadata_class'): store[key] = store._metadata_class.encode_array_metadata(meta) # type: ignore else: @@ -482,14 +657,26 @@ def init_group( # normalize path path = normalize_storage_path(path) - # ensure parent group initialized - _require_parent_group(path, store=store, chunk_store=chunk_store, - overwrite=overwrite) + store_version = getattr(store, '_store_version', 2) + if store_version < 3: + # ensure parent group initialized + _require_parent_group(path, store=store, chunk_store=chunk_store, + overwrite=overwrite) + + if store_version == 3 and 'zarr.json' not in store: + # initialize with default zarr.json entry level metadata + store['zarr.json'] = store._metadata_class.encode_hierarchy_metadata(None) # type: ignore # initialise metadata _init_group_metadata(store=store, overwrite=overwrite, path=path, chunk_store=chunk_store) + if store_version == 3: + # TODO: Should initializing a v3 group also create a corresponding + # empty folder under data/root/? I think probably not until there + # is actual data written there. 
+ pass + def _init_group_metadata( store: StoreLike, @@ -498,22 +685,51 @@ def _init_group_metadata( chunk_store: StoreLike = None, ): + store_version = getattr(store, '_store_version', 2) + path = normalize_storage_path(path) + # guard conditions if overwrite: - # attempt to delete any pre-existing items in store - rmdir(store, path) - if chunk_store is not None: - rmdir(chunk_store, path) - elif contains_array(store, path): - raise ContainsArrayError(path) - elif contains_group(store, path): - raise ContainsGroupError(path) + if store_version == 2: + # attempt to delete any pre-existing items in store + rmdir(store, path) + if chunk_store is not None: + rmdir(chunk_store, path) + else: + group_meta_key = _prefix_to_group_key(store, _path_to_prefix(path)) + array_meta_key = _prefix_to_array_key(store, _path_to_prefix(path)) + data_prefix = 'data/root/' + _path_to_prefix(path) + meta_prefix = 'meta/root/' + _path_to_prefix(path) + + # attempt to delete any pre-existing array in store + if array_meta_key in store: + store.erase(array_meta_key) # type: ignore + if group_meta_key in store: + store.erase(group_meta_key) # type: ignore + store.erase_prefix(data_prefix) # type: ignore + store.erase_prefix(meta_prefix) # type: ignore + if chunk_store is not None: + chunk_store.erase_prefix(data_prefix) # type: ignore + + if not overwrite: + if contains_array(store, path): + raise ContainsArrayError(path) + elif contains_group(store, path): + raise ContainsGroupError(path) + elif store_version == 3 and '/' in path: + # cannot create a group overlapping with an existing array name + parent_path = '/'.join(path.split('/')[:-1]) + if contains_array(store, parent_path): + raise ContainsArrayError(path) # initialize metadata # N.B., currently no metadata properties are needed, however there may # be in future - meta = dict() # type: ignore - key = _path_to_prefix(path) + group_meta_key + if store_version == 3: + meta = {'attributes': {}} # type: ignore + else: + meta = {} # type: ignore + key = _prefix_to_group_key(store, _path_to_prefix(path)) if hasattr(store, '_metadata_class'): store[key] = store._metadata_class.encode_group_metadata(meta) # type: ignore else: @@ -1139,14 +1355,17 @@ def __init__(self, url, normalize_keys=False, key_separator=None, dimension_separator = key_separator self.key_separator = dimension_separator - if self.key_separator is None: - self.key_separator = "." + self._default_key_separator() # Pass attributes to array creation self._dimension_separator = dimension_separator if self.fs.exists(self.path) and not self.fs.isdir(self.path): raise FSPathExistNotDir(url) + def _default_key_separator(self): + if self.key_separator is None: + self.key_separator = "." 
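The `_default_key_separator` hook above is what lets `FSStoreV3`, later in this patch, flip the default from `"."` to `"/"` without duplicating `__init__`. A toy illustration of what the separator changes; `chunk_key` is a hypothetical helper for illustration, not part of the store API:

```python
def chunk_key(array_path: str, coords: tuple, separator: str) -> str:
    # Chunk storage keys are composed by joining the chunk coordinates
    # with the store's key separator.
    return array_path + "/" + separator.join(str(c) for c in coords)

assert chunk_key("foo/bar", (0, 0), ".") == "foo/bar/0.0"  # FSStore default
assert chunk_key("foo/bar", (0, 0), "/") == "foo/bar/0/0"  # FSStoreV3 default
```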
+ def _normalize_key(self, key): key = normalize_storage_path(key).lstrip('/') if key: @@ -2647,6 +2866,10 @@ class ConsolidatedMetadataStore(Store): def __init__(self, store: StoreLike, metadata_key=".zmetadata"): self.store = Store._ensure_store(store) + if getattr(store, '_store_version', 2) != 2: + raise ValueError("Can only consolidate stores corresponding to " + "the Zarr v2 spec.") + # retrieve consolidated metadata meta = json_loads(store[metadata_key]) @@ -2682,3 +2905,351 @@ def getsize(self, path): def listdir(self, path): return listdir(self.meta_store, path) + + +""" versions of stores following the v3 protocol """ + + +def _get_files_and_dirs_from_path(store, path): + path = normalize_storage_path(path) + + files = [] + # add array metadata file if present + array_key = _prefix_to_array_key(store, path) + if array_key in store: + files.append(os.path.join(store.path, array_key)) + + # add group metadata file if present + group_key = _prefix_to_group_key(store, path) + if group_key in store: + files.append(os.path.join(store.path, group_key)) + + dirs = [] + # add array and group folders if present + for d in ['data/root/' + path, 'meta/root/' + path]: + dir_path = os.path.join(store.path, d) + if os.path.exists(dir_path): + dirs.append(dir_path) + return files, dirs + + +class KVStoreV3(KVStore, StoreV3): + + def list(self): + return list(self._mutable_mapping.keys()) + + +KVStoreV3.__doc__ = KVStore.__doc__ + + +class FSStoreV3(FSStore, StoreV3): + + # FSStoreV3 doesn't use this (FSStore uses it within _normalize_key) + _META_KEYS = () + + def _default_key_separator(self): + if self.key_separator is None: + self.key_separator = "/" + + def list(self): + return list(self.keys()) + + def _normalize_key(self, key): + key = normalize_storage_path(key).lstrip('/') + return key.lower() if self.normalize_keys else key + + def getsize(self, path=None): + size = 0 + if path is None or path == '': + # size of both the data and meta subdirs + dirs = [] + for d in ['data/root', 'meta/root']: + dir_path = os.path.join(self.path, d) + if os.path.exists(dir_path): + dirs.append(dir_path) + else: + files, dirs = _get_files_and_dirs_from_path(self, path) + for file in files: + size += os.path.getsize(file) + for d in dirs: + size += self.fs.du(d, total=True, maxdepth=None) + return size + + def setitems(self, values): + if self.mode == 'r': + raise ReadOnlyError() + values = {self._normalize_key(key): val for key, val in values.items()} + + # initialize the /data/root/... folder corresponding to the array! + # Note: zarr.tests.test_core_v3.TestArrayWithFSStoreV3PartialRead fails + # without this explicit creation of directories + subdirectories = set([os.path.dirname(v) for v in values.keys()]) + for subdirectory in subdirectories: + data_dir = os.path.join(self.path, subdirectory) + if not self.fs.exists(data_dir): + self.fs.mkdir(data_dir) + + self.map.setitems(values) + + +class MemoryStoreV3(MemoryStore, StoreV3): + + def __init__(self, root=None, cls=dict, dimension_separator=None): + if root is None: + self.root = cls() + else: + self.root = root + self.cls = cls + self.write_mutex = Lock() + self._dimension_separator = dimension_separator # TODO: modify for v3? 
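A quick round-trip through `MemoryStoreV3` as defined here, assuming `init_array` with the v3 branches added earlier in this patch (the array path `arr1` is arbitrary):

```python
from zarr.storage import MemoryStoreV3, init_array

store = MemoryStoreV3()
init_array(store, path='arr1', shape=(1000,), chunks=(100,))

assert 'zarr.json' in store                  # entry-point metadata
assert 'meta/root/arr1.array.json' in store  # array metadata under meta/root/
assert store.list_dir('meta/root/') == (['meta/root/arr1.array.json'], [])
```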
+ + def __eq__(self, other): + return ( + isinstance(other, MemoryStoreV3) and + self.root == other.root and + self.cls == other.cls + ) + + def list(self): + return list(self.keys()) + + def getsize(self, path: Path = None): + size = 0 + path = normalize_storage_path(path) + members = self.list_prefix('data/root/' + path) + members += self.list_prefix('meta/root/' + path) + for k in members: + try: + v = self[k] + except KeyError: + pass + else: + try: + size += buffer_size(v) + except TypeError: + return -1 + return size + + +MemoryStoreV3.__doc__ = MemoryStore.__doc__ + + +class DirectoryStoreV3(DirectoryStore, StoreV3): + + def list(self): + return list(self.keys()) + + def __eq__(self, other): + return ( + isinstance(other, DirectoryStoreV3) and + self.path == other.path + ) + + # def getsize(self, path=None): + # size = 0 + # if path is None or path == '': + # # add array and group folders if present + # dirs = [] + # for d in ['data/root', 'meta/root']: + # dir_path = os.path.join(self.path, d) + # if os.path.exists(dir_path): + # dirs.append(dir_path) + # print(f"dirs={dirs}") + # else: + # files, dirs = _get_files_and_dirs_from_path(self, path) + # for file in files: + # size += os.path.getsize(file) + # for d in dirs: + # for child in scandir(d): + # print(f"child={child}") + # if child.is_file(): + # size += child.stat().st_size + # return size + + def getsize(self, path: Path = None): + size = 0 + path = normalize_storage_path(path) + members = self.list_prefix('data/root/' + path) + members += self.list_prefix('meta/root/' + path) + for k in members: + try: + v = self[k] + except KeyError: + pass + else: + try: + size += buffer_size(v) + except TypeError: + return -1 + return size + + def rename(self, src_path, dst_path, metadata_key_suffix='.json'): + store_src_path = normalize_storage_path(src_path) + store_dst_path = normalize_storage_path(dst_path) + + dir_path = self.path + any_existed = False + for root_prefix in ['meta', 'data']: + src_path = os.path.join(dir_path, root_prefix, 'root', store_src_path) + if os.path.exists(src_path): + any_existed = True + dst_path = os.path.join(dir_path, root_prefix, 'root', store_dst_path) + os.renames(src_path, dst_path) + + for suffix in ['.array' + metadata_key_suffix, + '.group' + metadata_key_suffix]: + src_meta = os.path.join(dir_path, 'meta', 'root', store_src_path + suffix) + if os.path.exists(src_meta): + any_existed = True + dst_meta = os.path.join(dir_path, 'meta', 'root', store_dst_path + suffix) + dst_dir = os.path.dirname(dst_meta) + if not os.path.exists(dst_dir): + os.makedirs(dst_dir) + os.rename(src_meta, dst_meta) + if not any_existed: + raise FileNotFoundError("nothing found at src_path") + + +DirectoryStoreV3.__doc__ = DirectoryStore.__doc__ + + +class ZipStoreV3(ZipStore, StoreV3): + + def list(self): + return list(self.keys()) + + def __eq__(self, other): + return ( + isinstance(other, ZipStore) and + self.path == other.path and + self.compression == other.compression and + self.allowZip64 == other.allowZip64 + ) + + def getsize(self, path=None): + path = normalize_storage_path(path) + with self.mutex: + children = self.list_prefix('data/root/' + path) + children += self.list_prefix('meta/root/' + path) + if children: + size = 0 + for name in children: + try: + info = self.zf.getinfo(name) + except KeyError: + pass + else: + size += info.compress_size + return size + elif path: + try: + info = self.zf.getinfo(path) + return info.compress_size + except KeyError: + return 0 + else: + return 0 + + 
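`ZipStoreV3` above, like every store in this section, follows one mixin recipe: the v2 class supplies the storage mechanics, `StoreV3` supplies the v3 key conventions, and the v2 docstring is reused. A sketch of the pattern; `DemoStoreV3` is a hypothetical name (the patch's own `KVStoreV3` is built exactly this way):

```python
from zarr.storage import KVStore, StoreV3

class DemoStoreV3(KVStore, StoreV3):
    def list(self):
        # StoreV3.list already falls back to keys() when present; the
        # concrete V3 classes forward to it explicitly anyway.
        return list(self.keys())

DemoStoreV3.__doc__ = KVStore.__doc__  # reuse the v2 docstring

store = DemoStoreV3(dict())
store['zarr.json'] = b'{}'
assert store.list() == ['zarr.json']
```

Listing the v2 class first in the MRO means its `__getitem__`/`__setitem__` win, while `StoreV3` contributes `_store_version`, `_metadata_class`, and the `list_*`/`erase_*` helpers.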
+ZipStoreV3.__doc__ = ZipStore.__doc__ + + +class NestedDirectoryStoreV3(NestedDirectoryStore, DirectoryStoreV3): + + def list(self): + return list(self.keys()) + + def __eq__(self, other): + return ( + isinstance(other, NestedDirectoryStoreV3) and + self.path == other.path + ) + + +NestedDirectoryStoreV3.__doc__ = NestedDirectoryStore.__doc__ + + +class RedisStoreV3(RedisStore, StoreV3): + + def list(self): + return list(self.keys()) + + +RedisStoreV3.__doc__ = RedisStore.__doc__ + + +class MongoDBStoreV3(MongoDBStore, StoreV3): + + def list(self): + return list(self.keys()) + + +MongoDBStoreV3.__doc__ = MongoDBStore.__doc__ + + +class DBMStoreV3(DBMStore, StoreV3): + + def list(self): + return list(self.keys()) + + +DBMStoreV3.__doc__ = DBMStore.__doc__ + + +class LMDBStoreV3(LMDBStore, StoreV3): + + def list(self): + return list(self.keys()) + + +LMDBStoreV3.__doc__ = LMDBStore.__doc__ + + +class SQLiteStoreV3(SQLiteStore, StoreV3): + + def list(self): + return list(self.keys()) + + def getsize(self, path=None): + if path is None or path == '': + # TODO: why does the query below not work in this case? + # For now fall back to the default _getsize implementation + return _getsize(self, path) + else: + path = normalize_storage_path(path) + size = 0 + for _path in ['data/root/' + path, 'meta/root/' + path]: + c = self.cursor.execute( + ''' + SELECT COALESCE(SUM(LENGTH(v)), 0) FROM zarr + WHERE k LIKE (? || "%") AND + 0 == INSTR(LTRIM(SUBSTR(k, LENGTH(?) + 1), "/"), "/") + ''', + (_path, _path) + ) + for item_size, in c: + size += item_size + return size + + +SQLiteStoreV3.__doc__ = SQLiteStore.__doc__ + + +class LRUStoreCacheV3(LRUStoreCache, StoreV3): + + def __init__(self, store, max_size: int): + self._store = StoreV3._ensure_store(store) + self._max_size = max_size + self._current_size = 0 + self._keys_cache = None + self._contains_cache = None + self._listdir_cache: Dict[Path, Any] = dict() + self._values_cache: Dict[Path, Any] = OrderedDict() + self._mutex = Lock() + self.hits = self.misses = 0 + + def list(self): + return list(self.keys()) + + +LRUStoreCacheV3.__doc__ = LRUStoreCache.__doc__ diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 3438e60691..0865917926 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -18,9 +18,9 @@ from numcodecs.compat import ensure_bytes from zarr.codecs import BZ2, AsType, Blosc, Zlib -from zarr.errors import MetadataError +from zarr.errors import ContainsArrayError, ContainsGroupError, MetadataError from zarr.hierarchy import group -from zarr.meta import ZARR_FORMAT, decode_array_metadata +from zarr.meta import ZARR_FORMAT, ZARR_FORMAT_v3, decode_array_metadata from zarr.n5 import N5Store, N5FSStore from zarr.storage import (ABSStore, ConsolidatedMetadataStore, DBMStore, DictStore, DirectoryStore, KVStore, LMDBStore, @@ -31,7 +31,12 @@ attrs_key, default_compressor, getsize, group_meta_key, init_array, init_group, migrate_1to2) from zarr.storage import FSStore, rename, listdir +from zarr.storage import (KVStoreV3, MemoryStoreV3, ZipStoreV3, FSStoreV3, + DirectoryStoreV3, NestedDirectoryStoreV3, + RedisStoreV3, MongoDBStoreV3, DBMStoreV3, + LMDBStoreV3, SQLiteStoreV3, LRUStoreCacheV3) from zarr.tests.util import CountingDict, have_fsspec, skip_test_env_var, abs_container +from zarr.tests.util import CountingDictV3 @contextmanager @@ -48,6 +53,15 @@ def dimension_separator_fixture(request): return request.param +@pytest.fixture(params=[ + (None, "/"), + (".", "."), + ("/", "/"), +]) +def 
dimension_separator_fixture_v3(request): + return request.param + + def skip_if_nested_chunks(**kwargs): if kwargs.get("dimension_separator") == "/": pytest.skip("nested chunks are unsupported") diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py new file mode 100644 index 0000000000..9118bc513c --- /dev/null +++ b/zarr/tests/test_storage_v3.py @@ -0,0 +1,847 @@ +import array +import atexit +import os +import tempfile + +import numpy as np +import pytest + +from numcodecs.compat import ensure_bytes + +from zarr.codecs import Zlib +from zarr.errors import ContainsArrayError, ContainsGroupError +from zarr.meta import ZARR_FORMAT, ZARR_FORMAT_v3 +from zarr.storage import (array_meta_key, atexit_rmglob, atexit_rmtree, + default_compressor, getsize, init_array, init_group) +from zarr.storage import (KVStoreV3, MemoryStoreV3, ZipStoreV3, FSStoreV3, + DirectoryStoreV3, NestedDirectoryStoreV3, + RedisStoreV3, MongoDBStoreV3, DBMStoreV3, + LMDBStoreV3, SQLiteStoreV3, LRUStoreCacheV3) +from zarr.tests.util import CountingDictV3, have_fsspec, skip_test_env_var + +from .test_storage import (StoreTests, TestMemoryStore, TestDirectoryStore, + TestFSStore, TestNestedDirectoryStore, TestZipStore, + TestDBMStore, TestDBMStoreDumb, TestDBMStoreGnu, + TestDBMStoreNDBM, TestDBMStoreBerkeleyDB, + TestLMDBStore, TestSQLiteStore, + TestSQLiteStoreInMemory, TestLRUStoreCache, + dimension_separator_fixture, s3, + skip_if_nested_chunks) + + +@pytest.fixture(params=[ + (None, "/"), + (".", "."), + ("/", "/"), +]) +def dimension_separator_fixture_v3(request): + return request.param + + +class StoreV3Tests(StoreTests): + + def test_getsize(self): + # TODO: determine proper getsize() behavior for v3 + + # Currently returns the combined size of entries under + # meta/root/path and data/root/path. + # Any path not under meta/root/ or data/root/ (including zarr.json) + # returns size 0. + + store = self.create_store() + if isinstance(store, dict) or hasattr(store, 'getsize'): + assert 0 == getsize(store, 'zarr.json') + store['meta/root/foo/a'] = b'x' + assert 1 == getsize(store) + assert 1 == getsize(store, 'foo') + store['meta/root/foo/b'] = b'x' + assert 2 == getsize(store, 'foo') + assert 1 == getsize(store, 'foo/b') + store['meta/root/bar/a'] = b'yy' + assert 2 == getsize(store, 'bar') + store['data/root/bar/a'] = b'zzz' + assert 5 == getsize(store, 'bar') + store['data/root/baz/a'] = b'zzz' + assert 3 == getsize(store, 'baz') + assert 10 == getsize(store) + store['data/root/quux'] = array.array('B', b'zzzz') + assert 14 == getsize(store) + assert 4 == getsize(store, 'quux') + store['data/root/spong'] = np.frombuffer(b'zzzzz', dtype='u1') + assert 19 == getsize(store) + assert 5 == getsize(store, 'spong') + + store.close() + + # noinspection PyStatementEffect + def test_hierarchy(self): + pytest.skip("TODO: adapt v2 test_hierarchy tests to v3") + + def test_init_array(self, dimension_separator_fixture_v3): + + pass_dim_sep, want_dim_sep = dimension_separator_fixture_v3 + + store = self.create_store() + path = 'arr1' + init_array(store, path=path, shape=1000, chunks=100, + dimension_separator=pass_dim_sep) + + # check metadata + mkey = 'meta/root/' + path + '.array.json' + assert mkey in store + meta = store._metadata_class.decode_array_metadata(store[mkey]) + # TODO: zarr_format already stored at the heirarchy level should we + # also keep it in the .array.json? 
+ assert ZARR_FORMAT_v3 == meta['zarr_format'] + assert (1000,) == meta['shape'] + assert (100,) == meta['chunk_grid']['chunk_shape'] + assert np.dtype(None) == meta['data_type'] + assert default_compressor.get_config() == meta['compressor'] + assert meta['fill_value'] is None + # Missing MUST be assumed to be "/" + assert meta.get('dimension_separator', "/") is want_dim_sep + assert meta['chunk_grid']['separator'] is want_dim_sep + store.close() + + def _test_init_array_overwrite(self, order): + # setup + store = self.create_store() + + if store._store_version < 3: + path = None + mkey = array_meta_key + else: + path = 'arr1' # no default, have to specify for v3 + mkey = 'meta/root/' + path + '.array.json' + store[mkey] = store._metadata_class.encode_array_metadata( + dict(shape=(2000,), + chunk_grid=dict(type='regular', + chunk_shape=(200,), + separator=('/')), + data_type=np.dtype('u1'), + compressor=Zlib(1).get_config(), + fill_value=0, + chunk_memory_layout=order, + filters=None) + ) + + # don't overwrite (default) + with pytest.raises(ContainsArrayError): + init_array(store, path=path, shape=1000, chunks=100) + + # do overwrite + try: + init_array(store, path=path, shape=1000, chunks=100, + dtype='i4', overwrite=True) + except NotImplementedError: + pass + else: + assert mkey in store + meta = store._metadata_class.decode_array_metadata( + store[mkey] + ) + assert (1000,) == meta['shape'] + if store._store_version == 2: + assert ZARR_FORMAT == meta['zarr_format'] + assert (100,) == meta['chunks'] + assert np.dtype('i4') == meta['dtype'] + elif store._store_version == 3: + assert ZARR_FORMAT_v3 == meta['zarr_format'] + assert (100,) == meta['chunk_grid']['chunk_shape'] + assert np.dtype('i4') == meta['data_type'] + else: + raise ValueError( + "unexpected store version: {store._store_version}" + ) + store.close() + + def test_init_array_path(self): + path = 'foo/bar' + store = self.create_store() + init_array(store, shape=1000, chunks=100, path=path) + + # check metadata + mkey = 'meta/root/' + path + '.array.json' + assert mkey in store + meta = store._metadata_class.decode_array_metadata(store[mkey]) + assert ZARR_FORMAT_v3 == meta['zarr_format'] + assert (1000,) == meta['shape'] + assert (100,) == meta['chunk_grid']['chunk_shape'] + assert np.dtype(None) == meta['data_type'] + assert default_compressor.get_config() == meta['compressor'] + assert meta['fill_value'] is None + + store.close() + + def _test_init_array_overwrite_path(self, order): + # setup + path = 'foo/bar' + store = self.create_store() + meta = dict(shape=(2000,), + chunk_grid=dict(type='regular', + chunk_shape=(200,), + separator=('/')), + data_type=np.dtype('u1'), + compressor=Zlib(1).get_config(), + fill_value=0, + chunk_memory_layout=order, + filters=None) + mkey = 'meta/root/' + path + '.array.json' + store[mkey] = store._metadata_class.encode_array_metadata(meta) + + # don't overwrite + with pytest.raises(ContainsArrayError): + init_array(store, shape=1000, chunks=100, path=path) + + # do overwrite + try: + init_array(store, shape=1000, chunks=100, dtype='i4', path=path, + overwrite=True) + except NotImplementedError: + pass + else: + assert mkey in store + # should have been overwritten + meta = store._metadata_class.decode_array_metadata(store[mkey]) + assert ZARR_FORMAT_v3 == meta['zarr_format'] + assert (1000,) == meta['shape'] + assert (100,) == meta['chunk_grid']['chunk_shape'] + assert np.dtype('i4') == meta['data_type'] + + store.close() + + def test_init_array_overwrite_group(self): + # setup + path = 
'foo/bar' + store = self.create_store() + array_key = 'meta/root/' + path + '.array.json' + group_key = 'meta/root/' + path + '.group.json' + store[group_key] = store._metadata_class.encode_group_metadata() + + with pytest.raises(ContainsGroupError): + init_array(store, shape=1000, chunks=100, path=path) + + # do overwrite + try: + init_array(store, shape=1000, chunks=100, dtype='i4', path=path, + overwrite=True) + except NotImplementedError: + pass + else: + assert group_key not in store + assert array_key in store + meta = store._metadata_class.decode_array_metadata( + store[array_key] + ) + assert ZARR_FORMAT_v3 == meta['zarr_format'] + assert (1000,) == meta['shape'] + assert (100,) == meta['chunk_grid']['chunk_shape'] + assert np.dtype('i4') == meta['data_type'] + + store.close() + + def _test_init_array_overwrite_chunk_store(self, order): + # setup + store = self.create_store() + chunk_store = self.create_store() + path = 'arr1' + mkey = 'meta/root/' + path + '.array.json' + store[mkey] = store._metadata_class.encode_array_metadata( + dict(shape=(2000,), + chunk_grid=dict(type='regular', + chunk_shape=(200,), + separator=('/')), + data_type=np.dtype('u1'), + compressor=None, + fill_value=0, + filters=None, + chunk_memory_layout=order) + ) + + chunk_store['data/root/arr1/0'] = b'aaa' + chunk_store['data/root/arr1/1'] = b'bbb' + + assert 'data/root/arr1/0' in chunk_store + assert 'data/root/arr1/1' in chunk_store + + # don't overwrite (default) + with pytest.raises(ValueError): + init_array(store, path=path, shape=1000, chunks=100, chunk_store=chunk_store) + + # do overwrite + try: + init_array(store, path=path, shape=1000, chunks=100, dtype='i4', + overwrite=True, chunk_store=chunk_store) + except NotImplementedError: + pass + else: + assert mkey in store + meta = store._metadata_class.decode_array_metadata(store[mkey]) + assert ZARR_FORMAT_v3 == meta['zarr_format'] + assert (1000,) == meta['shape'] + assert (100,) == meta['chunk_grid']['chunk_shape'] + assert np.dtype('i4') == meta['data_type'] + assert 'data/root/arr1/0' not in chunk_store + assert 'data/root/arr1/1' not in chunk_store + + store.close() + chunk_store.close() + + def test_init_array_compat(self): + store = self.create_store() + path = 'arr1' + init_array(store, path=path, shape=1000, chunks=100, compressor='none') + mkey = 'meta/root/' + path + '.array.json' + meta = store._metadata_class.decode_array_metadata( + store[mkey] + ) + assert meta['compressor'] is None + + store.close() + + def test_init_group(self): + store = self.create_store() + path = "meta/root/foo" + init_group(store, path=path) + + # check metadata + mkey = 'meta/root/' + path + '.group.json' + assert mkey in store + meta = store._metadata_class.decode_group_metadata(store[mkey]) + assert meta == {'attributes': {}} + + store.close() + + def _test_init_group_overwrite(self, order): + pytest.skip( + "In v3 array and group names cannot overlap" + ) + + def _test_init_group_overwrite_path(self, order): + # setup + path = 'foo/bar' + store = self.create_store() + meta = dict( + shape=(2000,), + chunk_grid=dict(type='regular', + chunk_shape=(200,), + separator=('/')), + data_type=np.dtype('u1'), + compressor=None, + fill_value=0, + filters=None, + chunk_memory_layout=order, + ) + array_key = 'meta/root/' + path + '.array.json' + group_key = 'meta/root/' + path + '.group.json' + store[array_key] = store._metadata_class.encode_array_metadata(meta) + + # don't overwrite + with pytest.raises(ContainsArrayError): + init_group(store, path=path) + + # do 
overwrite + try: + init_group(store, overwrite=True, path=path) + except NotImplementedError: + pass + else: + assert array_key not in store + assert group_key in store + # should have been overwritten + meta = store._metadata_class.decode_group_metadata(store[group_key]) + # assert ZARR_FORMAT == meta['zarr_format'] + assert meta == {'attributes': {}} + + store.close() + + def _test_init_group_overwrite_chunk_store(self, order): + pytest.skip( + "In v3 array and group names cannot overlap" + ) + + +class TestMappingStoreV3(StoreV3Tests): + + def create_store(self, **kwargs): + return KVStoreV3(dict()) + + def test_set_invalid_content(self): + # Generic mappings support non-buffer types + pass + + +class TestMemoryStoreV3(TestMemoryStore, StoreV3Tests): + + def create_store(self, **kwargs): + skip_if_nested_chunks(**kwargs) + return MemoryStoreV3(**kwargs) + + +class TestDirectoryStoreV3(TestDirectoryStore, StoreV3Tests): + + def create_store(self, normalize_keys=False, **kwargs): + # For v3, don't have to skip if nested. + # skip_if_nested_chunks(**kwargs) + + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, path) + store = DirectoryStoreV3(path, normalize_keys=normalize_keys, **kwargs) + return store + + +@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") +class TestFSStoreV3(TestFSStore, StoreV3Tests): + + def create_store(self, normalize_keys=False, + dimension_separator=".", + path=None, + **kwargs): + + if path is None: + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, path) + + store = FSStoreV3( + path, + normalize_keys=normalize_keys, + dimension_separator=dimension_separator, + **kwargs) + return store + + def test_init_array(self): + store = self.create_store() + path = 'arr1' + init_array(store, path=path, shape=1000, chunks=100) + + # check metadata + array_meta_key = 'meta/root/' + path + '.array.json' + assert array_meta_key in store + meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) + assert ZARR_FORMAT_v3 == meta['zarr_format'] + assert (1000,) == meta['shape'] + assert (100,) == meta['chunk_grid']['chunk_shape'] + assert np.dtype(None) == meta['data_type'] + assert meta['chunk_grid']['separator'] == "/" + + # TODO: remove this skip once v3 support is added to hierarchy.Group + @pytest.mark.skipif(True, reason="need v3 support in zarr.hierarchy.Group") + def test_deep_ndim(self): + import zarr + + store = self.create_store() + foo = zarr.open_group(store=store, path='group1') + bar = foo.create_group("bar") + baz = bar.create_dataset("baz", + shape=(4, 4, 4), + chunks=(2, 2, 2), + dtype="i8") + baz[:] = 1 + assert set(store.listdir()) == set(["data", "meta", "zarr.json"]) + assert set(store.listdir("meta/root/group1")) == set(["bar", "bar.group.json"]) + assert set(store.listdir("data/root/group1")) == set(["bar"]) + assert foo["bar"]["baz"][(0, 0, 0)] == 1 + + +@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") +class TestFSStoreV3WithKeySeparator(StoreV3Tests): + + def create_store(self, normalize_keys=False, key_separator=".", **kwargs): + + # Since the user is passing key_separator, that will take priority. + skip_if_nested_chunks(**kwargs) + + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, path) + return FSStoreV3( + path, + normalize_keys=normalize_keys, + key_separator=key_separator) + + +# TODO: remove NestedDirectoryStoreV3? 
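(On the TODO above: under v3 the chunk separator lives in the array metadata as `chunk_grid["separator"]` and already defaults to `"/"`, so nested keys are the norm and a dedicated nested variant adds little. A sketch, assuming `DirectoryStoreV3` as defined in this patch; the key shown is illustrative:)

```python
import tempfile

from zarr.storage import DirectoryStoreV3

store = DirectoryStoreV3(tempfile.mkdtemp())
# v3 chunk keys already contain "/" separators...
store['data/root/arr1/0/0'] = b'\x00'
# ...and DirectoryStore materializes each "/" as a subdirectory on disk:
#   <tmpdir>/data/root/arr1/0/0
```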
+class TestNestedDirectoryStoreV3(TestNestedDirectoryStore, + TestDirectoryStoreV3): + + def create_store(self, normalize_keys=False, **kwargs): + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, path) + store = NestedDirectoryStoreV3(path, normalize_keys=normalize_keys, **kwargs) + return store + + def test_init_array(self): + store = self.create_store() + # assert store._dimension_separator == "/" + path = 'arr1' + init_array(store, path=path, shape=1000, chunks=100) + + # check metadata + array_meta_key = 'meta/root/' + path + '.array.json' + assert array_meta_key in store + meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) + assert ZARR_FORMAT_v3 == meta['zarr_format'] + assert (1000,) == meta['shape'] + assert (100,) == meta['chunk_grid']['chunk_shape'] + assert np.dtype(None) == meta['data_type'] + # assert meta['dimension_separator'] == "/" + assert meta['chunk_grid']['separator'] == "/" + +# TODO: enable once N5StoreV3 has been implemented +# @pytest.mark.skipif(True, reason="N5StoreV3 not yet fully implemented") +# class TestN5StoreV3(TestN5Store, TestNestedDirectoryStoreV3, StoreV3Tests): + + +class TestZipStoreV3(TestZipStore, StoreV3Tests): + + def create_store(self, **kwargs): + path = tempfile.mktemp(suffix='.zip') + atexit.register(os.remove, path) + store = ZipStoreV3(path, mode='w', **kwargs) + return store + + def test_mode(self): + with ZipStoreV3('data/store.zip', mode='w') as store: + store['foo'] = b'bar' + store = ZipStoreV3('data/store.zip', mode='r') + with pytest.raises(PermissionError): + store['foo'] = b'bar' + with pytest.raises(PermissionError): + store.clear() + + +class TestDBMStoreV3(TestDBMStore, StoreV3Tests): + + def create_store(self, dimension_separator=None): + path = tempfile.mktemp(suffix='.anydbm') + atexit.register(atexit_rmglob, path + '*') + # create store using default dbm implementation + store = DBMStoreV3(path, flag='n', dimension_separator=dimension_separator) + return store + + +class TestDBMStoreV3Dumb(TestDBMStoreDumb, StoreV3Tests): + + def create_store(self, **kwargs): + path = tempfile.mktemp(suffix='.dumbdbm') + atexit.register(atexit_rmglob, path + '*') + + import dbm.dumb as dumbdbm + store = DBMStoreV3(path, flag='n', open=dumbdbm.open, **kwargs) + return store + + +class TestDBMStoreV3Gnu(TestDBMStoreGnu, StoreV3Tests): + + def create_store(self, **kwargs): + gdbm = pytest.importorskip("dbm.gnu") + path = tempfile.mktemp(suffix=".gdbm") # pragma: no cover + atexit.register(os.remove, path) # pragma: no cover + store = DBMStoreV3( + path, flag="n", open=gdbm.open, write_lock=False, **kwargs + ) # pragma: no cover + return store # pragma: no cover + + +class TestDBMStoreV3NDBM(TestDBMStoreNDBM, StoreV3Tests): + + def create_store(self, **kwargs): + ndbm = pytest.importorskip("dbm.ndbm") + path = tempfile.mktemp(suffix=".ndbm") # pragma: no cover + atexit.register(atexit_rmglob, path + "*") # pragma: no cover + store = DBMStoreV3(path, flag="n", open=ndbm.open, **kwargs) # pragma: no cover + return store # pragma: no cover + + +class TestDBMStoreV3BerkeleyDB(TestDBMStoreBerkeleyDB, StoreV3Tests): + + def create_store(self, **kwargs): + bsddb3 = pytest.importorskip("bsddb3") + path = tempfile.mktemp(suffix='.dbm') + atexit.register(os.remove, path) + store = DBMStoreV3(path, flag='n', open=bsddb3.btopen, write_lock=False, **kwargs) + return store + + +class TestLMDBStoreV3(TestLMDBStore, StoreV3Tests): + + def create_store(self, **kwargs): + pytest.importorskip("lmdb") + path = 
tempfile.mktemp(suffix='.lmdb') + atexit.register(atexit_rmtree, path) + buffers = True + store = LMDBStoreV3(path, buffers=buffers, **kwargs) + return store + + +class TestSQLiteStoreV3(TestSQLiteStore, StoreV3Tests): + + def create_store(self, **kwargs): + pytest.importorskip("sqlite3") + path = tempfile.mktemp(suffix='.db') + atexit.register(atexit_rmtree, path) + store = SQLiteStoreV3(path, **kwargs) + return store + + +class TestSQLiteStoreV3InMemory(TestSQLiteStoreInMemory, StoreV3Tests): + + def create_store(self, **kwargs): + pytest.importorskip("sqlite3") + store = SQLiteStoreV3(':memory:', **kwargs) + return store + + +@skip_test_env_var("ZARR_TEST_MONGO") +class TestMongoDBStoreV3(StoreV3Tests): + + def create_store(self, **kwargs): + pytest.importorskip("pymongo") + store = MongoDBStoreV3(host='127.0.0.1', database='zarr_tests', + collection='zarr_tests', **kwargs) + # start with an empty store + store.clear() + return store + + +@skip_test_env_var("ZARR_TEST_REDIS") +class TestRedisStoreV3(StoreV3Tests): + + def create_store(self, **kwargs): + # TODO: this is the default host for Redis on Travis, + # we probably want to generalize this though + pytest.importorskip("redis") + store = RedisStoreV3(host='localhost', port=6379, **kwargs) + # start with an empty store + store.clear() + return store + + +class TestLRUStoreCacheV3(TestLRUStoreCache, StoreV3Tests): + + def create_store(self, **kwargs): + # wrapper therefore no dimension_separator argument + skip_if_nested_chunks(**kwargs) + return LRUStoreCacheV3(dict(), max_size=2**27) + + def test_cache_values_no_max_size(self): + + # setup store + store = CountingDictV3() + store['foo'] = b'xxx' + store['bar'] = b'yyy' + assert 0 == store.counter['__getitem__', 'foo'] + assert 1 == store.counter['__setitem__', 'foo'] + assert 0 == store.counter['__getitem__', 'bar'] + assert 1 == store.counter['__setitem__', 'bar'] + + # setup cache + cache = LRUStoreCacheV3(store, max_size=None) + assert 0 == cache.hits + assert 0 == cache.misses + + # test first __getitem__, cache miss + assert b'xxx' == cache['foo'] + assert 1 == store.counter['__getitem__', 'foo'] + assert 1 == store.counter['__setitem__', 'foo'] + assert 0 == cache.hits + assert 1 == cache.misses + + # test second __getitem__, cache hit + assert b'xxx' == cache['foo'] + assert 1 == store.counter['__getitem__', 'foo'] + assert 1 == store.counter['__setitem__', 'foo'] + assert 1 == cache.hits + assert 1 == cache.misses + + # test __setitem__, __getitem__ + cache['foo'] = b'zzz' + assert 1 == store.counter['__getitem__', 'foo'] + assert 2 == store.counter['__setitem__', 'foo'] + # should be a cache hit + assert b'zzz' == cache['foo'] + assert 1 == store.counter['__getitem__', 'foo'] + assert 2 == store.counter['__setitem__', 'foo'] + assert 2 == cache.hits + assert 1 == cache.misses + + # manually invalidate all cached values + cache.invalidate_values() + assert b'zzz' == cache['foo'] + assert 2 == store.counter['__getitem__', 'foo'] + assert 2 == store.counter['__setitem__', 'foo'] + cache.invalidate() + assert b'zzz' == cache['foo'] + assert 3 == store.counter['__getitem__', 'foo'] + assert 2 == store.counter['__setitem__', 'foo'] + + # test __delitem__ + del cache['foo'] + with pytest.raises(KeyError): + # noinspection PyStatementEffect + cache['foo'] + with pytest.raises(KeyError): + # noinspection PyStatementEffect + store['foo'] + + # verify other keys untouched + assert 0 == store.counter['__getitem__', 'bar'] + assert 1 == store.counter['__setitem__', 'bar'] + + def 
test_cache_values_with_max_size(self): + + # setup store + store = CountingDictV3() + store['foo'] = b'xxx' + store['bar'] = b'yyy' + assert 0 == store.counter['__getitem__', 'foo'] + assert 0 == store.counter['__getitem__', 'bar'] + # setup cache - can only hold one item + cache = LRUStoreCacheV3(store, max_size=5) + assert 0 == cache.hits + assert 0 == cache.misses + + # test first 'foo' __getitem__, cache miss + assert b'xxx' == cache['foo'] + assert 1 == store.counter['__getitem__', 'foo'] + assert 0 == cache.hits + assert 1 == cache.misses + + # test second 'foo' __getitem__, cache hit + assert b'xxx' == cache['foo'] + assert 1 == store.counter['__getitem__', 'foo'] + assert 1 == cache.hits + assert 1 == cache.misses + + # test first 'bar' __getitem__, cache miss + assert b'yyy' == cache['bar'] + assert 1 == store.counter['__getitem__', 'bar'] + assert 1 == cache.hits + assert 2 == cache.misses + + # test second 'bar' __getitem__, cache hit + assert b'yyy' == cache['bar'] + assert 1 == store.counter['__getitem__', 'bar'] + assert 2 == cache.hits + assert 2 == cache.misses + + # test 'foo' __getitem__, should have been evicted, cache miss + assert b'xxx' == cache['foo'] + assert 2 == store.counter['__getitem__', 'foo'] + assert 2 == cache.hits + assert 3 == cache.misses + + # test 'bar' __getitem__, should have been evicted, cache miss + assert b'yyy' == cache['bar'] + assert 2 == store.counter['__getitem__', 'bar'] + assert 2 == cache.hits + assert 4 == cache.misses + + # setup store + store = CountingDictV3() + store['foo'] = b'xxx' + store['bar'] = b'yyy' + assert 0 == store.counter['__getitem__', 'foo'] + assert 0 == store.counter['__getitem__', 'bar'] + # setup cache - can hold two items + cache = LRUStoreCacheV3(store, max_size=6) + assert 0 == cache.hits + assert 0 == cache.misses + + # test first 'foo' __getitem__, cache miss + assert b'xxx' == cache['foo'] + assert 1 == store.counter['__getitem__', 'foo'] + assert 0 == cache.hits + assert 1 == cache.misses + + # test second 'foo' __getitem__, cache hit + assert b'xxx' == cache['foo'] + assert 1 == store.counter['__getitem__', 'foo'] + assert 1 == cache.hits + assert 1 == cache.misses + + # test first 'bar' __getitem__, cache miss + assert b'yyy' == cache['bar'] + assert 1 == store.counter['__getitem__', 'bar'] + assert 1 == cache.hits + assert 2 == cache.misses + + # test second 'bar' __getitem__, cache hit + assert b'yyy' == cache['bar'] + assert 1 == store.counter['__getitem__', 'bar'] + assert 2 == cache.hits + assert 2 == cache.misses + + # test 'foo' __getitem__, should still be cached + assert b'xxx' == cache['foo'] + assert 1 == store.counter['__getitem__', 'foo'] + assert 3 == cache.hits + assert 2 == cache.misses + + # test 'bar' __getitem__, should still be cached + assert b'yyy' == cache['bar'] + assert 1 == store.counter['__getitem__', 'bar'] + assert 4 == cache.hits + assert 2 == cache.misses + + def test_cache_keys(self): + + # setup + store = CountingDictV3() + store['foo'] = b'xxx' + store['bar'] = b'yyy' + assert 0 == store.counter['__contains__', 'foo'] + assert 0 == store.counter['__iter__'] + assert 0 == store.counter['keys'] + cache = LRUStoreCacheV3(store, max_size=None) + + # keys should be cached on first call + keys = sorted(cache.keys()) + assert keys == ['bar', 'foo'] + assert 1 == store.counter['keys'] + # keys should now be cached + assert keys == sorted(cache.keys()) + assert 1 == store.counter['keys'] + assert 'foo' in cache + assert 0 == store.counter['__contains__', 'foo'] + assert keys == 
sorted(cache) + assert 0 == store.counter['__iter__'] + assert 1 == store.counter['keys'] + + # cache should be cleared if store is modified - crude but simple for now + cache['baz'] = b'zzz' + keys = sorted(cache.keys()) + assert keys == ['bar', 'baz', 'foo'] + assert 2 == store.counter['keys'] + # keys should now be cached + assert keys == sorted(cache.keys()) + assert 2 == store.counter['keys'] + + # manually invalidate keys + cache.invalidate_keys() + keys = sorted(cache.keys()) + assert keys == ['bar', 'baz', 'foo'] + assert 3 == store.counter['keys'] + assert 0 == store.counter['__contains__', 'foo'] + assert 0 == store.counter['__iter__'] + cache.invalidate_keys() + keys = sorted(cache) + assert keys == ['bar', 'baz', 'foo'] + assert 4 == store.counter['keys'] + assert 0 == store.counter['__contains__', 'foo'] + assert 0 == store.counter['__iter__'] + cache.invalidate_keys() + assert 'foo' in cache + assert 5 == store.counter['keys'] + assert 0 == store.counter['__contains__', 'foo'] + assert 0 == store.counter['__iter__'] + + # check these would get counted if called directly + assert 'foo' in store + assert 1 == store.counter['__contains__', 'foo'] + assert keys == sorted(store) + assert 1 == store.counter['__iter__'] + + +# TODO: implement ABSStoreV3 +# @skip_test_env_var("ZARR_TEST_ABS") +# class TestABSStoreV3(TestABSStore, StoreV3Tests): diff --git a/zarr/tests/util.py b/zarr/tests/util.py index e0f11d72ad..bb4df90d1b 100644 --- a/zarr/tests/util.py +++ b/zarr/tests/util.py @@ -1,7 +1,7 @@ import collections import os -from zarr.storage import Store +from zarr.storage import Store, StoreV3 import pytest @@ -41,6 +41,10 @@ def __delitem__(self, key): del self.wrapped[key] +class CountingDictV3(CountingDict, StoreV3): + pass + + def skip_test_env_var(name): """ Checks for environment variables indicating whether tests requiring services should be run """ From 85b8e2322025a367a9f47329cfef9e8a095b0e87 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 17 Nov 2021 13:58:52 -0500 Subject: [PATCH 002/109] add TODO comment to meta.py --- zarr/meta.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/zarr/meta.py b/zarr/meta.py index 07fbdcb7d4..d3f4ec50d5 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -22,6 +22,12 @@ ) _v3_core_type = {"bool", "i1", "u1"} | _v3_core_type +# TODO: How do we want to handle dtypes not officially in the v3 spec? +# Those in _v3_core_type above are the only ones defined in the spec. +# However we currently support many other dtypes for v2. For now, I also +# allow all of these for v3 unless the user sets an environment variable +# ZARR_V3_CORE_DTYPES_ONLY=1, etc. 
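+#
+# Example (sketch): a user would opt in to the stricter, spec-only behaviour
+# with something like
+#
+#     ZARR_V3_CORE_DTYPES_ONLY=1 python script.py
+#
+# after which encoding or decoding any non-core dtype (complex, datetime64,
+# object, structured, bytes or unicode arrays) raises ValueError.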
+ ZARR_V3_CORE_DTYPES_ONLY = int(os.environ.get("ZARR_V3_CORE_DTYPES_ONLY", False)) ZARR_V3_ALLOW_COMPLEX = int(os.environ.get("ZARR_V3_ALLOW_COMPLEX", not ZARR_V3_CORE_DTYPES_ONLY)) From 983d190dd7be8fb9b7db95cbb3d92c43517a71d3 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 30 Nov 2021 14:37:47 -0500 Subject: [PATCH 003/109] fix flake8 errors --- zarr/_storage/store.py | 1 - zarr/tests/test_storage.py | 9 ++------- zarr/tests/test_storage_v3.py | 6 +++--- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 0ff9e0c043..2c6d7b3978 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -1,4 +1,3 @@ -import json import sys from collections.abc import MutableMapping from string import ascii_letters, digits diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 0865917926..1cad7f459f 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -18,9 +18,9 @@ from numcodecs.compat import ensure_bytes from zarr.codecs import BZ2, AsType, Blosc, Zlib -from zarr.errors import ContainsArrayError, ContainsGroupError, MetadataError +from zarr.errors import MetadataError from zarr.hierarchy import group -from zarr.meta import ZARR_FORMAT, ZARR_FORMAT_v3, decode_array_metadata +from zarr.meta import ZARR_FORMAT, decode_array_metadata from zarr.n5 import N5Store, N5FSStore from zarr.storage import (ABSStore, ConsolidatedMetadataStore, DBMStore, DictStore, DirectoryStore, KVStore, LMDBStore, @@ -31,12 +31,7 @@ attrs_key, default_compressor, getsize, group_meta_key, init_array, init_group, migrate_1to2) from zarr.storage import FSStore, rename, listdir -from zarr.storage import (KVStoreV3, MemoryStoreV3, ZipStoreV3, FSStoreV3, - DirectoryStoreV3, NestedDirectoryStoreV3, - RedisStoreV3, MongoDBStoreV3, DBMStoreV3, - LMDBStoreV3, SQLiteStoreV3, LRUStoreCacheV3) from zarr.tests.util import CountingDict, have_fsspec, skip_test_env_var, abs_container -from zarr.tests.util import CountingDictV3 @contextmanager diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index 9118bc513c..24f358c350 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -6,8 +6,6 @@ import numpy as np import pytest -from numcodecs.compat import ensure_bytes - from zarr.codecs import Zlib from zarr.errors import ContainsArrayError, ContainsGroupError from zarr.meta import ZARR_FORMAT, ZARR_FORMAT_v3 @@ -25,9 +23,11 @@ TestDBMStoreNDBM, TestDBMStoreBerkeleyDB, TestLMDBStore, TestSQLiteStore, TestSQLiteStoreInMemory, TestLRUStoreCache, - dimension_separator_fixture, s3, skip_if_nested_chunks) +# pytest will fail to run if the following fixtures aren't imported here +from .test_storage import dimension_separator_fixture, s3 # noqa + @pytest.fixture(params=[ (None, "/"), From 9ed6181c41567fa99ef6fe1c3f73d89d62280706 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 14 Dec 2021 19:10:04 -0500 Subject: [PATCH 004/109] follow zarr v3 spec when dealing with extension data types --- zarr/meta.py | 224 +++++++++++++++++++++++++-------------------------- 1 file changed, 111 insertions(+), 113 deletions(-) diff --git a/zarr/meta.py b/zarr/meta.py index d3f4ec50d5..72c6cfc869 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -1,6 +1,7 @@ import base64 import itertools import os +from collections import namedtuple from collections.abc import Mapping import numpy as np @@ -13,42 +14,72 @@ ZARR_FORMAT = 2 ZARR_FORMAT_v3 = 3 -FLOAT_FILLS = {"NaN": np.nan, "Infinity": np.PINF, 
"-Infinity": np.NINF} - - -_v3_core_type = set( - "".join(d) - for d in itertools.product("<>", ("u", "i", "f"), ("2", "4", "8")) -) -_v3_core_type = {"bool", "i1", "u1"} | _v3_core_type - -# TODO: How do we want to handle dtypes not officially in the v3 spec? -# Those in _v3_core_type above are the only ones defined in the spec. -# However we currently support many other dtypes for v2. For now, I also -# allow all of these for v3 unless the user sets an environment variable -# ZARR_V3_CORE_DTYPES_ONLY=1, etc. - -ZARR_V3_CORE_DTYPES_ONLY = int(os.environ.get("ZARR_V3_CORE_DTYPES_ONLY", False)) -ZARR_V3_ALLOW_COMPLEX = int(os.environ.get("ZARR_V3_ALLOW_COMPLEX", - not ZARR_V3_CORE_DTYPES_ONLY)) -ZARR_V3_ALLOW_DATETIME = int(os.environ.get("ZARR_V3_ALLOW_DATETIME", - not ZARR_V3_CORE_DTYPES_ONLY)) -ZARR_V3_ALLOW_STRUCTURED = int(os.environ.get("ZARR_V3_ALLOW_STRUCTURED", - not ZARR_V3_CORE_DTYPES_ONLY)) -ZARR_V3_ALLOW_OBJECTARRAY = int(os.environ.get("ZARR_V3_ALLOW_OBJECTARRAY", - not ZARR_V3_CORE_DTYPES_ONLY)) -ZARR_V3_ALLOW_BYTES_ARRAY = int(os.environ.get("ZARR_V3_ALLOW_BYTES_ARRAY", - not ZARR_V3_CORE_DTYPES_ONLY)) -ZARR_V3_ALLOW_UNICODE_ARRAY = int(os.environ.get("ZARR_V3_ALLOW_UNICODE_ARRAY", - not ZARR_V3_CORE_DTYPES_ONLY)) +# FLOAT_FILLS = {"NaN": np.nan, "Infinity": np.PINF, "-Infinity": np.NINF} _default_entry_point_metadata_v3 = { - 'zarr_format': "https://purl.org/zarr/spec/protocol/core/3.0", - 'metadata_encoding': "https://purl.org/zarr/spec/protocol/core/3.0", - 'metadata_key_suffix': '.json', + "zarr_format": "https://purl.org/zarr/spec/protocol/core/3.0", + "metadata_encoding": "https://purl.org/zarr/spec/protocol/core/3.0", + "metadata_key_suffix": ".json", "extensions": [], } +_v3_core_types = set( + "".join(d) for d in itertools.product("<>", ("u", "i", "f"), ("2", "4", "8")) +) +_v3_core_types = {"bool", "i1", "u1"} | _v3_core_types + +# The set of complex types allowed ({"c4", ">c8"}) +_v3_complex_types = set( + f"{end}c{_bytes}" for end, _bytes in itertools.product("<>", ("4", "8")) +) + +# All dtype.str values corresponding to datetime64 and timedelta64 +# see: https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-units +_date_units = ["Y", "M", "W", "D"] +_time_units = ["h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as"] +_v3_datetime_types = set(f"{end}{kind}8[{unit}]" for end, unit, kind in itertools.product("<>", _date_units + _time_units, ('m', 'M'))) + + +def get_extended_dtype_info(dtype): + if dtype.str in _v3_complex_types: + return dict( + extension="https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/extensions/complex-dtypes/v1.0.html", # noqa + type=dtype.str, + fallback=None, + ) + elif dtype.str == "|O": + return dict( + extension="TODO: object array protocol URL", # noqa + type=dtype.str, + fallback=None, + ) + elif dtype.str.startswith("|S"): + return dict( + extension="TODO: bytestring array protocol URL", # noqa + type=dtype.str, + fallback=None, + ) + elif dtype.str.startswith("|U"): + return dict( + extension="TODO: unicode array protocol URL", # noqa + type=dtype.str, + fallback=None, + ) + elif dtype.str.startswith("|V"): + return dict( + extension="TODO: structured array protocol URL", # noqa + type=dtype.descr, + fallback=None, + ) + elif dtype.str in _v3_datetime_types: + return dict( + extension="https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/extensions/datetime-dtypes/v1.0.html", # noqa + type=dtype.str, + fallback=None, + ) + else: + raise ValueError(f"Unsupport dtype: {dtype}") + 
class Metadata2: ZARR_FORMAT = ZARR_FORMAT @@ -85,12 +116,13 @@ def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, A dtype = cls.decode_dtype(meta["dtype"]) if dtype.hasobject: import numcodecs - object_codec = numcodecs.get_codec(meta['filters'][0]) + + object_codec = numcodecs.get_codec(meta["filters"][0]) else: object_codec = None dimension_separator = meta.get("dimension_separator", None) - fill_value = cls.decode_fill_value(meta['fill_value'], dtype, object_codec) + fill_value = cls.decode_fill_value(meta["fill_value"], dtype, object_codec) meta = dict( zarr_format=meta["zarr_format"], shape=tuple(meta["shape"]), @@ -102,7 +134,7 @@ def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, A filters=meta["filters"], ) if dimension_separator: - meta['dimension_separator'] = dimension_separator + meta["dimension_separator"] = dimension_separator except Exception as e: raise MetadataError("error decoding metadata") from e else: @@ -118,7 +150,8 @@ def encode_array_metadata(cls, meta: MappingType[str, Any]) -> bytes: dimension_separator = meta.get("dimension_separator") if dtype.hasobject: import numcodecs - object_codec = numcodecs.get_codec(meta['filters'][0]) + + object_codec = numcodecs.get_codec(meta["filters"][0]) else: object_codec = None @@ -133,7 +166,7 @@ def encode_array_metadata(cls, meta: MappingType[str, Any]) -> bytes: filters=meta["filters"], ) if dimension_separator: - meta['dimension_separator'] = dimension_separator + meta["dimension_separator"] = dimension_separator if dimension_separator: meta["dimension_separator"] = dimension_separator @@ -180,13 +213,15 @@ def encode_group_metadata(cls, meta=None) -> bytes: return json_dumps(meta) @classmethod - def decode_fill_value(cls, v: Any, dtype: np.dtype, object_codec: Any = None) -> Any: + def decode_fill_value( + cls, v: Any, dtype: np.dtype, object_codec: Any = None + ) -> Any: # early out if v is None: return v - if dtype.kind == 'V' and dtype.hasobject: + if dtype.kind == "V" and dtype.hasobject: if object_codec is None: - raise ValueError('missing object_codec for object array') + raise ValueError("missing object_codec for object array") v = base64.standard_b64decode(v) v = object_codec.decode(v) v = np.array(v, dtype=dtype)[()] @@ -228,15 +263,17 @@ def decode_fill_value(cls, v: Any, dtype: np.dtype, object_codec: Any = None) -> return np.array(v, dtype=dtype)[()] @classmethod - def encode_fill_value(cls, v: Any, dtype: np.dtype, object_codec: Any = None) -> Any: + def encode_fill_value( + cls, v: Any, dtype: np.dtype, object_codec: Any = None + ) -> Any: # early out if v is None: return v - if dtype.kind == 'V' and dtype.hasobject: + if dtype.kind == "V" and dtype.hasobject: if object_codec is None: - raise ValueError('missing object_codec for object array') + raise ValueError("missing object_codec for object array") v = object_codec.encode(v) - v = str(base64.standard_b64encode(v), 'ascii') + v = str(base64.standard_b64encode(v), "ascii") return v if dtype.kind == "f": if np.isnan(v): @@ -253,8 +290,10 @@ def encode_fill_value(cls, v: Any, dtype: np.dtype, object_codec: Any = None) -> return bool(v) elif dtype.kind in "c": c = cast(np.complex128, np.dtype(complex).type()) - v = (cls.encode_fill_value(v.real, c.real.dtype, object_codec), - cls.encode_fill_value(v.imag, c.imag.dtype, object_codec)) + v = ( + cls.encode_fill_value(v.real, c.real.dtype, object_codec), + cls.encode_fill_value(v.imag, c.imag.dtype, object_codec), + ) return v elif dtype.kind in "SV": v = 
str(base64.standard_b64encode(v), "ascii") @@ -272,74 +311,29 @@ class Metadata3(Metadata2): @classmethod def decode_dtype(cls, d): + if isinstance(d, dict): + # extract the type from the extension info + info = get_extended_dtype_info(d) + d = info['type'] d = cls._decode_dtype_descr(d) dtype = np.dtype(d) - if dtype.kind == 'c': - if not ZARR_V3_ALLOW_COMPLEX: - raise ValueError("complex-valued arrays not supported") - elif dtype.kind in 'mM': - if not ZARR_V3_ALLOW_DATETIME: - raise ValueError( - "datetime64 and timedelta64 arrays not supported" - ) - elif dtype.kind == 'O': - if not ZARR_V3_ALLOW_OBJECTARRAY: - raise ValueError("object arrays not supported") - elif dtype.kind == 'V': - if not ZARR_V3_ALLOW_STRUCTURED: - raise ValueError("structured arrays not supported") - elif dtype.kind == 'U': - if not ZARR_V3_ALLOW_UNICODE_ARRAY: - raise ValueError("unicode arrays not supported") - elif dtype.kind == 'S': - if not ZARR_V3_ALLOW_BYTES_ARRAY: - raise ValueError("bytes arrays not supported") - else: - assert d in _v3_core_type return dtype @classmethod def encode_dtype(cls, d): - s = Metadata2.encode_dtype(d) + s = d.str if s == "|b1": return "bool" elif s == "|u1": return "u1" elif s == "|i1": return "i1" - dtype = np.dtype(d) - if dtype.kind == "c": - if not ZARR_V3_ALLOW_COMPLEX: - raise ValueError( - "complex-valued arrays not part of the base v3 spec" - ) - elif dtype.kind in "mM": - if not ZARR_V3_ALLOW_DATETIME: - raise ValueError( - "datetime64 and timedelta64 not part of the base v3 " - "spec" - ) - elif dtype.kind == "O": - if not ZARR_V3_ALLOW_OBJECTARRAY: - raise ValueError( - "object dtypes are not part of the base v3 spec" - ) - elif dtype.kind == "V": - if not ZARR_V3_ALLOW_STRUCTURED: - raise ValueError( - "structured arrays are not part of the base v3 spec" - ) - elif dtype.kind == 'U': - if not ZARR_V3_ALLOW_UNICODE_ARRAY: - raise ValueError("unicode dtypes are not part of the base v3 " - "spec") - elif dtype.kind == 'S': - if not ZARR_V3_ALLOW_BYTES_ARRAY: - raise ValueError("bytes dtypes are not part of the base v3 " - "spec") + elif s in _v3_core_types: + return Metadata2.encode_dtype(d) else: - assert s in _v3_core_type - return s + # Check if this dtype corresponds to a supported extension to + # the v3 protocol. + return get_extended_dtype_info(np.dtype(d)) @classmethod def decode_group_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]: @@ -350,7 +344,7 @@ def decode_group_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, A # if zarr_format != cls.ZARR_FORMAT: # raise MetadataError("unsupported zarr format: %s" % zarr_format) - assert 'attributes' in meta + assert "attributes" in meta # meta = dict(attributes=meta['attributes']) return meta @@ -362,7 +356,7 @@ def encode_group_metadata(cls, meta=None) -> bytes: # entry point metadata instead # meta = dict(zarr_format=cls.ZARR_FORMAT) if meta is None: - meta = {'attributes': {}} + meta = {"attributes": {}} meta = dict(attributes=meta.get("attributes", {})) return json_dumps(meta) @@ -371,26 +365,28 @@ def encode_hierarchy_metadata(cls, meta=None) -> bytes: if meta is None: meta = _default_entry_point_metadata_v3 elif set(meta.keys()) != { - "zarr_format", - "metadata_encoding", - "metadata_key_suffix", - "extensions", + "zarr_format", + "metadata_encoding", + "metadata_key_suffix", + "extensions", }: raise ValueError(f"Unexpected keys in metadata. 
meta={meta}") return json_dumps(meta) @classmethod - def decode_hierarchy_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]: + def decode_hierarchy_metadata( + cls, s: Union[MappingType, str] + ) -> MappingType[str, Any]: meta = cls.parse_metadata(s) # check metadata format # zarr_format = meta.get("zarr_format", None) # if zarr_format != "https://purl.org/zarr/spec/protocol/core/3.0": # raise MetadataError("unsupported zarr format: %s" % zarr_format) if set(meta.keys()) != { - "zarr_format", - "metadata_encoding", - "metadata_key_suffix", - "extensions", + "zarr_format", + "metadata_encoding", + "metadata_key_suffix", + "extensions", }: raise ValueError(f"Unexpected keys in metdata. meta={meta}") return meta @@ -409,7 +405,8 @@ def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, A dtype = cls.decode_dtype(meta["data_type"]) if dtype.hasobject: import numcodecs - object_codec = numcodecs.get_codec(meta['attributes']['filters'][0]) + + object_codec = numcodecs.get_codec(meta["attributes"]["filters"][0]) else: object_codec = None fill_value = cls.decode_fill_value(meta["fill_value"], dtype, object_codec) @@ -446,7 +443,8 @@ def encode_array_metadata(cls, meta: MappingType[str, Any]) -> bytes: dimension_separator = meta.get("dimension_separator") if dtype.hasobject: import numcodecs - object_codec = numcodecs.get_codec(meta['attributes']['filters'][0]) + + object_codec = numcodecs.get_codec(meta["attributes"]["filters"][0]) else: object_codec = None meta = dict( From 662e310fb5fe01500a10c5f02c358ebda559a5ec Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 14 Dec 2021 19:40:07 -0500 Subject: [PATCH 005/109] fixes to v3 dtype handling --- zarr/meta.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/zarr/meta.py b/zarr/meta.py index 72c6cfc869..bdb7dd9702 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -28,9 +28,9 @@ ) _v3_core_types = {"bool", "i1", "u1"} | _v3_core_types -# The set of complex types allowed ({"c4", ">c8"}) +# The set of complex types allowed ({"c8", ">c16"}) _v3_complex_types = set( - f"{end}c{_bytes}" for end, _bytes in itertools.product("<>", ("4", "8")) + f"{end}c{_bytes}" for end, _bytes in itertools.product("<>", ("8", "16")) ) # All dtype.str values corresponding to datetime64 and timedelta64 @@ -310,13 +310,24 @@ class Metadata3(Metadata2): ZARR_FORMAT = ZARR_FORMAT_v3 @classmethod - def decode_dtype(cls, d): + def decode_dtype(cls, d, validate=True): if isinstance(d, dict): # extract the type from the extension info - info = get_extended_dtype_info(d) - d = info['type'] + try: + d = d['type'] + except KeyError: + raise KeyError( + "Extended dtype info must provide a key named 'type'." 
+ ) d = cls._decode_dtype_descr(d) dtype = np.dtype(d) + if validate: + if dtype.str in (_v3_core_types | {"|b1", "|u1", "|i1"}): + # it is a core dtype of the v3 spec + pass + else: + # will raise if this is not a recognized extended dtype + get_extended_dtype_info(dtype) return dtype @classmethod From 450c57506a7427692a55acf098a57a10152dda88 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 15 Dec 2021 09:40:50 -0500 Subject: [PATCH 006/109] flake8 cleanup --- zarr/meta.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/zarr/meta.py b/zarr/meta.py index bdb7dd9702..3d56e16fd3 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -1,7 +1,5 @@ import base64 import itertools -import os -from collections import namedtuple from collections.abc import Mapping import numpy as np @@ -37,7 +35,10 @@ # see: https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-units _date_units = ["Y", "M", "W", "D"] _time_units = ["h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as"] -_v3_datetime_types = set(f"{end}{kind}8[{unit}]" for end, unit, kind in itertools.product("<>", _date_units + _time_units, ('m', 'M'))) +_v3_datetime_types = set( + f"{end}{kind}8[{unit}]" + for end, unit, kind in itertools.product("<>", _date_units + _time_units, ('m', 'M')) +) def get_extended_dtype_info(dtype): @@ -322,7 +323,7 @@ def decode_dtype(cls, d, validate=True): d = cls._decode_dtype_descr(d) dtype = np.dtype(d) if validate: - if dtype.str in (_v3_core_types | {"|b1", "|u1", "|i1"}): + if dtype.str in (_v3_core_types | {"|b1", "|u1", "|i1"}): # it is a core dtype of the v3 spec pass else: From 63a9e3c5ea8d692f67338036f1ed3e4134903ae3 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 15 Dec 2021 19:47:06 -0500 Subject: [PATCH 007/109] remove duplicate lines in Metadata2.encode_array_metadata --- zarr/meta.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/zarr/meta.py b/zarr/meta.py index 3d56e16fd3..62bbd1b93d 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -169,9 +169,6 @@ def encode_array_metadata(cls, meta: MappingType[str, Any]) -> bytes: if dimension_separator: meta["dimension_separator"] = dimension_separator - if dimension_separator: - meta["dimension_separator"] = dimension_separator - return json_dumps(meta) @classmethod From 1a2a1efe88d1a09a0460ceafe3d7733ec8bfc5dd Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Thu, 16 Dec 2021 22:24:49 -0500 Subject: [PATCH 008/109] Fix fields in array metadata zarr_version should not be in the array metadata, only the base store metadata compressor should be absent when there is no compression --- zarr/meta.py | 18 +++++++++--------- zarr/tests/test_storage_v3.py | 13 ++----------- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/zarr/meta.py b/zarr/meta.py index 62bbd1b93d..b14d4c8293 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -404,11 +404,6 @@ def decode_hierarchy_metadata( def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]: meta = cls.parse_metadata(s) - # check metadata format - zarr_format = meta.get("zarr_format", None) - if zarr_format != cls.ZARR_FORMAT: - raise MetadataError("unsupported zarr format: %s" % zarr_format) - # extract array metadata fields try: dtype = cls.decode_dtype(meta["data_type"]) @@ -420,8 +415,9 @@ def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, A object_codec = None fill_value = cls.decode_fill_value(meta["fill_value"], dtype, object_codec) # TODO: remove dimension_separator? 
+
+        compressor = meta.get("compressor", None)
         meta = dict(
-            zarr_format=meta["zarr_format"],
             shape=tuple(meta["shape"]),
             chunk_grid=dict(
                 type=meta["chunk_grid"]["type"],
                 chunk_shape=tuple(meta["chunk_grid"]["chunk_shape"]),
                 separator=meta["chunk_grid"]["separator"],
             ),
             data_type=dtype,
-            compressor=meta["compressor"],
             fill_value=fill_value,
             chunk_memory_layout=meta["chunk_memory_layout"],
             dimension_separator=meta.get("dimension_separator", "/"),
             attributes=meta["attributes"],
         )
+        # compressor field should be absent when there is no compression
+        if compressor:
+            meta['compressor'] = compressor
+
         # dimension_separator = meta.get("dimension_separator", None)
         # if dimension_separator:
         #     meta["dimension_separator"] = dimension_separator
@@ -456,8 +455,8 @@ def encode_array_metadata(cls, meta: MappingType[str, Any]) -> bytes:
             object_codec = numcodecs.get_codec(meta["attributes"]["filters"][0])
         else:
             object_codec = None
+        compressor = meta.get("compressor", None)
         meta = dict(
-            zarr_format=cls.ZARR_FORMAT,
             shape=meta["shape"] + sdshape,
             chunk_grid=dict(
                 type=meta["chunk_grid"]["type"],
                 chunk_shape=tuple(meta["chunk_grid"]["chunk_shape"]),
                 separator=meta["chunk_grid"]["separator"],
             ),
             data_type=cls.encode_dtype(dtype),
-            compressor=meta["compressor"],
             fill_value=encode_fill_value(meta["fill_value"], dtype, object_codec),
             chunk_memory_layout=meta["chunk_memory_layout"],
             attributes=meta.get("attributes", {}),
         )
+        if compressor:
+            meta["compressor"] = compressor
         if dimension_separator:
             meta["dimension_separator"] = dimension_separator
         return json_dumps(meta)

diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py
index 24f358c350..23b202d618 100644
--- a/zarr/tests/test_storage_v3.py
+++ b/zarr/tests/test_storage_v3.py
@@ -8,7 +8,7 @@
 
 from zarr.codecs import Zlib
 from zarr.errors import ContainsArrayError, ContainsGroupError
-from zarr.meta import ZARR_FORMAT, ZARR_FORMAT_v3
+from zarr.meta import ZARR_FORMAT
 from zarr.storage import (array_meta_key, atexit_rmglob, atexit_rmtree,
                           default_compressor, getsize, init_array, init_group)
 from zarr.storage import (KVStoreV3, MemoryStoreV3, ZipStoreV3, FSStoreV3,
@@ -92,7 +92,6 @@ def test_init_array(self, dimension_separator_fixture_v3):
         meta = store._metadata_class.decode_array_metadata(store[mkey])
         # TODO: zarr_format already stored at the hierarchy level; should we
         # also keep it in the .array.json?
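         # For reference, an illustrative sketch of the v3 array metadata
         # checked below (default dtype; compressor entry omitted for brevity;
         # exact fields are those written by Metadata3.encode_array_metadata):
         #   {"shape": [1000],
         #    "chunk_grid": {"type": "regular", "chunk_shape": [100],
         #                   "separator": "/"},
         #    "data_type": "<f8", "fill_value": null,
         #    "chunk_memory_layout": "C", "attributes": {}}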
- assert ZARR_FORMAT_v3 == meta['zarr_format'] assert (1000,) == meta['shape'] assert (100,) == meta['chunk_grid']['chunk_shape'] assert np.dtype(None) == meta['data_type'] @@ -146,7 +145,6 @@ def _test_init_array_overwrite(self, order): assert (100,) == meta['chunks'] assert np.dtype('i4') == meta['dtype'] elif store._store_version == 3: - assert ZARR_FORMAT_v3 == meta['zarr_format'] assert (100,) == meta['chunk_grid']['chunk_shape'] assert np.dtype('i4') == meta['data_type'] else: @@ -164,7 +162,6 @@ def test_init_array_path(self): mkey = 'meta/root/' + path + '.array.json' assert mkey in store meta = store._metadata_class.decode_array_metadata(store[mkey]) - assert ZARR_FORMAT_v3 == meta['zarr_format'] assert (1000,) == meta['shape'] assert (100,) == meta['chunk_grid']['chunk_shape'] assert np.dtype(None) == meta['data_type'] @@ -203,7 +200,6 @@ def _test_init_array_overwrite_path(self, order): assert mkey in store # should have been overwritten meta = store._metadata_class.decode_array_metadata(store[mkey]) - assert ZARR_FORMAT_v3 == meta['zarr_format'] assert (1000,) == meta['shape'] assert (100,) == meta['chunk_grid']['chunk_shape'] assert np.dtype('i4') == meta['data_type'] @@ -233,7 +229,6 @@ def test_init_array_overwrite_group(self): meta = store._metadata_class.decode_array_metadata( store[array_key] ) - assert ZARR_FORMAT_v3 == meta['zarr_format'] assert (1000,) == meta['shape'] assert (100,) == meta['chunk_grid']['chunk_shape'] assert np.dtype('i4') == meta['data_type'] @@ -277,7 +272,6 @@ def _test_init_array_overwrite_chunk_store(self, order): else: assert mkey in store meta = store._metadata_class.decode_array_metadata(store[mkey]) - assert ZARR_FORMAT_v3 == meta['zarr_format'] assert (1000,) == meta['shape'] assert (100,) == meta['chunk_grid']['chunk_shape'] assert np.dtype('i4') == meta['data_type'] @@ -295,7 +289,7 @@ def test_init_array_compat(self): meta = store._metadata_class.decode_array_metadata( store[mkey] ) - assert meta['compressor'] is None + assert 'compressor' not in meta store.close() @@ -350,7 +344,6 @@ def _test_init_group_overwrite_path(self, order): assert group_key in store # should have been overwritten meta = store._metadata_class.decode_group_metadata(store[group_key]) - # assert ZARR_FORMAT == meta['zarr_format'] assert meta == {'attributes': {}} store.close() @@ -418,7 +411,6 @@ def test_init_array(self): array_meta_key = 'meta/root/' + path + '.array.json' assert array_meta_key in store meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) - assert ZARR_FORMAT_v3 == meta['zarr_format'] assert (1000,) == meta['shape'] assert (100,) == meta['chunk_grid']['chunk_shape'] assert np.dtype(None) == meta['data_type'] @@ -479,7 +471,6 @@ def test_init_array(self): array_meta_key = 'meta/root/' + path + '.array.json' assert array_meta_key in store meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) - assert ZARR_FORMAT_v3 == meta['zarr_format'] assert (1000,) == meta['shape'] assert (100,) == meta['chunk_grid']['chunk_shape'] assert np.dtype(None) == meta['data_type'] From bcb2d5f6f3c63cfcb21980899aa90b23746266e6 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Thu, 16 Dec 2021 23:36:30 -0500 Subject: [PATCH 009/109] Fix encode/decode of codec metadata classmethods adapted from zarrita code --- zarr/meta.py | 50 ++++++++++++++++++++++++++++++++--- zarr/storage.py | 5 ++-- zarr/tests/test_storage_v3.py | 8 +++--- 3 files changed, 54 insertions(+), 9 deletions(-) diff --git a/zarr/meta.py b/zarr/meta.py index 
b14d4c8293..893d3efbdf 100644
--- a/zarr/meta.py
+++ b/zarr/meta.py
@@ -2,12 +2,14 @@
 import itertools
 from collections.abc import Mapping
 
+import numcodecs
 import numpy as np
+from numcodecs.abc import Codec
 
 from zarr.errors import MetadataError
 from zarr.util import json_dumps, json_loads
 
-from typing import cast, Union, Any, List, Mapping as MappingType
+from typing import cast, Union, Any, List, Mapping as MappingType, Optional
 
 ZARR_FORMAT = 2
 ZARR_FORMAT_v3 = 3
@@ -400,6 +402,48 @@ def decode_hierarchy_metadata(
             raise ValueError(f"Unexpected keys in metadata. meta={meta}")
         return meta
 
+    @classmethod
+    def _encode_codec_metadata(cls, codec: Codec) -> Optional[Mapping]:
+        if codec is None:
+            return None
+
+        # only a small set of codecs is mapped to spec URIs for now
+        config = codec.get_config()
+        del config["id"]
+        uri = 'https://purl.org/zarr/spec/codec/'
+        if isinstance(codec, numcodecs.GZip):
+            uri = uri + "gzip/1.0"
+        elif isinstance(codec, numcodecs.Zlib):
+            uri = uri + "zlib/1.0"
+        elif isinstance(codec, numcodecs.Blosc):
+            uri = uri + "blosc/1.0"
+        meta = {
+            "codec": uri,
+            "configuration": config,
+        }
+        return meta
+
+    @classmethod
+    def _decode_codec_metadata(cls, meta: Optional[Mapping]) -> Optional[Codec]:
+        if meta is None:
+            return None
+
+        uri = 'https://purl.org/zarr/spec/codec/'
+        conf = meta['configuration']
+        if meta['codec'].startswith(uri + 'gzip/'):
+            codec = numcodecs.GZip(level=conf['level'])
+        elif meta['codec'].startswith(uri + 'zlib/'):
+            codec = numcodecs.Zlib()
+        elif meta['codec'].startswith(uri + 'blosc/'):
+            codec = numcodecs.Blosc(clevel=conf['clevel'],
+                                    shuffle=conf['shuffle'],
+                                    blocksize=conf['blocksize'],
+                                    cname=conf['cname'])
+        else:
+            raise NotImplementedError
+
+        return codec
+
     @classmethod
     def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]:
         meta = cls.parse_metadata(s)
@@ -416,7 +460,7 @@ def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, A
             fill_value = cls.decode_fill_value(meta["fill_value"], dtype, object_codec)
             # TODO: remove dimension_separator?
 
-            compressor = meta.get("compressor", None)
+            compressor = cls._decode_codec_metadata(meta.get("compressor", None))
             meta = dict(
                 shape=tuple(meta["shape"]),
                 chunk_grid=dict(
@@ -455,7 +499,7 @@ def encode_array_metadata(cls, meta: MappingType[str, Any]) -> bytes:
             object_codec = numcodecs.get_codec(meta["attributes"]["filters"][0])
         else:
             object_codec = None
-        compressor = meta.get("compressor", None)
+        compressor = cls._encode_codec_metadata(meta.get("compressor", None))
         meta = dict(
             shape=meta["shape"] + sdshape,
             chunk_grid=dict(
diff --git a/zarr/storage.py b/zarr/storage.py
index b81baf984d..4d8b06b28e 100644
--- a/zarr/storage.py
+++ b/zarr/storage.py
@@ -562,7 +562,7 @@ def _init_array_metadata(
 
     # obtain compressor config
     compressor_config = None
-    if compressor:
+    if store_version == 2 and compressor:
         try:
             compressor_config = compressor.get_config()
         except AttributeError as e:
@@ -598,7 +598,8 @@
 
     # initialize metadata
    # TODO: don't store redundant dimension_separator for v3?
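    # For reference, a sketch of what the codec helpers above produce: with
    # compressor=Zlib(1), Metadata3._encode_codec_metadata stores
    #   {"codec": "https://purl.org/zarr/spec/codec/zlib/1.0",
    #    "configuration": {"level": 1}}
    # and _decode_codec_metadata maps that URI back to a numcodecs codec.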
- meta = dict(shape=shape, compressor=compressor_config, + _compressor = compressor_config if store_version == 2 else compressor + meta = dict(shape=shape, compressor=_compressor, fill_value=fill_value, dimension_separator=dimension_separator) if store_version < 3: diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index 23b202d618..64fbb26843 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -95,7 +95,7 @@ def test_init_array(self, dimension_separator_fixture_v3): assert (1000,) == meta['shape'] assert (100,) == meta['chunk_grid']['chunk_shape'] assert np.dtype(None) == meta['data_type'] - assert default_compressor.get_config() == meta['compressor'] + assert default_compressor == meta['compressor'] assert meta['fill_value'] is None # Missing MUST be assumed to be "/" assert meta.get('dimension_separator', "/") is want_dim_sep @@ -118,7 +118,7 @@ def _test_init_array_overwrite(self, order): chunk_shape=(200,), separator=('/')), data_type=np.dtype('u1'), - compressor=Zlib(1).get_config(), + compressor=Zlib(1), fill_value=0, chunk_memory_layout=order, filters=None) @@ -165,7 +165,7 @@ def test_init_array_path(self): assert (1000,) == meta['shape'] assert (100,) == meta['chunk_grid']['chunk_shape'] assert np.dtype(None) == meta['data_type'] - assert default_compressor.get_config() == meta['compressor'] + assert default_compressor == meta['compressor'] assert meta['fill_value'] is None store.close() @@ -179,7 +179,7 @@ def _test_init_array_overwrite_path(self, order): chunk_shape=(200,), separator=('/')), data_type=np.dtype('u1'), - compressor=Zlib(1).get_config(), + compressor=Zlib(1), fill_value=0, chunk_memory_layout=order, filters=None) From 9bfeacb08f4690054622029e5335264fb56663d4 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 17 Dec 2021 00:35:56 -0500 Subject: [PATCH 010/109] add missing level to Zlib in _decode_codec_metadata --- zarr/meta.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/zarr/meta.py b/zarr/meta.py index 893d3efbdf..5b031d48ff 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -417,6 +417,12 @@ def _encode_codec_metadata(cls, codec: Codec) -> Optional[Mapping]: uri = uri + "zlib/1.0" elif isinstance(codec, numcodecs.Blosc): uri = uri + "blosc/1.0" + elif isinstance(codec, numcodecs.BZ2): + uri = uri + "bz2/1.0" + elif isinstance(codec, numcodecs.LZ4): + uri = uri + "lz4/1.0" + elif isinstance(codec, numcodecs.LZMA): + uri = uri + "lzma/1.0" meta = { "codec": uri, "configuration": config, @@ -433,12 +439,21 @@ def _decode_codec_metadata(cls, meta: Optional[Mapping]) -> Optional[Codec]: if meta['codec'].startswith(uri + 'gzip/'): codec = numcodecs.GZip(level=conf['level']) elif meta['codec'].startswith(uri + 'zlib/'): - codec = numcodecs.Zlib() + codec = numcodecs.Zlib(level=conf['level']) elif meta['codec'].startswith(uri + 'blosc/'): codec = numcodecs.Blosc(clevel=conf['clevel'], shuffle=conf['shuffle'], blocksize=conf['blocksize'], cname=conf['cname']) + elif meta['codec'].startswith(uri + 'bz2/'): + codec = numcodecs.BZ2(level=conf['level']) + elif meta['codec'].startswith(uri + 'lz4/'): + codec = numcodecs.LZ4(acceleration=conf['acceleration']) + elif meta['codec'].startswith(uri + 'lzma/'): + codec = numcodecs.LZMA(format=conf['format'], + check=conf['check'], + preset=conf['preset'], + filters=conf['filters']) else: raise NotImplementedError From 23aca426b565a87974c1bb30affd37ad76b6790a Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 17 Dec 2021 00:56:48 -0500 
Subject: [PATCH 011/109] add extensions entry to v3 array metadata --- zarr/meta.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/zarr/meta.py b/zarr/meta.py index 5b031d48ff..7460b2a9ac 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -476,6 +476,7 @@ def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, A # TODO: remove dimension_separator? compressor = cls._decode_codec_metadata(meta.get("compressor", None)) + extensions = meta.get("extensions", []) meta = dict( shape=tuple(meta["shape"]), chunk_grid=dict( @@ -488,6 +489,7 @@ def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, A chunk_memory_layout=meta["chunk_memory_layout"], dimension_separator=meta.get("dimension_separator", "/"), attributes=meta["attributes"], + extensions=extensions, ) # compressor field should be absent when there is no compression if compressor: @@ -515,6 +517,7 @@ def encode_array_metadata(cls, meta: MappingType[str, Any]) -> bytes: else: object_codec = None compressor = cls._encode_codec_metadata(meta.get("compressor", None)) + extensions = meta.get("extensions", []) meta = dict( shape=meta["shape"] + sdshape, chunk_grid=dict( @@ -526,6 +529,7 @@ def encode_array_metadata(cls, meta: MappingType[str, Any]) -> bytes: fill_value=encode_fill_value(meta["fill_value"], dtype, object_codec), chunk_memory_layout=meta["chunk_memory_layout"], attributes=meta.get("attributes", {}), + extensions=extensions, ) if compressor: meta["compressor"] = compressor From e47035dc979c0d19903f4cca697afa16feb44394 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 17 Dec 2021 01:17:52 -0500 Subject: [PATCH 012/109] dimension_separator should not be in the array metadata for v3 --- zarr/meta.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/zarr/meta.py b/zarr/meta.py index 7460b2a9ac..f187c90102 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -487,7 +487,6 @@ def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, A data_type=dtype, fill_value=fill_value, chunk_memory_layout=meta["chunk_memory_layout"], - dimension_separator=meta.get("dimension_separator", "/"), attributes=meta["attributes"], extensions=extensions, ) @@ -495,9 +494,6 @@ def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, A if compressor: meta['compressor'] = compressor - # dimension_separator = meta.get("dimension_separator", None) - # if dimension_separator: - # meta["dimension_separator"] = dimension_separator except Exception as e: raise MetadataError("error decoding metadata: %s" % e) else: From 7549d5b856cf696bbd9edd0d58d1740ad163e016 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 30 Nov 2021 14:59:43 -0500 Subject: [PATCH 013/109] update Attributes, adding StoreV3 support avoid pytest error about missing fixture fix flake8 error related to zarr_version fixture --- zarr/attrs.py | 68 +++++++++++++++++---- zarr/tests/test_attrs.py | 125 +++++++++++++++++++++++++-------------- zarr/tests/test_sync.py | 3 +- 3 files changed, 141 insertions(+), 55 deletions(-) diff --git a/zarr/attrs.py b/zarr/attrs.py index eff1237db1..78c26461c4 100644 --- a/zarr/attrs.py +++ b/zarr/attrs.py @@ -1,6 +1,6 @@ from collections.abc import MutableMapping -from zarr._storage.store import Store +from zarr._storage.store import Store, StoreV3 from zarr.util import json_dumps @@ -26,7 +26,15 @@ class Attributes(MutableMapping): def __init__(self, store, key='.zattrs', read_only=False, cache=True, synchronizer=None): - self.store = Store._ensure_store(store) + + 
self._version = getattr(store, '_store_version', 2) + assert key + + if self._version == 3 and '.z' in key: + raise ValueError('invalid v3 key') + + _Store = Store if self._version == 2 else StoreV3 + self.store = _Store._ensure_store(store) self.key = key self.read_only = read_only self.cache = cache @@ -38,6 +46,8 @@ def _get_nosync(self): data = self.store[self.key] except KeyError: d = dict() + if self._version > 2: + d['attributes'] = {} else: d = self.store._metadata_class.parse_metadata(data) return d @@ -47,6 +57,8 @@ def asdict(self): if self.cache and self._cached_asdict is not None: return self._cached_asdict d = self._get_nosync() + if self._version == 3: + d = d['attributes'] if self.cache: self._cached_asdict = d return d @@ -54,7 +66,10 @@ def asdict(self): def refresh(self): """Refresh cached attributes from the store.""" if self.cache: - self._cached_asdict = self._get_nosync() + if self._version == 3: + self._cached_asdict = self._get_nosync()['attributes'] + else: + self._cached_asdict = self._get_nosync() def __contains__(self, x): return x in self.asdict() @@ -84,7 +99,10 @@ def _setitem_nosync(self, item, value): d = self._get_nosync() # set key value - d[item] = value + if self._version == 2: + d[item] = value + else: + d['attributes'][item] = value # _put modified data self._put_nosync(d) @@ -98,7 +116,10 @@ def _delitem_nosync(self, key): d = self._get_nosync() # delete key value - del d[key] + if self._version == 2: + del d[key] + else: + del d['attributes'][key] # _put modified data self._put_nosync(d) @@ -106,12 +127,34 @@ def _delitem_nosync(self, key): def put(self, d): """Overwrite all attributes with the key/value pairs in the provided dictionary `d` in a single operation.""" - self._write_op(self._put_nosync, d) + if self._version == 2: + self._write_op(self._put_nosync, d) + else: + self._write_op(self._put_nosync, dict(attributes=d)) def _put_nosync(self, d): - self.store[self.key] = json_dumps(d) - if self.cache: - self._cached_asdict = d + if self._version == 2: + self.store[self.key] = json_dumps(d) + if self.cache: + self._cached_asdict = d + else: + if self.key in self.store: + # Cannot write the attributes directly to JSON, but have to + # store it within the pre-existing attributes key of the v3 + # metadata. + + # Note: this changes the store.counter result in test_caching_on! 
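+                # For reference, a sketch of the two layouts: v2 keeps user
+                # attributes at the top level of the document,
+                #     {"foo": "bar", "baz": 42}
+                # while v3 nests them under an "attributes" key,
+                #     {"attributes": {"foo": "bar", "baz": 42}}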
+ + meta = self.store._metadata_class.parse_metadata(self.store[self.key]) + if 'attributes' in meta and 'filters' in meta['attributes']: + # need to preserve any existing "filters" attribute + d['attributes']['filters'] = meta['attributes']['filters'] + meta['attributes'] = d['attributes'] + else: + meta = d + self.store[self.key] = json_dumps(meta) + if self.cache: + self._cached_asdict = d['attributes'] # noinspection PyMethodOverriding def update(self, *args, **kwargs): @@ -124,7 +167,12 @@ def _update_nosync(self, *args, **kwargs): d = self._get_nosync() # update - d.update(*args, **kwargs) + if self._version == 2: + d.update(*args, **kwargs) + else: + if 'attributes' not in d: + d['attributes'] = {} + d['attributes'].update(*args, **kwargs) # _put modified data self._put_nosync(d) diff --git a/zarr/tests/test_attrs.py b/zarr/tests/test_attrs.py index b2de736d4a..62faf662da 100644 --- a/zarr/tests/test_attrs.py +++ b/zarr/tests/test_attrs.py @@ -3,8 +3,20 @@ import pytest from zarr.attrs import Attributes -from zarr.tests.util import CountingDict -from zarr.storage import KVStore +from zarr.storage import KVStore, KVStoreV3 +from zarr.tests.util import CountingDict, CountingDictV3 + + +@pytest.fixture(params=[2, 3]) +def zarr_version(request): + return request.param + + +def _init_store(version): + """Use a plain dict() for v2, but KVStoreV3 otherwise.""" + if version == 2: + return dict() + return KVStoreV3(dict()) class TestAttributes(): @@ -12,13 +24,9 @@ class TestAttributes(): def init_attributes(self, store, read_only=False, cache=True): return Attributes(store, key='attrs', read_only=read_only, cache=cache) - @pytest.mark.parametrize('store_from_dict', [False, True]) - def test_storage(self, store_from_dict): + def test_storage(self, zarr_version): - if store_from_dict: - store = dict() - else: - store = KVStore(dict()) + store = _init_store(zarr_version) a = Attributes(store=store, key='attrs') assert isinstance(a.store, KVStore) assert 'foo' not in a @@ -30,11 +38,14 @@ def test_storage(self, store_from_dict): assert 'attrs' in store assert isinstance(store['attrs'], bytes) d = json.loads(str(store['attrs'], 'ascii')) + if zarr_version == 3: + d = d['attributes'] assert dict(foo='bar', baz=42) == d - def test_get_set_del_contains(self): + def test_get_set_del_contains(self, zarr_version): - a = self.init_attributes(dict()) + store = _init_store(zarr_version) + a = self.init_attributes(store) assert 'foo' not in a a['foo'] = 'bar' a['baz'] = 42 @@ -48,9 +59,10 @@ def test_get_set_del_contains(self): # noinspection PyStatementEffect a['foo'] - def test_update_put(self): + def test_update_put(self, zarr_version): - a = self.init_attributes(dict()) + store = _init_store(zarr_version) + a = self.init_attributes(store) assert 'foo' not in a assert 'bar' not in a assert 'baz' not in a @@ -65,9 +77,10 @@ def test_update_put(self): assert a['bar'] == 84 assert 'baz' not in a - def test_iterators(self): + def test_iterators(self, zarr_version): - a = self.init_attributes(dict()) + store = _init_store(zarr_version) + a = self.init_attributes(store) assert 0 == len(a) assert set() == set(a) assert set() == set(a.keys()) @@ -83,10 +96,13 @@ def test_iterators(self): assert {'bar', 42} == set(a.values()) assert {('foo', 'bar'), ('baz', 42)} == set(a.items()) - def test_read_only(self): - store = dict() + def test_read_only(self, zarr_version): + store = _init_store(zarr_version) a = self.init_attributes(store, read_only=True) - store['attrs'] = json.dumps(dict(foo='bar', 
baz=42)).encode('ascii') + if zarr_version == 2: + store['attrs'] = json.dumps(dict(foo='bar', baz=42)).encode('ascii') + else: + store['attrs'] = json.dumps(dict(attributes=dict(foo='bar', baz=42))).encode('ascii') assert a['foo'] == 'bar' assert a['baz'] == 42 with pytest.raises(PermissionError): @@ -96,8 +112,9 @@ def test_read_only(self): with pytest.raises(PermissionError): a.update(foo='quux') - def test_key_completions(self): - a = self.init_attributes(dict()) + def test_key_completions(self, zarr_version): + store = _init_store(zarr_version) + a = self.init_attributes(store) d = a._ipython_key_completions_() assert 'foo' not in d assert '123' not in d @@ -112,14 +129,17 @@ def test_key_completions(self): assert 'asdf;' in d assert 'baz' not in d - def test_caching_on(self): + def test_caching_on(self, zarr_version): # caching is turned on by default # setup store - store = CountingDict() + store = CountingDict() if zarr_version == 2 else CountingDictV3() assert 0 == store.counter['__getitem__', 'attrs'] assert 0 == store.counter['__setitem__', 'attrs'] - store['attrs'] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii') + if zarr_version == 2: + store['attrs'] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii') + else: + store['attrs'] = json.dumps(dict(attributes=dict(foo='xxx', bar=42))).encode('ascii') assert 0 == store.counter['__getitem__', 'attrs'] assert 1 == store.counter['__setitem__', 'attrs'] @@ -136,54 +156,65 @@ def test_caching_on(self): # test __setitem__ updates the cache a['foo'] = 'yyy' - assert 2 == store.counter['__getitem__', 'attrs'] + get_cnt = 2 if zarr_version == 2 else 3 + assert get_cnt == store.counter['__getitem__', 'attrs'] assert 2 == store.counter['__setitem__', 'attrs'] assert a['foo'] == 'yyy' - assert 2 == store.counter['__getitem__', 'attrs'] + assert get_cnt == store.counter['__getitem__', 'attrs'] assert 2 == store.counter['__setitem__', 'attrs'] # test update() updates the cache a.update(foo='zzz', bar=84) - assert 3 == store.counter['__getitem__', 'attrs'] + get_cnt = 3 if zarr_version == 2 else 5 + assert get_cnt == store.counter['__getitem__', 'attrs'] assert 3 == store.counter['__setitem__', 'attrs'] assert a['foo'] == 'zzz' assert a['bar'] == 84 - assert 3 == store.counter['__getitem__', 'attrs'] + assert get_cnt == store.counter['__getitem__', 'attrs'] assert 3 == store.counter['__setitem__', 'attrs'] # test __contains__ uses the cache assert 'foo' in a - assert 3 == store.counter['__getitem__', 'attrs'] + assert get_cnt == store.counter['__getitem__', 'attrs'] assert 3 == store.counter['__setitem__', 'attrs'] assert 'spam' not in a - assert 3 == store.counter['__getitem__', 'attrs'] + assert get_cnt == store.counter['__getitem__', 'attrs'] assert 3 == store.counter['__setitem__', 'attrs'] # test __delitem__ updates the cache del a['bar'] - assert 4 == store.counter['__getitem__', 'attrs'] + get_cnt = 4 if zarr_version == 2 else 7 + assert get_cnt == store.counter['__getitem__', 'attrs'] assert 4 == store.counter['__setitem__', 'attrs'] assert 'bar' not in a - assert 4 == store.counter['__getitem__', 'attrs'] + assert get_cnt == store.counter['__getitem__', 'attrs'] assert 4 == store.counter['__setitem__', 'attrs'] # test refresh() - store['attrs'] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii') - assert 4 == store.counter['__getitem__', 'attrs'] + if zarr_version == 2: + store['attrs'] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii') + else: + store['attrs'] = json.dumps(dict(attributes=dict(foo='xxx', 
bar=42))).encode('ascii') + assert get_cnt == store.counter['__getitem__', 'attrs'] a.refresh() - assert 5 == store.counter['__getitem__', 'attrs'] + get_cnt = 5 if zarr_version == 2 else 8 + assert get_cnt == store.counter['__getitem__', 'attrs'] assert a['foo'] == 'xxx' - assert 5 == store.counter['__getitem__', 'attrs'] + assert get_cnt == store.counter['__getitem__', 'attrs'] assert a['bar'] == 42 - assert 5 == store.counter['__getitem__', 'attrs'] + assert get_cnt == store.counter['__getitem__', 'attrs'] - def test_caching_off(self): + def test_caching_off(self, zarr_version): # setup store - store = CountingDict() + store = CountingDict() if zarr_version == 2 else CountingDictV3() assert 0 == store.counter['__getitem__', 'attrs'] assert 0 == store.counter['__setitem__', 'attrs'] - store['attrs'] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii') + + if zarr_version == 2: + store['attrs'] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii') + else: + store['attrs'] = json.dumps(dict(attributes=dict(foo='xxx', bar=42))).encode('ascii') assert 0 == store.counter['__getitem__', 'attrs'] assert 1 == store.counter['__setitem__', 'attrs'] @@ -200,25 +231,31 @@ def test_caching_off(self): # test __setitem__ a['foo'] = 'yyy' - assert 4 == store.counter['__getitem__', 'attrs'] + get_cnt = 4 if zarr_version == 2 else 5 + assert get_cnt == store.counter['__getitem__', 'attrs'] assert 2 == store.counter['__setitem__', 'attrs'] assert a['foo'] == 'yyy' - assert 5 == store.counter['__getitem__', 'attrs'] + get_cnt = 5 if zarr_version == 2 else 6 + assert get_cnt == store.counter['__getitem__', 'attrs'] assert 2 == store.counter['__setitem__', 'attrs'] # test update() a.update(foo='zzz', bar=84) - assert 6 == store.counter['__getitem__', 'attrs'] + get_cnt = 6 if zarr_version == 2 else 8 + assert get_cnt == store.counter['__getitem__', 'attrs'] assert 3 == store.counter['__setitem__', 'attrs'] assert a['foo'] == 'zzz' assert a['bar'] == 84 - assert 8 == store.counter['__getitem__', 'attrs'] + get_cnt = 8 if zarr_version == 2 else 10 + assert get_cnt == store.counter['__getitem__', 'attrs'] assert 3 == store.counter['__setitem__', 'attrs'] # test __contains__ assert 'foo' in a - assert 9 == store.counter['__getitem__', 'attrs'] + get_cnt = 9 if zarr_version == 2 else 11 + assert get_cnt == store.counter['__getitem__', 'attrs'] assert 3 == store.counter['__setitem__', 'attrs'] assert 'spam' not in a - assert 10 == store.counter['__getitem__', 'attrs'] + get_cnt = 10 if zarr_version == 2 else 12 + assert get_cnt == store.counter['__getitem__', 'attrs'] assert 3 == store.counter['__setitem__', 'attrs'] diff --git a/zarr/tests/test_sync.py b/zarr/tests/test_sync.py index 69fc0d7708..1a763dc7f7 100644 --- a/zarr/tests/test_sync.py +++ b/zarr/tests/test_sync.py @@ -15,7 +15,8 @@ from zarr.storage import (DirectoryStore, KVStore, atexit_rmtree, init_array, init_group) from zarr.sync import ProcessSynchronizer, ThreadSynchronizer -from zarr.tests.test_attrs import TestAttributes +# zarr_version fixture must be imported although not used directly here +from zarr.tests.test_attrs import TestAttributes, zarr_version # noqa from zarr.tests.test_core import TestArray from zarr.tests.test_hierarchy import TestGroup From 765fe7d69adae1a6f57ee8555ffb0cf1da749fae Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 30 Nov 2021 16:30:24 -0500 Subject: [PATCH 014/109] add StoreV3 support to core Array object --- zarr/core.py | 147 ++++-- zarr/tests/test_core.py | 3 +- zarr/tests/test_core_v3.py | 898 
+++++++++++++++++++++++++++++++++++++ 3 files changed, 1012 insertions(+), 36 deletions(-) create mode 100644 zarr/tests/test_core_v3.py diff --git a/zarr/core.py b/zarr/core.py index 6f6b468e3b..07096ab1c4 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -4,13 +4,14 @@ import math import operator import re +from collections.abc import MutableMapping from functools import reduce +from typing import Any import numpy as np from numcodecs.compat import ensure_bytes, ensure_ndarray -from collections.abc import MutableMapping - +from zarr._storage.store import _prefix_to_attrs_key from zarr.attrs import Attributes from zarr.codecs import AsType, get_codec from zarr.errors import ArrayNotFoundError, ReadOnlyError, ArrayIndexError @@ -31,7 +32,13 @@ is_scalar, pop_fields, ) -from zarr.storage import array_meta_key, attrs_key, getsize, listdir, BaseStore +from zarr.storage import ( + _get_hierarchy_metadata, + _prefix_to_array_key, + getsize, + listdir, + normalize_store_arg, +) from zarr.util import ( all_equal, InfoReporter, @@ -146,7 +153,7 @@ class Array: def __init__( self, - store: BaseStore, + store: Any, # BaseStore not stricly required due to normalize_store_arg path=None, read_only=False, chunk_store=None, @@ -155,12 +162,22 @@ def __init__( cache_attrs=True, partial_decompress=False, write_empty_chunks=True, + zarr_version=None, ): # N.B., expect at this point store is fully initialized with all # configuration metadata fully specified and normalized - store = BaseStore._ensure_store(store) - chunk_store = BaseStore._ensure_store(chunk_store) + store = normalize_store_arg(store, zarr_version=zarr_version) + if zarr_version is None: + zarr_version = getattr(store, '_store_version', 2) + + if chunk_store is not None: + chunk_store = normalize_store_arg(chunk_store, + zarr_version=zarr_version) + if not getattr(chunk_store, '_store_version', 2) == zarr_version: + raise ValueError( + "zarr_version of store and chunk_store must match" + ) self._store = store self._chunk_store = chunk_store @@ -175,12 +192,19 @@ def __init__( self._is_view = False self._partial_decompress = partial_decompress self._write_empty_chunks = write_empty_chunks + self._version = zarr_version + + if self._version == 3: + self._data_key_prefix = 'data/root/' + self._key_prefix + self._data_path = 'data/root/' + self._path + self._hierarchy_metadata = _get_hierarchy_metadata(store=None) + self._metadata_key_suffix = self._hierarchy_metadata['metadata_key_suffix'] # initialize metadata self._load_metadata() # initialize attributes - akey = self._key_prefix + attrs_key + akey = _prefix_to_attrs_key(self._store, self._key_prefix) self._attrs = Attributes(store, key=akey, read_only=read_only, synchronizer=synchronizer, cache=cache_attrs) @@ -196,13 +220,13 @@ def _load_metadata(self): if self._synchronizer is None: self._load_metadata_nosync() else: - mkey = self._key_prefix + array_meta_key + mkey = _prefix_to_array_key(self._store, self._key_prefix) with self._synchronizer[mkey]: self._load_metadata_nosync() def _load_metadata_nosync(self): try: - mkey = self._key_prefix + array_meta_key + mkey = _prefix_to_array_key(self._store, self._key_prefix) meta_bytes = self._store[mkey] except KeyError: raise ArrayNotFoundError(self._path) @@ -212,21 +236,31 @@ def _load_metadata_nosync(self): meta = self._store._metadata_class.decode_array_metadata(meta_bytes) self._meta = meta self._shape = meta['shape'] - self._chunks = meta['chunks'] - self._dtype = meta['dtype'] self._fill_value = meta['fill_value'] - self._order = 
meta['order'] - dimension_separator = meta.get('dimension_separator', None) - if dimension_separator is None: - try: - dimension_separator = self._store._dimension_separator - except (AttributeError, KeyError): - pass - - # Fallback for any stores which do not choose a default + if self._version == 2: + self._chunks = meta['chunks'] + self._dtype = meta['dtype'] + self._order = meta['order'] if dimension_separator is None: - dimension_separator = "." + try: + dimension_separator = self._store._dimension_separator + except (AttributeError, KeyError): + pass + + # Fallback for any stores which do not choose a default + if dimension_separator is None: + dimension_separator = "." + else: + self._chunks = meta['chunk_grid']['chunk_shape'] + self._dtype = meta['data_type'] + self._order = meta['chunk_memory_layout'] + if dimension_separator is None: + # TODO: omit attribute in v3? + dimension_separator = meta.get('dimension_separator', '/') + chunk_separator = meta['chunk_grid']['separator'] + assert chunk_separator == dimension_separator + self._dimension_separator = dimension_separator # setup compressor @@ -237,7 +271,12 @@ def _load_metadata_nosync(self): self._compressor = get_codec(config) # setup filters - filters = meta['filters'] + if self._version == 2: + filters = meta.get('filters', []) + else: + # TODO: storing filters under attributes for now since the v3 + # array metadata does not have a 'filters' attribute. + filters = meta['attributes'].get('filters', []) if filters: filters = [get_codec(config) for config in filters] self._filters = filters @@ -262,10 +301,22 @@ def _flush_metadata_nosync(self): filters_config = [f.get_config() for f in self._filters] else: filters_config = None - meta = dict(shape=self._shape, chunks=self._chunks, dtype=self._dtype, - compressor=compressor_config, fill_value=self._fill_value, - order=self._order, filters=filters_config) - mkey = self._key_prefix + array_meta_key + meta = dict(shape=self._shape, compressor=compressor_config, + fill_value=self._fill_value, filters=filters_config) + if getattr(self._store, '_store_version', 2) == 2: + meta.update( + dict(chunks=self._chunks, dtype=self._dtype, order=self._order) + ) + else: + meta.update( + dict(chunk_grid=dict(type='regular', + chunk_shape=self._chunks, + separator=self._dimension_separator), + data_type=self._dtype, + chunk_memory_layout=self._order, + attributes=self.attrs.asdict()) + ) + mkey = _prefix_to_array_key(self._store, self._key_prefix) self._store[mkey] = self._store._metadata_class.encode_array_metadata(meta) @property @@ -453,11 +504,28 @@ def nchunks(self): def nchunks_initialized(self): """The number of chunks that have been initialized with some data.""" - # key pattern for chunk keys - prog = re.compile(r'\.'.join([r'\d+'] * min(1, self.ndim))) - # count chunk keys - return sum(1 for k in listdir(self.chunk_store, self._path) if prog.match(k)) + if self._version == 3: + # # key pattern for chunk keys + # prog = re.compile(r'\.'.join([r'c\d+'] * min(1, self.ndim))) + # # get chunk keys, excluding the prefix + # members = self.chunk_store.list_prefix(self._data_path) + # members = [k.split(self._data_key_prefix)[1] for k in members] + # # count the chunk keys + # return sum(1 for k in members if prog.match(k)) + + # key pattern for chunk keys + prog = re.compile(self._data_key_prefix + r'c\d+') # TODO: ndim == 0 case? 
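+            # e.g. for a 3-d array at path 'foo' with '/' as the separator,
+            # chunk (0, 2, 3) is stored under 'data/root/foo/c0/2/3', so a
+            # chunk key is anything matching the data prefix plus 'c<digit>'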
+ # get chunk keys, excluding the prefix + members = self.chunk_store.list_prefix(self._data_path) + # count the chunk keys + return sum(1 for k in members if prog.match(k)) + else: + # key pattern for chunk keys + prog = re.compile(r'\.'.join([r'\d+'] * min(1, self.ndim))) + + # count chunk keys + return sum(1 for k in listdir(self.chunk_store, self._path) if prog.match(k)) # backwards compatibility initialized = nchunks_initialized @@ -2061,7 +2129,15 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): return chunk def _chunk_key(self, chunk_coords): - return self._key_prefix + self._dimension_separator.join(map(str, chunk_coords)) + if self._version == 3: + # _chunk_key() corresponds to data_key(P, i, j, ...) example in the spec + # where P = self._key_prefix, i, j, ... = chunk_coords + # e.g. c0/2/3 for 3d array with chunk index (0, 2, 3) + # https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#regular-grids + return ("data/root/" + self._key_prefix + + "c" + self._dimension_separator.join(map(str, chunk_coords))) + else: + return self._key_prefix + self._dimension_separator.join(map(str, chunk_coords)) def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None): # decompress @@ -2242,7 +2318,8 @@ def digest(self, hashname="sha1"): for i in itertools.product(*[range(s) for s in self.cdata_shape]): h.update(self.chunk_store.get(self._chunk_key(i), b"")) - h.update(self.store.get(self._key_prefix + array_meta_key, b"")) + mkey = _prefix_to_array_key(self._store, self._key_prefix) + h.update(self.store.get(mkey, b"")) h.update(self.store.get(self.attrs.key, b"")) @@ -2279,7 +2356,7 @@ def hexdigest(self, hashname="sha1"): def __getstate__(self): return (self._store, self._path, self._read_only, self._chunk_store, self._synchronizer, self._cache_metadata, self._attrs.cache, - self._partial_decompress, self._write_empty_chunks) + self._partial_decompress, self._write_empty_chunks, self._version) def __setstate__(self, state): self.__init__(*state) @@ -2292,7 +2369,7 @@ def _synchronized_op(self, f, *args, **kwargs): else: # synchronize on the array - mkey = self._key_prefix + array_meta_key + mkey = _prefix_to_array_key(self._store, self._key_prefix) lock = self._synchronizer[mkey] with lock: @@ -2559,7 +2636,7 @@ def view(self, shape=None, chunks=None, dtype=None, if synchronizer is None: synchronizer = self._synchronizer a = Array(store=store, path=path, chunk_store=chunk_store, read_only=read_only, - synchronizer=synchronizer, cache_metadata=True) + synchronizer=synchronizer, cache_metadata=True, zarr_version=self._version) a._is_view = True # allow override of some properties diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 7423132887..938a58b494 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -43,6 +43,8 @@ class TestArray(unittest.TestCase): + _version = 2 + def test_array_init(self): # normal initialization @@ -1180,7 +1182,6 @@ def test_object_arrays(self): def test_object_arrays_vlen_text(self): data = np.array(greetings * 1000, dtype=object) - z = self.create_array(shape=data.shape, dtype=object, object_codec=VLenUTF8()) z[0] = 'foo' assert z[0] == 'foo' diff --git a/zarr/tests/test_core_v3.py b/zarr/tests/test_core_v3.py new file mode 100644 index 0000000000..d0d08b29bb --- /dev/null +++ b/zarr/tests/test_core_v3.py @@ -0,0 +1,898 @@ +import atexit +import os +import shutil +from tempfile import mkdtemp, mktemp + +import numpy as np +import pytest +from numcodecs 
import (Blosc, Zlib) +from numcodecs.compat import ensure_bytes +from numpy.testing import assert_array_almost_equal, assert_array_equal + +from zarr.core import Array +from zarr.errors import ArrayNotFoundError, ContainsGroupError +from zarr.meta import json_loads +from zarr.storage import ( + # ABSStoreV3, + DBMStoreV3, + DirectoryStoreV3, + FSStoreV3, + KVStoreV3, + LMDBStoreV3, + LRUStoreCacheV3, + NestedDirectoryStoreV3, + SQLiteStoreV3, + StoreV3, + atexit_rmglob, + atexit_rmtree, + init_array, + init_group, +) +from zarr.tests.test_core import TestArrayWithPath +from zarr.tests.util import have_fsspec +from zarr.util import buffer_size + + +# Start with TestArrayWithPathV3 not TestArrayV3 since path must be supplied + +class TestArrayWithPathV3(TestArrayWithPath): + + _version = 3 + + @staticmethod + def create_array(array_path='arr1', read_only=False, **kwargs): + store = KVStoreV3(dict()) + kwargs.setdefault('compressor', Zlib(level=1)) + cache_metadata = kwargs.pop('cache_metadata', True) + cache_attrs = kwargs.pop('cache_attrs', True) + write_empty_chunks = kwargs.pop('write_empty_chunks', True) + init_array(store, path=array_path, **kwargs) + return Array(store, path=array_path, read_only=read_only, + cache_metadata=cache_metadata, cache_attrs=cache_attrs, + write_empty_chunks=write_empty_chunks) + + def test_array_init(self): + + # should not be able to initialize without a path in V3 + store = KVStoreV3(dict()) + with pytest.raises(ValueError): + init_array(store, shape=100, chunks=10, dtype=" Date: Thu, 16 Dec 2021 22:45:09 -0500 Subject: [PATCH 015/109] update hexdigests --- zarr/core.py | 2 +- zarr/tests/test_core_v3.py | 72 +++++++++++++++++++------------------- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 07096ab1c4..362dd20a70 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -264,7 +264,7 @@ def _load_metadata_nosync(self): self._dimension_separator = dimension_separator # setup compressor - config = meta['compressor'] + config = meta.get('compressor', None) if config is None: self._compressor = None else: diff --git a/zarr/tests/test_core_v3.py b/zarr/tests/test_core_v3.py index d0d08b29bb..8cd423ce76 100644 --- a/zarr/tests/test_core_v3.py +++ b/zarr/tests/test_core_v3.py @@ -70,7 +70,7 @@ def test_array_init(self): assert '/' + path == a.name # TODO: should this include meta/root? 
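For reference, a minimal standalone sketch (hypothetical store contents, not
taken from the patch) of why the expected digests must be regenerated for v3:
the digest hashes each chunk's bytes plus the serialized array metadata, and
both the keys and the metadata document change under the v3 layout.

    import hashlib

    def digest(store, chunk_keys, meta_key):
        h = hashlib.sha1()
        for k in chunk_keys:
            h.update(store.get(k, b""))
        h.update(store.get(meta_key, b""))
        return h.hexdigest()

    v2_store = {"arr1/0": b"\x00", "arr1/.zarray": b'{"zarr_format": 2}'}
    v3_store = {"data/root/arr1/c0": b"\x00",
                "meta/root/arr1.array.json": b'{"chunk_grid": {"type": "regular"}}'}
    assert (digest(v2_store, ["arr1/0"], "arr1/.zarray")
            != digest(v3_store, ["data/root/arr1/c0"], "meta/root/arr1.array.json"))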
assert 'bar' == a.basename assert store is a.store - assert "402b6d5509c264ef9f0a881007300a39d27c4990" == a.hexdigest() + assert "ee4a373f919c842589ace4fc6b787818d7668134" == a.hexdigest() # store not initialized store = KVStoreV3(dict()) @@ -186,11 +186,11 @@ def test_dtypes(self): def expected(self): return [ - "b3c57672933da371618596f48d87c2c1bdf76445", - "a71e05a0356c4c03418fb08b036238bad7c92a70", - "1d89bc519dbdbf8457569fc9f95f621da00294be", - "c5247a55edb89fe36105e2b46009d2a123de38a5", - "011965beb88874e928cfedcad9918418d9b37be5", + "7ff7c75be0c73c41f2c3624043bbdc730fa49f92", + "5821d73290a1bddc7626157724b8f4e86c7b519e", + "d27383c79f67bc30d98f80842075c714971615a4", + "f139aea7b4803e7fcae84fdf665efeb90f93c2ae", + "7101c03c6e3b44b03721fa1fb036e9a06504c985", ] def test_hexdigest(self): @@ -260,25 +260,25 @@ def create_array(array_path='arr1', read_only=False, **kwargs): def test_hexdigest(self): # Check basic 1-D array z = self.create_array(shape=(1050,), chunks=100, dtype=' Date: Fri, 17 Dec 2021 00:12:16 -0500 Subject: [PATCH 016/109] handle additional codecs that were not implemented in zarrita update hexdigests --- zarr/core.py | 11 +++--- zarr/storage.py | 3 ++ zarr/tests/test_core_v3.py | 72 +++++++++++++++++++------------------- 3 files changed, 46 insertions(+), 40 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 362dd20a70..a9932015b8 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -264,11 +264,13 @@ def _load_metadata_nosync(self): self._dimension_separator = dimension_separator # setup compressor - config = meta.get('compressor', None) - if config is None: + compressor = meta.get('compressor', None) + if compressor is None: self._compressor = None + elif self._version == 2: + self._compressor = get_codec(compressor) else: - self._compressor = get_codec(config) + self._compressor = compressor # setup filters if self._version == 2: @@ -301,7 +303,8 @@ def _flush_metadata_nosync(self): filters_config = [f.get_config() for f in self._filters] else: filters_config = None - meta = dict(shape=self._shape, compressor=compressor_config, + _compressor = compressor_config if self._version == 2 else self._compressor + meta = dict(shape=self._shape, compressor=_compressor, fill_value=self._fill_value, filters=filters_config) if getattr(self._store, '_store_version', 2) == 2: meta.update( diff --git a/zarr/storage.py b/zarr/storage.py index 4d8b06b28e..abb27e991c 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -468,6 +468,9 @@ def init_array( # initialize with default zarr.json entry level metadata store['zarr.json'] = store._metadata_class.encode_hierarchy_metadata(None) # type: ignore + if not compressor: + # compatibility with legacy tests using compressor=[] + compressor = None _init_array_metadata(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, fill_value=fill_value, order=order, overwrite=overwrite, path=path, diff --git a/zarr/tests/test_core_v3.py b/zarr/tests/test_core_v3.py index 8cd423ce76..ac282ae092 100644 --- a/zarr/tests/test_core_v3.py +++ b/zarr/tests/test_core_v3.py @@ -70,7 +70,7 @@ def test_array_init(self): assert '/' + path == a.name # TODO: should this include meta/root? 
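As a minimal illustration of the v2 branch above (the codec id and level are
arbitrary), numcodecs round-trips a compressor between its config dict, which
is what v2 metadata stores, and a live codec instance:

    from numcodecs import Zlib, get_codec

    config = Zlib(level=1).get_config()   # {'id': 'zlib', 'level': 1}
    codec = get_codec(config)             # rehydrated Zlib instance
    assert isinstance(codec, Zlib) and codec.level == 1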
assert 'bar' == a.basename assert store is a.store - assert "ee4a373f919c842589ace4fc6b787818d7668134" == a.hexdigest() + assert "5eaa4f7f3e9035677c52cf32d4cd68db6bb92960" == a.hexdigest() # store not initialized store = KVStoreV3(dict()) @@ -186,11 +186,11 @@ def test_dtypes(self): def expected(self): return [ - "7ff7c75be0c73c41f2c3624043bbdc730fa49f92", - "5821d73290a1bddc7626157724b8f4e86c7b519e", - "d27383c79f67bc30d98f80842075c714971615a4", - "f139aea7b4803e7fcae84fdf665efeb90f93c2ae", - "7101c03c6e3b44b03721fa1fb036e9a06504c985", + "46f1c7f9f147791b18bc8a6ca39a0d6059368a53", + "2ebe1ed1164f79f7316cbca7fc855b99d108c34e", + "2b9ea26a23611be10cd4e2580fc0a9a80c001774", + "ef95d7af1e4edb37617c829846baeac4945785ba", + "8240cf3457152da3f66570abd6473f0f8d897532", ] def test_hexdigest(self): @@ -260,25 +260,25 @@ def create_array(array_path='arr1', read_only=False, **kwargs): def test_hexdigest(self): # Check basic 1-D array z = self.create_array(shape=(1050,), chunks=100, dtype=' Date: Fri, 17 Dec 2021 01:17:19 -0500 Subject: [PATCH 017/109] fix --- zarr/core.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index a9932015b8..59dbfd1785 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -255,11 +255,12 @@ def _load_metadata_nosync(self): self._chunks = meta['chunk_grid']['chunk_shape'] self._dtype = meta['data_type'] self._order = meta['chunk_memory_layout'] + chunk_separator = meta['chunk_grid']['separator'] if dimension_separator is None: # TODO: omit attribute in v3? - dimension_separator = meta.get('dimension_separator', '/') - chunk_separator = meta['chunk_grid']['separator'] - assert chunk_separator == dimension_separator + dimension_separator = meta.get('dimension_separator', chunk_separator) + else: + assert chunk_separator == dimension_separator self._dimension_separator = dimension_separator From 50401191500521da69ca79b078e3d967ad3c1b92 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 17 Dec 2021 01:34:59 -0500 Subject: [PATCH 018/109] fix hexdigests --- zarr/tests/test_core_v3.py | 72 +++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/zarr/tests/test_core_v3.py b/zarr/tests/test_core_v3.py index ac282ae092..d0a51088b0 100644 --- a/zarr/tests/test_core_v3.py +++ b/zarr/tests/test_core_v3.py @@ -70,7 +70,7 @@ def test_array_init(self): assert '/' + path == a.name # TODO: should this include meta/root? 
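A sketch of the v3 array metadata document that _load_metadata_nosync consumes
above (field values are hypothetical); chunk shape, separator, and memory
layout live under different keys than their v2 counterparts:

    meta_v3 = {
        "shape": [100, 100],
        "data_type": "<f8",
        "chunk_grid": {"type": "regular",
                       "chunk_shape": [10, 10],
                       "separator": "/"},
        "chunk_memory_layout": "C",
        "fill_value": 0,
        "attributes": {},
    }
    chunks = meta_v3["chunk_grid"]["chunk_shape"]   # v2: meta['chunks']
    dtype = meta_v3["data_type"]                    # v2: meta['dtype']
    order = meta_v3["chunk_memory_layout"]          # v2: meta['order']
    separator = meta_v3["chunk_grid"]["separator"]
    assert (chunks, dtype, order, separator) == ([10, 10], "<f8", "C", "/")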
assert 'bar' == a.basename assert store is a.store - assert "5eaa4f7f3e9035677c52cf32d4cd68db6bb92960" == a.hexdigest() + assert "968dccbbfc0139f703ead2fd1d503ad6e44db307" == a.hexdigest() # store not initialized store = KVStoreV3(dict()) @@ -186,11 +186,11 @@ def test_dtypes(self): def expected(self): return [ - "46f1c7f9f147791b18bc8a6ca39a0d6059368a53", - "2ebe1ed1164f79f7316cbca7fc855b99d108c34e", - "2b9ea26a23611be10cd4e2580fc0a9a80c001774", - "ef95d7af1e4edb37617c829846baeac4945785ba", - "8240cf3457152da3f66570abd6473f0f8d897532", + "73ab8ace56719a5c9308c3754f5e2d57bc73dc20", + "5fb3d02b8f01244721582929b3cad578aec5cea5", + "26b098bedb640846e18dc2fbc1c27684bb02b532", + "799a458c287d431d747bec0728987ca4fe764549", + "c780221df84eb91cb62f633f12d3f1eaa9cee6bd", ] def test_hexdigest(self): @@ -260,25 +260,25 @@ def create_array(array_path='arr1', read_only=False, **kwargs): def test_hexdigest(self): # Check basic 1-D array z = self.create_array(shape=(1050,), chunks=100, dtype=' Date: Fri, 17 Dec 2021 17:01:11 -0500 Subject: [PATCH 019/109] fix indentation --- zarr/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/core.py b/zarr/core.py index 59dbfd1785..e70d5591f7 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -269,7 +269,7 @@ def _load_metadata_nosync(self): if compressor is None: self._compressor = None elif self._version == 2: - self._compressor = get_codec(compressor) + self._compressor = get_codec(compressor) else: self._compressor = compressor From d07caed04966567d9433d6390f685c4c8dbe014a Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 30 Nov 2021 19:21:36 -0500 Subject: [PATCH 020/109] add StoreV3 support to Group, open_group, etc. --- zarr/hierarchy.py | 265 ++++++++++++---- zarr/tests/test_hierarchy.py | 569 +++++++++++++++++++++++++++++------ 2 files changed, 687 insertions(+), 147 deletions(-) diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 763a5f1631..53db0e617e 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -6,8 +6,7 @@ from zarr.attrs import Attributes from zarr.core import Array from zarr.creation import (array, create, empty, empty_like, full, full_like, - normalize_store_arg, ones, ones_like, zeros, - zeros_like) + ones, ones_like, zeros, zeros_like) from zarr.errors import ( ContainsArrayError, ContainsGroupError, @@ -15,14 +14,18 @@ ReadOnlyError, ) from zarr.storage import ( + _get_hierarchy_metadata, + _prefix_to_group_key, BaseStore, MemoryStore, + MemoryStoreV3, attrs_key, contains_array, contains_group, group_meta_key, init_group, listdir, + normalize_store_arg, rename, rmdir, ) @@ -109,9 +112,17 @@ class Group(MutableMapping): """ def __init__(self, store, path=None, read_only=False, chunk_store=None, - cache_attrs=True, synchronizer=None): - store: BaseStore = BaseStore._ensure_store(store) - chunk_store: BaseStore = BaseStore._ensure_store(chunk_store) + cache_attrs=True, synchronizer=None, zarr_version=None): + store: BaseStore = _normalize_store_arg(store, zarr_version=zarr_version) + if zarr_version is None: + zarr_version = getattr(store, '_store_version', 2) + if zarr_version > 2 and path: + if path.startswith(("meta/", "data/")): + raise ValueError("path must note start with 'meta/' or 'data/'") + if chunk_store is not None: + chunk_store: BaseStore = _normalize_store_arg(chunk_store, zarr_version=zarr_version) + if not getattr(chunk_store, '_store_version', 2) == zarr_version: + raise ValueError("zarr_version of store and chunk_store must match") self._store = store self._chunk_store = chunk_store 
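For orientation, a small sketch (paths are hypothetical) of the v3 key layout
that the guard above protects: user-facing paths never carry the meta/ or
data/ prefixes, which the store prepends internally:

    path = "foo/bar"                                 # what users pass
    assert not path.startswith(("meta/", "data/"))

    group_meta_key = "meta/root/" + path + ".group.json"
    array_meta_key = "meta/root/" + path + ".array.json"
    chunk_key_prefix = "data/root/" + path + "/"
    assert group_meta_key == "meta/root/foo/bar.group.json"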
self._path = normalize_storage_path(path) @@ -121,6 +132,13 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, self._key_prefix = '' self._read_only = read_only self._synchronizer = synchronizer + self._version = zarr_version + + if self._version == 3: + self._data_key_prefix = 'data/root/' + self._key_prefix + self._data_path = 'data/root/' + self._path + self._hierarchy_metadata = _get_hierarchy_metadata(store=None) + self._metadata_key_suffix = self._hierarchy_metadata['metadata_key_suffix'] # guard conditions if contains_array(store, path=self._path): @@ -128,15 +146,31 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, # initialize metadata try: - mkey = self._key_prefix + group_meta_key + mkey = _prefix_to_group_key(self._store, self._key_prefix) + assert not mkey.endswith("root/.group") meta_bytes = store[mkey] except KeyError: - raise GroupNotFoundError(path) + if self._version == 2: + raise GroupNotFoundError(path) + else: + implicit_prefix = 'meta/root/' + self._key_prefix + if not implicit_prefix.endswith('/'): + implicit_prefix += '/' + if self._store.list_prefix(implicit_prefix): + # implicit group does not have any metadata + self._meta = None + else: + raise GroupNotFoundError(path) else: self._meta = self._store._metadata_class.decode_group_metadata(meta_bytes) # setup attributes - akey = self._key_prefix + attrs_key + if self._version == 2: + akey = self._key_prefix + attrs_key + else: + # Note: mkey doesn't actually exist for implicit groups, but the + # object can still be created. + akey = mkey self._attrs = Attributes(store, key=akey, read_only=read_only, cache=cache_attrs, synchronizer=synchronizer) @@ -227,11 +261,36 @@ def __iter__(self): quux """ - for key in sorted(listdir(self._store, self._path)): - path = self._key_prefix + key - if (contains_array(self._store, path) or - contains_group(self._store, path)): - yield key + if getattr(self._store, '_store_version', 2) == 2: + for key in sorted(listdir(self._store, self._path)): + path = self._key_prefix + key + if (contains_array(self._store, path) or + contains_group(self._store, path)): + yield key + else: + # TODO: Should this iterate over data folders and/or metadata + # folders and/or metadata files + + dir_path = 'meta/root/' + self._key_prefix + name_start = len(dir_path) + keys, prefixes = self._store.list_dir(dir_path) + + # yield any groups or arrays + sfx = self._metadata_key_suffix + for key in keys: + len_suffix = len('.group') + len(sfx) # same for .array + if key.endswith(('.group' + sfx, '.array' + sfx)): + yield key[name_start:-len_suffix] + + # also yield any implicit groups + for prefix in prefixes: + prefix = prefix.rstrip('/') + # only implicit if there is no .group.sfx file + if not prefix + '.group' + sfx in self._store: + yield prefix[name_start:] + + # Note: omit data/root/ to avoid duplicate listings + # any group in data/root/ must has an entry in meta/root/ def __len__(self): """Number of members.""" @@ -323,9 +382,11 @@ def __contains__(self, item): False """ + if self._version > 2 and item.startswith('meta/'): + raise ValueError("meta/ must not be in item") path = self._item_path(item) return contains_array(self._store, path) or \ - contains_group(self._store, path) + contains_group(self._store, path, explicit_only=False) def __getitem__(self, item): """Obtain a group member. 
@@ -352,11 +413,21 @@ def __getitem__(self, item): if contains_array(self._store, path): return Array(self._store, read_only=self._read_only, path=path, chunk_store=self._chunk_store, - synchronizer=self._synchronizer, cache_attrs=self.attrs.cache) - elif contains_group(self._store, path): + synchronizer=self._synchronizer, cache_attrs=self.attrs.cache, + zarr_version=self._version) + elif contains_group(self._store, path, explicit_only=True): return Group(self._store, read_only=self._read_only, path=path, chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer) + synchronizer=self._synchronizer, zarr_version=self._version) + elif self._version == 3: + implicit_group = 'meta/root/' + path + '/' + # non-empty folder in the metadata path implies an implicit group + if self._store.list_prefix(implicit_group): + return Group(self._store, read_only=self._read_only, path=path, + chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, + synchronizer=self._synchronizer, zarr_version=self._version) + else: + raise KeyError(item) else: raise KeyError(item) @@ -369,7 +440,7 @@ def __delitem__(self, item): def _delitem_nosync(self, item): path = self._item_path(item) if contains_array(self._store, path) or \ - contains_group(self._store, path): + contains_group(self._store, path, explicit_only=False): rmdir(self._store, path) else: raise KeyError(item) @@ -406,10 +477,24 @@ def group_keys(self): ['bar', 'foo'] """ - for key in sorted(listdir(self._store, self._path)): - path = self._key_prefix + key - if contains_group(self._store, path): - yield key + if self._version == 2: + for key in sorted(listdir(self._store, self._path)): + path = self._key_prefix + key + if contains_group(self._store, path): + yield key + else: + dir_name = 'meta/root/' + self._path + sfx = _get_hierarchy_metadata(self._store)['metadata_key_suffix'] + group_sfx = '.group' + sfx + for key in sorted(listdir(self._store, dir_name)): + if key.endswith(group_sfx): + key = key[:-len(group_sfx)] + path = self._key_prefix + key + if path.endswith(".array" + sfx): + # skip array keys + continue + if contains_group(self._store, path, explicit_only=False): + yield key def groups(self): """Return an iterator over (name, value) pairs for groups only. 
@@ -428,13 +513,39 @@ def groups(self): foo """ - for key in sorted(listdir(self._store, self._path)): - path = self._key_prefix + key - if contains_group(self._store, path): - yield key, Group(self._store, path=path, read_only=self._read_only, - chunk_store=self._chunk_store, - cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer) + if self._version == 2: + for key in sorted(listdir(self._store, self._path)): + path = self._key_prefix + key + if contains_group(self._store, path, explicit_only=False): + yield key, Group( + self._store, + path=path, + read_only=self._read_only, + chunk_store=self._chunk_store, + cache_attrs=self.attrs.cache, + synchronizer=self._synchronizer, + zarr_version=self._version) + + else: + dir_name = 'meta/root/' + self._path + sfx = _get_hierarchy_metadata(self._store)['metadata_key_suffix'] + group_sfx = '.group' + sfx + for key in sorted(listdir(self._store, dir_name)): + if key.endswith(group_sfx): + key = key[:-len(group_sfx)] + path = self._key_prefix + key + if path.endswith(".array" + sfx): + # skip array keys + continue + if contains_group(self._store, path, explicit_only=False): + yield key, Group( + self._store, + path=path, + read_only=self._read_only, + chunk_store=self._chunk_store, + cache_attrs=self.attrs.cache, + synchronizer=self._synchronizer, + zarr_version=self._version) def array_keys(self, recurse=False): """Return an iterator over member names for arrays only. @@ -491,14 +602,36 @@ def arrays(self, recurse=False): recurse=recurse) def _array_iter(self, keys_only, method, recurse): - for key in sorted(listdir(self._store, self._path)): - path = self._key_prefix + key - if contains_array(self._store, path): - yield key if keys_only else (key, self[key]) - elif recurse and contains_group(self._store, path): - group = self[key] - for i in getattr(group, method)(recurse=recurse): - yield i + if self._version == 2: + for key in sorted(listdir(self._store, self._path)): + path = self._key_prefix + key + assert not path.startswith("meta") + if contains_array(self._store, path): + _key = key.rstrip("/") + yield _key if keys_only else (_key, self[key]) + elif recurse and contains_group(self._store, path): + group = self[key] + for i in getattr(group, method)(recurse=recurse): + yield i + else: + dir_name = 'meta/root/' + self._path + sfx = _get_hierarchy_metadata(self._store)['metadata_key_suffix'] + array_sfx = '.array' + sfx + for key in sorted(listdir(self._store, dir_name)): + if key.endswith(array_sfx): + key = key[:-len(array_sfx)] + path = self._key_prefix + key + assert not path.startswith("meta") + if key.endswith('.group' + sfx): + # skip group metadata keys + continue + if contains_array(self._store, path): + _key = key.rstrip("/") + yield _key if keys_only else (_key, self[key]) + elif recurse and contains_group(self._store, path): + group = self[key] + for i in getattr(group, method)(recurse=recurse): + yield i def visitvalues(self, func): """Run ``func`` on each object. 
@@ -707,7 +840,7 @@ def _create_group_nosync(self, name, overwrite=False): return Group(self._store, path=path, read_only=self._read_only, chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer) + synchronizer=self._synchronizer, zarr_version=self._version) def create_groups(self, *names, **kwargs): """Convenience method to create multiple groups in a single call.""" @@ -751,7 +884,7 @@ def _require_group_nosync(self, name, overwrite=False): return Group(self._store, path=path, read_only=self._read_only, chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer) + synchronizer=self._synchronizer, zarr_version=self._version) def require_groups(self, *names): """Convenience method to require multiple groups in a single call.""" @@ -1039,9 +1172,10 @@ def move(self, source, dest): # Check that source exists. if not (contains_array(self._store, source) or - contains_group(self._store, source)): + contains_group(self._store, source, explicit_only=False)): raise ValueError('The source, "%s", does not exist.' % source) - if contains_array(self._store, dest) or contains_group(self._store, dest): + if (contains_array(self._store, dest) or + contains_group(self._store, dest, explicit_only=False)): raise ValueError('The dest, "%s", already exists.' % dest) # Ensure groups needed for `dest` exist. @@ -1051,15 +1185,19 @@ def move(self, source, dest): self._write_op(self._move_nosync, source, dest) -def _normalize_store_arg(store, *, clobber=False, storage_options=None, mode=None): +def _normalize_store_arg(store, *, clobber=False, storage_options=None, mode=None, + zarr_version=None): + if zarr_version is None: + zarr_version = getattr(store, '_store_version', 2) if store is None: - return MemoryStore() + return MemoryStore() if zarr_version == 2 else MemoryStoreV3() return normalize_store_arg(store, clobber=clobber, - storage_options=storage_options, mode=mode) + storage_options=storage_options, mode=mode, + zarr_version=zarr_version) def group(store=None, overwrite=False, chunk_store=None, - cache_attrs=True, synchronizer=None, path=None): + cache_attrs=True, synchronizer=None, path=None, *, zarr_version=None): """Create a group. Parameters @@ -1104,20 +1242,29 @@ def group(store=None, overwrite=False, chunk_store=None, """ # handle polymorphic store arg - store = _normalize_store_arg(store) + store = _normalize_store_arg(store, zarr_version=zarr_version) + if zarr_version is None: + zarr_version = getattr(store, '_store_version', 2) + if zarr_version == 3 and path is None: + raise ValueError(f"path must be provided for a v{zarr_version} group") path = normalize_storage_path(path) - # require group - if overwrite or not contains_group(store): + if zarr_version == 2: + requires_init = overwrite or not contains_group(store) + elif zarr_version == 3: + requires_init = overwrite or not contains_group(store, path) + + if requires_init: init_group(store, overwrite=overwrite, chunk_store=chunk_store, path=path) return Group(store, read_only=False, chunk_store=chunk_store, - cache_attrs=cache_attrs, synchronizer=synchronizer, path=path) + cache_attrs=cache_attrs, synchronizer=synchronizer, path=path, + zarr_version=zarr_version) def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=None, - chunk_store=None, storage_options=None): + chunk_store=None, storage_options=None, *, zarr_version=None): """Open a group using file-mode-like semantics. 
Parameters @@ -1166,11 +1313,22 @@ def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=N # handle polymorphic store arg clobber = mode != "r" store = _normalize_store_arg( - store, clobber=clobber, storage_options=storage_options, mode=mode - ) + store, clobber=clobber, storage_options=storage_options, mode=mode, + zarr_version=zarr_version) + if zarr_version is None: + zarr_version = getattr(store, '_store_version', 2) if chunk_store is not None: chunk_store = _normalize_store_arg(chunk_store, clobber=clobber, storage_options=storage_options) + if not getattr(chunk_store, '_store_version', 2) == zarr_version: + raise ValueError( + "zarr_version of store and chunk_store must match" + ) + + store_version = getattr(store, '_store_version', 2) + if store_version == 3 and path is None: + raise ValueError("path must be supplied to initialize a zarr v3 group") + path = normalize_storage_path(path) # ensure store is initialized @@ -1202,4 +1360,5 @@ def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=N read_only = mode == 'r' return Group(store, read_only=read_only, cache_attrs=cache_attrs, - synchronizer=synchronizer, path=path, chunk_store=chunk_store) + synchronizer=synchronizer, path=path, chunk_store=chunk_store, + zarr_version=zarr_version) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 2830be8c38..046995fd3e 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -27,6 +27,10 @@ NestedDirectoryStore, SQLiteStore, ZipStore, array_meta_key, atexit_rmglob, atexit_rmtree, group_meta_key, init_array, init_group) +from zarr.storage import (KVStoreV3, DirectoryStoreV3, # MemoryStoreV3 + FSStoreV3, NestedDirectoryStoreV3, ZipStoreV3, + DBMStoreV3, LMDBStoreV3, SQLiteStoreV3, + LRUStoreCacheV3) from zarr.util import InfoReporter from zarr.tests.util import skip_test_env_var, have_fsspec, abs_container @@ -96,30 +100,51 @@ def test_group_init_errors_2(self): Group(store, chunk_store=chunk_store) store.close() + def _subgroup_path(self, group, path): + path = path.rstrip('/') + absolute = path.startswith('/') + if absolute: + group_path = path + else: + if path: + group_path = '/'.join([group.path, path]) + else: + group_path = path + group_path = group_path.lstrip('/') + group_name = '/' + group_path + return group_path, group_name + def test_create_group(self): g1 = self.create_group() + if g1._version == 2: + path, name = '', '/' + else: + path, name = 'group', '/group' # check root group - assert '' == g1.path - assert '/' == g1.name + assert path == g1.path + assert name == g1.name # create level 1 child group g2 = g1.create_group('foo') + path, name = self._subgroup_path(g1, 'foo') assert isinstance(g2, Group) - assert 'foo' == g2.path - assert '/foo' == g2.name + assert path == g2.path + assert name == g2.name # create level 2 child group g3 = g2.create_group('bar') + path, name = self._subgroup_path(g2, 'bar') assert isinstance(g3, Group) - assert 'foo/bar' == g3.path - assert '/foo/bar' == g3.name + assert path == g3.path + assert name == g3.name # create level 3 child group g4 = g1.create_group('foo/bar/baz') + path, name = self._subgroup_path(g1, 'foo/bar/baz') assert isinstance(g4, Group) - assert 'foo/bar/baz' == g4.path - assert '/foo/bar/baz' == g4.name + assert path == g4.path + assert name == g4.name # create level 3 group via root g5 = g4.create_group('/a/b/c/') @@ -138,17 +163,23 @@ def __str__(self): o = Foo('test/object') go = g1.create_group(o) + path, name = 
self._subgroup_path(g1, str(o)) assert isinstance(go, Group) - assert 'test/object' == go.path + assert path == go.path go = g1.create_group(b'test/bytes') + path, name = self._subgroup_path(g1, 'test/bytes') assert isinstance(go, Group) - assert 'test/bytes' == go.path + assert path == go.path # test bad keys with pytest.raises(ValueError): g1.create_group('foo') # already exists - with pytest.raises(ValueError): - g1.create_group('a/b/c') # already exists + if g1._version == 2: + with pytest.raises(ValueError): + g1.create_group('a/b/c') # already exists + elif g1._version == 3: + # for v3 'group/a/b/c' does not already exist + g1.create_group('a/b/c') with pytest.raises(ValueError): g4.create_group('/a/b/c') # already exists with pytest.raises(ValueError): @@ -161,9 +192,9 @@ def __str__(self): # multi g6, g7 = g1.create_groups('y', 'z') assert isinstance(g6, Group) - assert g6.path == 'y' + assert g6.path == self._subgroup_path(g1, 'y')[0] assert isinstance(g7, Group) - assert g7.path == 'z' + assert g7.path == self._subgroup_path(g1, 'z')[0] g1.store.close() @@ -172,14 +203,17 @@ def test_require_group(self): # test creation g2 = g1.require_group('foo') + path, name = self._subgroup_path(g1, 'foo') assert isinstance(g2, Group) - assert 'foo' == g2.path + assert path == g2.path g3 = g2.require_group('bar') + path, name = self._subgroup_path(g2, 'bar') assert isinstance(g3, Group) - assert 'foo/bar' == g3.path + assert path == g3.path g4 = g1.require_group('foo/bar/baz') + path, name = self._subgroup_path(g1, 'foo/bar/baz') assert isinstance(g4, Group) - assert 'foo/bar/baz' == g4.path + assert path == g4.path g5 = g4.require_group('/a/b/c/') assert isinstance(g5, Group) assert 'a/b/c' == g5.path @@ -199,33 +233,50 @@ def test_require_group(self): assert g5.store is g5a.store # test path normalization - assert g1.require_group('quux') == g1.require_group('/quux/') + if g1._version == 2: + # TODO: expected behavior for v3 + assert g1.require_group('quux') == g1.require_group('/quux/') # multi g6, g7 = g1.require_groups('y', 'z') assert isinstance(g6, Group) - assert g6.path == 'y' + assert g6.path == self._subgroup_path(g1, 'y')[0] assert isinstance(g7, Group) - assert g7.path == 'z' + assert g7.path == self._subgroup_path(g1, 'z')[0] g1.store.close() + def _dataset_path(self, group, path): + path = path.rstrip('/') + absolute = path.startswith('/') + if absolute: + dataset_path = path + else: + dataset_path = '/'.join([group.path, path]) + dataset_path = dataset_path.lstrip('/') + dataset_name = '/' + dataset_path + return dataset_path, dataset_name + def test_create_dataset(self): g = self.create_group() # create as immediate child - d1 = g.create_dataset('foo', shape=1000, chunks=100) + dpath = 'foo' + d1 = g.create_dataset(dpath, shape=1000, chunks=100) + path, name = self._dataset_path(g, dpath) assert isinstance(d1, Array) assert (1000,) == d1.shape assert (100,) == d1.chunks - assert 'foo' == d1.path - assert '/foo' == d1.name + assert path == d1.path + assert name == d1.name assert g.store is d1.store # create as descendant - d2 = g.create_dataset('/a/b/c/', shape=2000, chunks=200, dtype='i1', + dpath = '/a/b/c/' + d2 = g.create_dataset(dpath, shape=2000, chunks=200, dtype='i1', compression='zlib', compression_opts=9, fill_value=42, order='F') + path, name = self._dataset_path(g, dpath) assert isinstance(d2, Array) assert (2000,) == d2.shape assert (200,) == d2.chunks @@ -234,20 +285,22 @@ def test_create_dataset(self): assert 9 == d2.compressor.level assert 42 == d2.fill_value 
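A condensed sketch of the path arithmetic performed by the _dataset_path
helper introduced above (the group path 'group' mirrors the v3 default used
by these tests): absolute dataset paths are taken as-is, relative ones are
joined onto the group path:

    def dataset_path(group_path, path):
        path = path.rstrip("/")
        if not path.startswith("/"):
            path = "/".join([group_path, path])
        dpath = path.lstrip("/")
        return dpath, "/" + dpath

    assert dataset_path("group", "bar") == ("group/bar", "/group/bar")
    assert dataset_path("group", "/a/b/c/") == ("a/b/c", "/a/b/c")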
assert 'F' == d2.order - assert 'a/b/c' == d2.path - assert '/a/b/c' == d2.name + assert path == d2.path + assert name == d2.name assert g.store is d2.store # create with data data = np.arange(3000, dtype='u2') - d3 = g.create_dataset('bar', data=data, chunks=300) + dpath = 'bar' + d3 = g.create_dataset(dpath, data=data, chunks=300) + path, name = self._dataset_path(g, dpath) assert isinstance(d3, Array) assert (3000,) == d3.shape assert (300,) == d3.chunks assert np.dtype('u2') == d3.dtype assert_array_equal(data, d3[:]) - assert 'bar' == d3.path - assert '/bar' == d3.name + assert path == d3.path + assert name == d3.name assert g.store is d3.store # compression arguments handling follows... @@ -290,25 +343,27 @@ def test_require_dataset(self): g = self.create_group() # create - d1 = g.require_dataset('foo', shape=1000, chunks=100, dtype='f4') + dpath = 'foo' + d1 = g.require_dataset(dpath, shape=1000, chunks=100, dtype='f4') d1[:] = np.arange(1000) + path, name = self._dataset_path(g, dpath) assert isinstance(d1, Array) assert (1000,) == d1.shape assert (100,) == d1.chunks assert np.dtype('f4') == d1.dtype - assert 'foo' == d1.path - assert '/foo' == d1.name + assert path == d1.path + assert name == d1.name assert g.store is d1.store assert_array_equal(np.arange(1000), d1[:]) # require - d2 = g.require_dataset('foo', shape=1000, chunks=100, dtype='f4') + d2 = g.require_dataset(dpath, shape=1000, chunks=100, dtype='f4') assert isinstance(d2, Array) assert (1000,) == d2.shape assert (100,) == d2.chunks assert np.dtype('f4') == d2.dtype - assert 'foo' == d2.path - assert '/foo' == d2.name + assert path == d2.path + assert name == d2.name assert g.store is d2.store assert_array_equal(np.arange(1000), d2[:]) assert d1 == d2 @@ -419,7 +474,12 @@ def test_getitem_contains_iterators(self): # setup g1 = self.create_group() g2 = g1.create_group('foo/bar') - d1 = g2.create_dataset('/a/b/c', shape=1000, chunks=100) + if g1._version == 2: + d1 = g2.create_dataset('/a/b/c', shape=1000, chunks=100) + else: + # v3: cannot create a dataset at the root by starting with / + # instead, need to create the dataset on g1 directly + d1 = g1.create_dataset('a/b/c', shape=1000, chunks=100) d1[:] = np.arange(1000) d2 = g1.create_dataset('foo/baz', shape=3000, chunks=300) d2[:] = np.arange(3000) @@ -428,7 +488,13 @@ def test_getitem_contains_iterators(self): assert isinstance(g1['foo'], Group) assert isinstance(g1['foo']['bar'], Group) assert isinstance(g1['foo/bar'], Group) - assert isinstance(g1['/foo/bar/'], Group) + if g1._version == 2: + assert isinstance(g1['/foo/bar/'], Group) + else: + # start or end with / raises KeyError + # TODO: should we fix allow stripping of these on v3? 
+ with pytest.raises(KeyError): + assert isinstance(g1['/foo/bar/'], Group) assert isinstance(g1['foo/baz'], Array) assert g2 == g1['foo/bar'] assert g1['foo']['bar'] == g1['foo/bar'] @@ -454,7 +520,9 @@ def test_getitem_contains_iterators(self): assert 'baz' not in g1 assert 'a/b/c/d' not in g1 assert 'a/z' not in g1 - assert 'quux' not in g1['foo'] + if g1._version == 2: + # TODO: handle implicit group for v3 spec + assert 'quux' not in g1['foo'] # test key errors with pytest.raises(KeyError): @@ -470,12 +538,19 @@ def test_getitem_contains_iterators(self): assert 1 == len(g1['a/b']) # test __iter__, keys() - # currently assumes sorted by key - assert ['a', 'foo'] == list(g1) - assert ['a', 'foo'] == list(g1.keys()) - assert ['bar', 'baz'] == list(g1['foo']) - assert ['bar', 'baz'] == list(g1['foo'].keys()) + if g1._version == 2: + # currently assumes sorted by key + assert ['a', 'foo'] == list(g1) + assert ['a', 'foo'] == list(g1.keys()) + assert ['bar', 'baz'] == list(g1['foo']) + assert ['bar', 'baz'] == list(g1['foo'].keys()) + else: + # v3 is not necessarily sorted by key + assert ['a', 'foo'] == sorted(list(g1)) + assert ['a', 'foo'] == sorted(list(g1.keys())) + assert ['bar', 'baz'] == sorted(list(g1['foo'])) + assert ['bar', 'baz'] == sorted(list(g1['foo'].keys())) assert [] == sorted(g1['foo/bar']) assert [] == sorted(g1['foo/bar'].keys()) @@ -484,6 +559,9 @@ def test_getitem_contains_iterators(self): items = list(g1.items()) values = list(g1.values()) + if g1._version == 3: + # v3 are not automatically sorted by key + items, values = zip(*sorted(zip(items, values), key=lambda x: x[0])) assert 'a' == items[0][0] assert g1['a'] == items[0][1] assert g1['a'] == values[0] @@ -493,6 +571,9 @@ def test_getitem_contains_iterators(self): items = list(g1['foo'].items()) values = list(g1['foo'].values()) + if g1._version == 3: + # v3 are not automatically sorted by key + items, values = zip(*sorted(zip(items, values), key=lambda x: x[0])) assert 'bar' == items[0][0] assert g1['foo']['bar'] == items[0][1] assert g1['foo']['bar'] == values[0] @@ -501,11 +582,16 @@ def test_getitem_contains_iterators(self): assert g1['foo']['baz'] == values[1] # test array_keys(), arrays(), group_keys(), groups() - # currently assumes sorted by key - assert ['a', 'foo'] == list(g1.group_keys()) groups = list(g1.groups()) arrays = list(g1.arrays()) + if g1._version == 2: + # currently assumes sorted by key + assert ['a', 'foo'] == list(g1.group_keys()) + else: + assert ['a', 'foo'] == sorted(list(g1.group_keys())) + groups = sorted(groups) + arrays = sorted(arrays) assert 'a' == groups[0][0] assert g1['a'] == groups[0][1] assert 'foo' == groups[1][0] @@ -517,6 +603,9 @@ def test_getitem_contains_iterators(self): assert ['baz'] == list(g1['foo'].array_keys()) groups = list(g1['foo'].groups()) arrays = list(g1['foo'].arrays()) + if g1._version == 3: + groups = sorted(groups) + arrays = sorted(arrays) assert 'bar' == groups[0][0] assert g1['foo']['bar'] == groups[0][1] assert 'baz' == arrays[0][0] @@ -537,21 +626,27 @@ def visitor4(name, obj): del items[:] g1.visitvalues(visitor2) - assert [ + expected_items = [ "a", "a/b", "a/b/c", "foo", "foo/bar", "foo/baz", - ] == items + ] + if g1._version == 3: + expected_items = [g1.path + '/' + i for i in expected_items] + assert expected_items == items del items[:] g1["foo"].visitvalues(visitor2) - assert [ + expected_items = [ "foo/bar", "foo/baz", - ] == items + ] + if g1._version == 3: + expected_items = [g1.path + '/' + i for i in expected_items] + assert 
expected_items == items del items[:] g1.visit(visitor3) @@ -627,6 +722,9 @@ def visitor0(val, *args): # noinspection PyUnusedLocal def visitor1(val, *args): name = getattr(val, "path", val) + if name.startswith('group/'): + # strip the group path for v3 + name = name[6:] if name == "a/b/c": return True @@ -664,6 +762,9 @@ def test_iterators_recurse(self): d3 = g2.create_dataset('zab', shape=2000, chunks=200) d3[:] = np.arange(2000) + if g1._version == 3: + pytest.skip("TODO: fix for V3") + # test recursive array_keys array_keys = list(g1['foo'].array_keys(recurse=False)) array_keys_recurse = list(g1['foo'].array_keys(recurse=True)) @@ -762,9 +863,13 @@ def test_move(self): g2.move("bar", "/bar") assert "foo2" in g assert "foo2/bar" not in g - assert "bar" in g + if g2._version == 2: + # TODO: how to access element created outside of group.path in v3? + assert "bar" in g assert isinstance(g["foo2"], Group) - assert_array_equal(data, g["bar"]) + if g2._version == 2: + # TODO: how to access element created outside of group.path in v3? + assert_array_equal(data, g["bar"]) with pytest.raises(ValueError): g2.move("bar", "bar2") @@ -841,6 +946,9 @@ def test_paths(self): g1 = self.create_group() g2 = g1.create_group('foo/bar') + if g1._version == 3: + pytest.skip("TODO: update class for v3") + assert g1 == g1['/'] assert g1 == g1['//'] assert g1 == g1['///'] @@ -893,7 +1001,9 @@ def test_pickle(self): assert name == g2.name assert n == len(g2) assert keys == list(g2) - assert isinstance(g2['foo'], Group) + if g2._version == 2: + # TODO: handle implicit group for v3 + assert isinstance(g2['foo'], Group) assert isinstance(g2['foo/bar'], Array) g2.store.close() @@ -921,6 +1031,57 @@ def test_group_init_from_dict(chunk_dict): assert chunk_store is not g.chunk_store +# noinspection PyStatementEffect +class TestGroupV3(TestGroup, unittest.TestCase): + + @staticmethod + def create_store(): + # can be overridden in sub-classes + return KVStoreV3(dict()), None + + def create_group(self, store=None, path='group', read_only=False, + chunk_store=None, synchronizer=None): + # can be overridden in sub-classes + if store is None: + store, chunk_store = self.create_store() + init_group(store, path=path, chunk_store=chunk_store) + g = Group(store, path=path, read_only=read_only, + chunk_store=chunk_store, synchronizer=synchronizer) + return g + + def test_group_init_1(self): + store, chunk_store = self.create_store() + g = self.create_group(store, chunk_store=chunk_store) + assert store is g.store + if chunk_store is None: + assert store is g.chunk_store + else: + assert chunk_store is g.chunk_store + assert not g.read_only + # different path/name in v3 case + assert 'group' == g.path + assert '/group' == g.name + assert 'group' == g.basename + + assert isinstance(g.attrs, Attributes) + g.attrs['foo'] = 'bar' + assert g.attrs['foo'] == 'bar' + + assert isinstance(g.info, InfoReporter) + assert isinstance(repr(g.info), str) + assert isinstance(g.info._repr_html_(), str) + store.close() + + def test_group_init_errors_2(self): + store, chunk_store = self.create_store() + path = 'tmp' + init_array(store, path=path, shape=1000, chunks=100, chunk_store=chunk_store) + # array blocks group + with pytest.raises(ValueError): + Group(store, path=path, chunk_store=chunk_store) + store.close() + + class TestGroupWithMemoryStore(TestGroup): @staticmethod @@ -928,6 +1089,14 @@ def create_store(): return MemoryStore(), None +# TODO: fix MemoryStoreV3 _get_parent, etc. 
+# # noinspection PyStatementEffect +# class TestGroupV3WithMemoryStore(TestGroupWithMemoryStore, TestGroupV3): + +# @staticmethod +# def create_store(): +# return MemoryStoreV3(), None + class TestGroupWithDirectoryStore(TestGroup): @staticmethod @@ -938,6 +1107,16 @@ def create_store(): return store, None +class TestGroupV3WithDirectoryStore(TestGroupWithDirectoryStore, TestGroupV3): + + @staticmethod + def create_store(): + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, path) + store = DirectoryStoreV3(path) + return store, None + + @skip_test_env_var("ZARR_TEST_ABS") class TestGroupWithABSStore(TestGroup): @@ -953,6 +1132,8 @@ def test_pickle(self): # internal attribute on ContainerClient isn't serializable for py36 and earlier super().test_pickle() +# TODO TestGroupV3WithABSStore(TestGroup): + class TestGroupWithNestedDirectoryStore(TestGroup): @@ -964,6 +1145,16 @@ def create_store(): return store, None +class TestGroupV3WithNestedDirectoryStore(TestGroupWithNestedDirectoryStore, TestGroupV3): + + @staticmethod + def create_store(): + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, path) + store = NestedDirectoryStoreV3(path) + return store, None + + @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestGroupWithFSStore(TestGroup): @@ -986,6 +1177,29 @@ def test_round_trip_nd(self): np.testing.assert_array_equal(h[name][:], data) +@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") +class TestGroupV3WithFSStore(TestGroupWithFSStore, TestGroupV3): + + @staticmethod + def create_store(): + pytest.skip("TODO: Fix for V3") + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, path) + store = FSStoreV3(path) + return store, None + + def test_round_trip_nd(self): + data = np.arange(1000).reshape(10, 10, 10) + name = 'raw' + + store, _ = self.create_store() + f = open_group(store, path='group', mode='w') + f.create_dataset(name, data=data, chunks=(5, 5, 5), + compressor=None) + h = open_group(store, path='group', mode='r') + np.testing.assert_array_equal(h[name][:], data) + + @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestGroupWithNestedFSStore(TestGroupWithFSStore): @@ -1009,6 +1223,30 @@ def test_inconsistent_dimension_separator(self): compressor=None, dimension_separator='.') +@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") +class TestGroupV3WithNestedFSStore(TestGroupV3WithFSStore): + + @staticmethod + def create_store(): + pytest.skip("TODO: Fix for V3") + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, path) + store = FSStoreV3(path, key_separator='/', auto_mkdir=True) + return store, None + + def test_inconsistent_dimension_separator(self): + data = np.arange(1000).reshape(10, 10, 10) + name = 'raw' + + store, _ = self.create_store() + f = open_group(store, path='group', mode='w') + + # cannot specify dimension_separator that conflicts with the store + with pytest.raises(ValueError): + f.create_dataset(name, data=data, chunks=(5, 5, 5), + compressor=None, dimension_separator='.') + + class TestGroupWithZipStore(TestGroup): @staticmethod @@ -1036,6 +1274,16 @@ def test_move(self): pass +class TestGroupV3WithZipStore(TestGroupWithZipStore, TestGroupV3): + + @staticmethod + def create_store(): + path = tempfile.mktemp(suffix='.zip') + atexit.register(os.remove, path) + store = ZipStoreV3(path) + return store, None + + class TestGroupWithDBMStore(TestGroup): @staticmethod @@ -1046,6 +1294,16 @@ def create_store(): return store, None +class 
TestGroupV3WithDBMStore(TestGroupWithDBMStore, TestGroupV3): + + @staticmethod + def create_store(): + path = tempfile.mktemp(suffix='.anydbm') + atexit.register(atexit_rmglob, path + '*') + store = DBMStoreV3(path, flag='n') + return store, None + + class TestGroupWithDBMStoreBerkeleyDB(TestGroup): @staticmethod @@ -1057,6 +1315,17 @@ def create_store(): return store, None +class TestGroupV3WithDBMStoreBerkeleyDB(TestGroupWithDBMStoreBerkeleyDB, TestGroupV3): + + @staticmethod + def create_store(): + bsddb3 = pytest.importorskip("bsddb3") + path = tempfile.mktemp(suffix='.dbm') + atexit.register(os.remove, path) + store = DBMStoreV3(path, flag='n', open=bsddb3.btopen) + return store, None + + class TestGroupWithLMDBStore(TestGroup): @staticmethod @@ -1068,6 +1337,17 @@ def create_store(): return store, None +class TestGroupV3WithLMDBStore(TestGroupWithLMDBStore, TestGroupV3): + + @staticmethod + def create_store(): + pytest.importorskip("lmdb") + path = tempfile.mktemp(suffix='.lmdb') + atexit.register(atexit_rmtree, path) + store = LMDBStoreV3(path) + return store, None + + class TestGroupWithSQLiteStore(TestGroup): def create_store(self): @@ -1078,6 +1358,16 @@ def create_store(self): return store, None +class TestGroupV3WithSQLiteStore(TestGroupWithSQLiteStore, TestGroupV3): + + def create_store(self): + pytest.importorskip("sqlite3") + path = tempfile.mktemp(suffix='.db') + atexit.register(atexit_rmtree, path) + store = SQLiteStoreV3(path) + return store, None + + class TestGroupWithChunkStore(TestGroup): @staticmethod @@ -1109,6 +1399,41 @@ def test_chunk_store(self): assert expect == actual +class TestGroupV3WithChunkStore(TestGroupWithChunkStore, TestGroupV3): + + @staticmethod + def create_store(): + return KVStoreV3(dict()), KVStoreV3(dict()) + + def test_chunk_store(self): + # setup + store, chunk_store = self.create_store() + path = 'group1' + g = self.create_group(store, path=path, chunk_store=chunk_store) + + # check attributes + assert store is g.store + assert chunk_store is g.chunk_store + + # create array + a = g.zeros('foo', shape=100, chunks=10) + assert store is a.store + assert chunk_store is a.chunk_store + a[:] = np.arange(100) + assert_array_equal(np.arange(100), a[:]) + + # check store keys + group_key = 'meta/root/' + path + '.group.json' + array_key = 'meta/root/' + path + '/foo' + '.array.json' + expect = sorted([group_key, array_key, 'zarr.json']) + actual = sorted(store.keys()) + assert expect == actual + expect = ['data/root/' + path + '/foo/c' + str(i) for i in range(10)] + expect += ['zarr.json'] + actual = sorted(chunk_store.keys()) + assert expect == actual + + class TestGroupWithStoreCache(TestGroup): @staticmethod @@ -1117,44 +1442,75 @@ def create_store(): return store, None -def test_group(): +class TestGroupV3WithStoreCache(TestGroupWithStoreCache, TestGroupV3): + + @staticmethod + def create_store(): + store = LRUStoreCacheV3(dict(), max_size=None) + return store, None + + +@pytest.mark.parametrize('zarr_version', [2, 3]) +def test_group(zarr_version): # test the group() convenience function # basic usage - g = group() + if zarr_version == 2: + g = group() + assert '' == g.path + assert '/' == g.name + else: + g = group(path='group1', zarr_version=zarr_version) + assert 'group1' == g.path + assert '/group1' == g.name assert isinstance(g, Group) - assert '' == g.path - assert '/' == g.name # usage with custom store - store = KVStore(dict()) - g = group(store=store) + if zarr_version == 2: + store = KVStore(dict()) + path = None + else: + store = 
KVStoreV3(dict()) + path = 'foo' + g = group(store=store, path=path) assert isinstance(g, Group) assert store is g.store # overwrite behaviour - store = KVStore(dict()) - init_array(store, shape=100, chunks=10) + if zarr_version == 2: + store = KVStore(dict()) + path = None + else: + store = KVStoreV3(dict()) + path = 'foo' + init_array(store, path=path, shape=100, chunks=10) with pytest.raises(ValueError): - group(store) - g = group(store, overwrite=True) + group(store, path=path) + g = group(store, path=path, overwrite=True) assert isinstance(g, Group) assert store is g.store -def test_open_group(): +@pytest.mark.parametrize('zarr_version', [2, 3]) +def test_open_group(zarr_version): # test the open_group() convenience function store = 'data/group.zarr' + expected_store_type = DirectoryStore if zarr_version == 2 else DirectoryStoreV3 + # mode == 'w' - g = open_group(store, mode='w') + path = None if zarr_version == 2 else 'group1' + g = open_group(store, path=path, mode='w', zarr_version=zarr_version) assert isinstance(g, Group) - assert isinstance(g.store, DirectoryStore) + assert isinstance(g.store, expected_store_type) assert 0 == len(g) g.create_groups('foo', 'bar') assert 2 == len(g) + # TODO: update the r, r+ test case here for zarr_version == 3 after + # open_array has StoreV3 support + # mode in 'r', 'r+' open_array('data/array.zarr', shape=100, chunks=10, mode='w') for mode in 'r', 'r+': @@ -1175,37 +1531,40 @@ def test_open_group(): # mode == 'a' shutil.rmtree(store) - g = open_group(store, mode='a') + g = open_group(store, path=path, mode='a', zarr_version=zarr_version) assert isinstance(g, Group) - assert isinstance(g.store, DirectoryStore) + assert isinstance(g.store, expected_store_type) assert 0 == len(g) g.create_groups('foo', 'bar') assert 2 == len(g) with pytest.raises(ValueError): - open_group('data/array.zarr', mode='a') + open_group('data/array.zarr', mode='a', zarr_version=zarr_version) # mode in 'w-', 'x' for mode in 'w-', 'x': shutil.rmtree(store) - g = open_group(store, mode=mode) + g = open_group(store, path=path, mode=mode, zarr_version=zarr_version) assert isinstance(g, Group) - assert isinstance(g.store, DirectoryStore) + assert isinstance(g.store, expected_store_type) assert 0 == len(g) g.create_groups('foo', 'bar') assert 2 == len(g) with pytest.raises(ValueError): - open_group(store, mode=mode) - with pytest.raises(ValueError): - open_group('data/array.zarr', mode=mode) + open_group(store, path=path, mode=mode, zarr_version=zarr_version) + if zarr_version == 2: + with pytest.raises(ValueError): + open_group('data/array.zarr', mode=mode) # open with path - g = open_group(store, path='foo/bar') + g = open_group(store, path='foo/bar', zarr_version=zarr_version) assert isinstance(g, Group) assert 'foo/bar' == g.path -def test_group_completions(): - g = group() +@pytest.mark.parametrize('zarr_version', [2, 3]) +def test_group_completions(zarr_version): + path = None if zarr_version == 2 else 'group1' + g = group(path=path, zarr_version=zarr_version) d = dir(g) assert 'foo' not in d assert 'bar' not in d @@ -1233,8 +1592,10 @@ def test_group_completions(): assert '456' not in d # not valid identifier -def test_group_key_completions(): - g = group() +@pytest.mark.parametrize('zarr_version', [2, 3]) +def test_group_key_completions(zarr_version): + path = None if zarr_version == 2 else 'group1' + g = group(path=path, zarr_version=zarr_version) d = dir(g) # noinspection PyProtectedMember k = g._ipython_key_completions_() @@ -1308,9 +1669,11 @@ def _check_tree(g, 
expect_bytes, expect_text): isinstance(widget, ipytree.Tree) -def test_tree(): +@pytest.mark.parametrize('zarr_version', [2, 3]) +def test_tree(zarr_version): # setup - g1 = group() + path = None if zarr_version == 2 else 'group1' + g1 = group(path=path, zarr_version=zarr_version) g2 = g1.create_group('foo') g3 = g1.create_group('bar') g3.create_group('baz') @@ -1318,20 +1681,38 @@ def test_tree(): g5.create_dataset('baz', shape=100, chunks=10) # test root group - expect_bytes = textwrap.dedent("""\ - / - +-- bar - | +-- baz - | +-- quux - | +-- baz (100,) float64 - +-- foo""").encode() - expect_text = textwrap.dedent("""\ - / - ├── bar - │ ├── baz - │ └── quux - │ └── baz (100,) float64 - └── foo""") + if zarr_version == 2: + expect_bytes = textwrap.dedent("""\ + / + +-- bar + | +-- baz + | +-- quux + | +-- baz (100,) float64 + +-- foo""").encode() + expect_text = textwrap.dedent("""\ + / + ├── bar + │ ├── baz + │ └── quux + │ └── baz (100,) float64 + └── foo""") + else: + # Almost the same as for v2, but has a path name and the + # subgroups are not necessarily sorted alphabetically. + expect_bytes = textwrap.dedent("""\ + group1 + +-- foo + +-- bar + +-- baz + +-- quux + +-- baz (100,) float64""").encode() + expect_text = textwrap.dedent("""\ + group1 + ├── foo + └── bar + ├── baz + └── quux + └── baz (100,) float64""") _check_tree(g1, expect_bytes, expect_text) # test different group From 00b2cc077affce92b2644877063860af6d4091b7 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 30 Nov 2021 22:39:30 -0500 Subject: [PATCH 021/109] add StoreV3 support to creation routines --- zarr/creation.py | 36 +++- zarr/tests/test_creation.py | 321 +++++++++++++++++++++++++----------- 2 files changed, 253 insertions(+), 104 deletions(-) diff --git a/zarr/creation.py b/zarr/creation.py index 64c5666adb..8bce8673d9 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -19,7 +19,8 @@ def create(shape, chunks=True, dtype=None, compressor='default', fill_value=0, order='C', store=None, synchronizer=None, overwrite=False, path=None, chunk_store=None, filters=None, cache_metadata=True, cache_attrs=True, read_only=False, - object_codec=None, dimension_separator=None, write_empty_chunks=True, **kwargs): + object_codec=None, dimension_separator=None, write_empty_chunks=True, *, + zarr_version=None, **kwargs): """Create an array. Parameters @@ -77,7 +78,10 @@ def create(shape, chunks=True, dtype=None, compressor='default', that chunk's key is deleted. This setting enables sparser storage, as only chunks with non-fill-value data are stored, at the expense of overhead associated with checking the data of each chunk. - + zarr_version : {None, 2, 3}, optional + The zarr protocol version of the created array. If None, it will be + inferred from ``store`` or ``chunk_store`` if they are provided, + otherwise defaulting to 2. 
Returns ------- @@ -122,9 +126,12 @@ def create(shape, chunks=True, dtype=None, compressor='default', """ + if zarr_version is None and store is None: + zarr_version = getattr(chunk_store, '_store_version', 2) # handle polymorphic store arg - store = normalize_store_arg(store) + store = normalize_store_arg(store, zarr_version=zarr_version) + zarr_version = getattr(store, '_store_version', 2) # API compatibility with h5py compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs) @@ -141,6 +148,9 @@ def create(shape, chunks=True, dtype=None, compressor='default', f"{store_separator}") dimension_separator = normalize_dimension_separator(dimension_separator) + if zarr_version > 2 and path is None: + raise ValueError("path must be supplied to initialize a zarr v3 array") + # initialize array metadata init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, fill_value=fill_value, order=order, overwrite=overwrite, path=path, @@ -388,6 +398,8 @@ def open_array( storage_options=None, partial_decompress=False, write_empty_chunks=True, + *, + zarr_version=None, **kwargs ): """Open an array using file-mode-like semantics. @@ -450,6 +462,10 @@ def open_array( that chunk's key is deleted. This setting enables sparser storage, as only chunks with non-fill-value data are stored, at the expense of overhead associated with checking the data of each chunk. + zarr_version : {None, 2, 3}, optional + The zarr protocol version of the array to be opened. If None, it will + be inferred from ``store`` or ``chunk_store`` if they are provided, + otherwise defaulting to 2. Returns ------- @@ -484,12 +500,21 @@ def open_array( # w- or x : create, fail if exists # a : read/write if exists, create otherwise (default) + if zarr_version is None and store is None: + zarr_version = getattr(chunk_store, '_store_version', 2) + # handle polymorphic store arg clobber = (mode == 'w') - store = normalize_store_arg(store, clobber=clobber, storage_options=storage_options, mode=mode) + store = normalize_store_arg(store, clobber=clobber, storage_options=storage_options, + mode=mode, zarr_version=zarr_version) + zarr_version = getattr(store, '_store_version', 2) if chunk_store is not None: chunk_store = normalize_store_arg(chunk_store, clobber=clobber, - storage_options=storage_options) + storage_options=storage_options, + zarr_version=zarr_version) + + if zarr_version == 3 and path is None: + path = 'array' # TODO: raise ValueError instead? 
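
# As a rough usage sketch of the two code paths above (the in-memory store
# and the path 'arr1' are illustrative; this assumes the KVStoreV3 class
# introduced earlier in this series): create() refuses to build a v3 array
# without an explicit path, while open_array() currently falls back to 'array'.
#
import numpy as np
from zarr.creation import create, open_array
from zarr.storage import KVStoreV3

# v3 arrays must be rooted at an explicit path
z = create(100, chunks=10, store=KVStoreV3(dict()), path='arr1', zarr_version=3)
z[:] = np.arange(100)

# with no path given, open_array currently falls back to 'array'
z2 = open_array(KVStoreV3(dict()), mode='w', shape=100, chunks=10)
assert z2.path == 'array'
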
path = normalize_storage_path(path) # API compatibility with h5py @@ -559,6 +584,7 @@ def _like_args(a, kwargs): kwargs.setdefault('compressor', a.compressor) kwargs.setdefault('order', a.order) kwargs.setdefault('filters', a.filters) + kwargs.setdefault('zarr_version', a._version) else: kwargs.setdefault('compressor', 'default') kwargs.setdefault('order', 'C') diff --git a/zarr/tests/test_creation.py b/zarr/tests/test_creation.py index 0ec551ba4e..1d27077176 100644 --- a/zarr/tests/test_creation.py +++ b/zarr/tests/test_creation.py @@ -15,7 +15,7 @@ zeros_like) from zarr.hierarchy import open_group from zarr.n5 import N5Store -from zarr.storage import DirectoryStore, KVStore +from zarr.storage import DirectoryStore, DirectoryStoreV3, KVStore, KVStoreV3 from zarr.sync import ThreadSynchronizer @@ -47,24 +47,36 @@ def __getitem__(self, item): return self.data[item] -def test_array(): +def _init_creation_kwargs(zarr_version): + kwargs = {'zarr_version': zarr_version} + if zarr_version == 3: + kwargs['path'] = 'array' + return kwargs + + +@pytest.mark.parametrize('zarr_version', [None, 2, 3]) +def test_array(zarr_version): + + expected_zarr_version = 2 if zarr_version is None else zarr_version + kwargs = _init_creation_kwargs(zarr_version) # with numpy array a = np.arange(100) - z = array(a, chunks=10) + z = array(a, chunks=10, **kwargs) assert a.shape == z.shape assert a.dtype == z.dtype + assert z._store._store_version == expected_zarr_version assert_array_equal(a, z[:]) # with array-like a = list(range(100)) - z = array(a, chunks=10) + z = array(a, chunks=10, **kwargs) assert (100,) == z.shape assert np.asarray(a).dtype == z.dtype assert_array_equal(np.asarray(a), z[:]) # with another zarr array - z2 = array(z) + z2 = array(z, **kwargs) assert z.shape == z2.shape assert z.chunks == z2.chunks assert z.dtype == z2.dtype @@ -74,73 +86,86 @@ def test_array(): b = np.arange(1000).reshape(100, 10) c = MockBcolzArray(b, 10) - z3 = array(c) + z3 = array(c, **kwargs) assert c.shape == z3.shape assert (10, 10) == z3.chunks b = np.arange(1000).reshape(100, 10) c = MockH5pyDataset(b, chunks=(10, 2)) - z4 = array(c) + z4 = array(c, **kwargs) assert c.shape == z4.shape assert (10, 2) == z4.chunks c = MockH5pyDataset(b, chunks=None) - z5 = array(c) + z5 = array(c, **kwargs) assert c.shape == z5.shape assert isinstance(z5.chunks, tuple) # with dtype=None a = np.arange(100, dtype='i4') - z = array(a, dtype=None) + z = array(a, dtype=None, **kwargs) assert_array_equal(a[:], z[:]) assert a.dtype == z.dtype # with dtype=something else a = np.arange(100, dtype='i4') - z = array(a, dtype='i8') + z = array(a, dtype='i8', **kwargs) assert_array_equal(a[:], z[:]) assert np.dtype('i8') == z.dtype -def test_empty(): - z = empty(100, chunks=10) +@pytest.mark.parametrize('zarr_version', [None, 2, 3]) +def test_empty(zarr_version): + kwargs = _init_creation_kwargs(zarr_version) + z = empty(100, chunks=10, **kwargs) assert (100,) == z.shape assert (10,) == z.chunks -def test_zeros(): - z = zeros(100, chunks=10) +@pytest.mark.parametrize('zarr_version', [None, 2, 3]) +def test_zeros(zarr_version): + kwargs = _init_creation_kwargs(zarr_version) + z = zeros(100, chunks=10, **kwargs) assert (100,) == z.shape assert (10,) == z.chunks assert_array_equal(np.zeros(100), z[:]) -def test_ones(): - z = ones(100, chunks=10) +@pytest.mark.parametrize('zarr_version', [None, 2, 3]) +def test_ones(zarr_version): + kwargs = _init_creation_kwargs(zarr_version) + z = ones(100, chunks=10, **kwargs) assert (100,) == z.shape assert (10,) == 
z.chunks assert_array_equal(np.ones(100), z[:]) -def test_full(): - z = full(100, chunks=10, fill_value=42, dtype='i4') +@pytest.mark.parametrize('zarr_version', [None, 2, 3]) +def test_full(zarr_version): + kwargs = _init_creation_kwargs(zarr_version) + z = full(100, chunks=10, fill_value=42, dtype='i4', **kwargs) assert (100,) == z.shape assert (10,) == z.chunks assert_array_equal(np.full(100, fill_value=42, dtype='i4'), z[:]) # nan - z = full(100, chunks=10, fill_value=np.nan, dtype='f8') + z = full(100, chunks=10, fill_value=np.nan, dtype='f8', **kwargs) assert np.all(np.isnan(z[:])) + +@pytest.mark.parametrize('zarr_version', [None, 2]) +def test_full_additional_dtypes(zarr_version): + """Test additional types that aren't part of the base v3 spec.""" + kwargs = _init_creation_kwargs(zarr_version) # NaT - z = full(100, chunks=10, fill_value='NaT', dtype='M8[s]') + z = full(100, chunks=10, fill_value='NaT', dtype='M8[s]', **kwargs) assert np.all(np.isnat(z[:])) - z = full(100, chunks=10, fill_value='NaT', dtype='m8[s]') + z = full(100, chunks=10, fill_value='NaT', dtype='m8[s]', **kwargs) assert np.all(np.isnat(z[:])) # byte string dtype v = b'xxx' - z = full(100, chunks=10, fill_value=v, dtype='S3') + z = full(100, chunks=10, fill_value=v, dtype='S3', **kwargs) assert v == z[0] a = z[...] assert z.dtype == a.dtype @@ -149,7 +174,7 @@ def test_full(): # unicode string dtype v = 'xxx' - z = full(100, chunks=10, fill_value=v, dtype='U3') + z = full(100, chunks=10, fill_value=v, dtype='U3', **kwargs) assert v == z[0] a = z[...] assert z.dtype == a.dtype @@ -162,37 +187,51 @@ def test_full(): full(100, chunks=10, fill_value=v, dtype='U3') -def test_open_array(): +@pytest.mark.parametrize('zarr_version', [None, 2, 3]) +def test_open_array(zarr_version): store = 'data/array.zarr' + kwargs = _init_creation_kwargs(zarr_version) # mode == 'w' - z = open_array(store, mode='w', shape=100, chunks=10) + z = open_array(store, mode='w', shape=100, chunks=10, **kwargs) z[:] = 42 assert isinstance(z, Array) - assert isinstance(z.store, DirectoryStore) + if z._store._store_version == 2: + assert isinstance(z.store, DirectoryStore) + else: + assert isinstance(z.store, DirectoryStoreV3) assert (100,) == z.shape assert (10,) == z.chunks assert_array_equal(np.full(100, fill_value=42), z[:]) # mode in 'r', 'r+' - open_group('data/group.zarr', mode='w') + group_kwargs = kwargs.copy() + if zarr_version == 3: + group_kwargs['path'] = 'group' + open_group('data/group.zarr', mode='w', **group_kwargs) for mode in 'r', 'r+': with pytest.raises(ValueError): open_array('doesnotexist', mode=mode) with pytest.raises(ValueError): open_array('data/group.zarr', mode=mode) - z = open_array(store, mode='r') + z = open_array(store, mode='r', **kwargs) assert isinstance(z, Array) - assert isinstance(z.store, DirectoryStore) + if z._store._store_version == 2: + assert isinstance(z.store, DirectoryStore) + else: + assert isinstance(z.store, DirectoryStoreV3) assert (100,) == z.shape assert (10,) == z.chunks assert_array_equal(np.full(100, fill_value=42), z[:]) with pytest.raises(PermissionError): z[:] = 43 - z = open_array(store, mode='r+') + z = open_array(store, mode='r+', **kwargs) assert isinstance(z, Array) - assert isinstance(z.store, DirectoryStore) + if z._store._store_version == 2: + assert isinstance(z.store, DirectoryStore) + else: + assert isinstance(z.store, DirectoryStoreV3) assert (100,) == z.shape assert (10,) == z.chunks assert_array_equal(np.full(100, fill_value=42), z[:]) @@ -201,51 +240,75 @@ def 
test_open_array(): # mode == 'a' shutil.rmtree(store) - z = open_array(store, mode='a', shape=100, chunks=10) + z = open_array(store, mode='a', shape=100, chunks=10, **kwargs) z[:] = 42 assert isinstance(z, Array) - assert isinstance(z.store, DirectoryStore) + if z._store._store_version == 2: + assert isinstance(z.store, DirectoryStore) + else: + assert isinstance(z.store, DirectoryStoreV3) assert (100,) == z.shape assert (10,) == z.chunks assert_array_equal(np.full(100, fill_value=42), z[:]) - with pytest.raises(ValueError): - open_array('data/group.zarr', mode='a') + + expected_error = TypeError if zarr_version == 3 else ValueError + # v3 path does not conflict, but will raise TypeError without shape kwarg + with pytest.raises(expected_error): + # array would end up at data/group.zarr/meta/root/array.array.json + open_array('data/group.zarr', mode='a', **kwargs) # mode in 'w-', 'x' for mode in 'w-', 'x': shutil.rmtree(store) - z = open_array(store, mode=mode, shape=100, chunks=10) + z = open_array(store, mode=mode, shape=100, chunks=10, **kwargs) z[:] = 42 assert isinstance(z, Array) - assert isinstance(z.store, DirectoryStore) + if z._store._store_version == 2: + assert isinstance(z.store, DirectoryStore) + else: + assert isinstance(z.store, DirectoryStoreV3) assert (100,) == z.shape assert (10,) == z.chunks assert_array_equal(np.full(100, fill_value=42), z[:]) with pytest.raises(ValueError): - open_array(store, mode=mode) - with pytest.raises(ValueError): - open_array('data/group.zarr', mode=mode) + open_array(store, mode=mode, **kwargs) + expected_error = TypeError if zarr_version == 3 else ValueError + # v3 path does not conflict, but will raise TypeError without shape kwarg + with pytest.raises(expected_error): + open_array('data/group.zarr', mode=mode, **kwargs) # with synchronizer - z = open_array(store, synchronizer=ThreadSynchronizer()) + z = open_array(store, synchronizer=ThreadSynchronizer(), **kwargs) assert isinstance(z, Array) # with path - z = open_array(store, shape=100, path='foo/bar', mode='w') + kwargs_no_path = kwargs.copy() + kwargs_no_path.pop('path', None) + z = open_array(store, shape=100, path='foo/bar', mode='w', + **kwargs_no_path) assert isinstance(z, Array) assert 'foo/bar' == z.path # with chunk store meta_store = 'data/meta.zarr' chunk_store = 'data/chunks.zarr' - z = open_array(store=meta_store, chunk_store=chunk_store, shape=11, mode='w') + z = open_array(store=meta_store, chunk_store=chunk_store, shape=11, + mode='w', **kwargs) z[:] = 42 assert os.path.abspath(meta_store) == z.store.path assert os.path.abspath(chunk_store) == z.chunk_store.path + +# TODO: N5 support for v3 +@pytest.mark.parametrize('zarr_version', [None, 2]) +def test_open_array_n5(zarr_version): + + store = 'data/array.zarr' + kwargs = _init_creation_kwargs(zarr_version) + # for N5 store store = 'data/array.n5' - z = open_array(store, mode='w', shape=100, chunks=10) + z = open_array(store, mode='w', shape=100, chunks=10, **kwargs) z[:] = 42 assert isinstance(z, Array) assert isinstance(z.store, N5Store) @@ -254,7 +317,10 @@ def test_open_array(): assert_array_equal(np.full(100, fill_value=42), z[:]) store = 'data/group.n5' - z = open_group(store, mode='w') + group_kwargs = kwargs.copy() + if zarr_version == 3: + group_kwargs['path'] = 'group' + z = open_group(store, mode='w', **group_kwargs) i = z.create_group('inner') a = i.zeros("array", shape=100, chunks=10) a[:] = 42 @@ -264,7 +330,7 @@ def test_open_array(): o.write("{}") # Re-open - a = open_group(store)["inner"]["array"] + a = 
open_group(store, **group_kwargs)["inner"]["array"] assert isinstance(a, Array) assert isinstance(z.store, N5Store) assert (100,) == a.shape @@ -272,140 +338,181 @@ def test_open_array(): assert_array_equal(np.full(100, fill_value=42), a[:]) -def test_open_array_dict_store(): +@pytest.mark.parametrize('zarr_version', [None, 2, 3]) +def test_open_array_dict_store(zarr_version): # dict will become a KVStore store = dict() + kwargs = _init_creation_kwargs(zarr_version) + expected_store_type = KVStoreV3 if zarr_version == 3 else KVStore # mode == 'w' - z = open_array(store, mode='w', shape=100, chunks=10) + z = open_array(store, mode='w', shape=100, chunks=10, **kwargs) z[:] = 42 assert isinstance(z, Array) - assert isinstance(z.store, KVStore) + assert isinstance(z.store, expected_store_type) assert (100,) == z.shape assert (10,) == z.chunks assert_array_equal(np.full(100, fill_value=42), z[:]) -def test_create_in_dict(): +@pytest.mark.parametrize('zarr_version', [None, 2, 3]) +def test_create_in_dict(zarr_version): + kwargs = _init_creation_kwargs(zarr_version) + expected_store_type = KVStoreV3 if zarr_version == 3 else KVStore + for func in [empty, zeros, ones]: - a = func(100, store=dict()) - assert isinstance(a.store, KVStore) + a = func(100, store=dict(), **kwargs) + assert isinstance(a.store, expected_store_type) - a = full(100, 5, store=dict()) - assert isinstance(a.store, KVStore) + a = full(100, 5, store=dict(), **kwargs) + assert isinstance(a.store, expected_store_type) -def test_empty_like(): +@pytest.mark.parametrize('zarr_version', [None, 2, 3]) +def test_empty_like(zarr_version): + kwargs = _init_creation_kwargs(zarr_version) + expected_zarr_version = 2 if zarr_version is None else zarr_version # zarr array z = empty(100, chunks=10, dtype='f4', compressor=Zlib(5), - order='F') - z2 = empty_like(z) + order='F', **kwargs) + # zarr_version will be inferred from z, but have to specify a path in v3 + z2 = empty_like(z, path=kwargs.get('path', None)) assert z.shape == z2.shape assert z.chunks == z2.chunks assert z.dtype == z2.dtype assert z.compressor.get_config() == z2.compressor.get_config() assert z.fill_value == z2.fill_value assert z.order == z2.order + assert (z._store._store_version == z2._store._store_version == + expected_zarr_version) # numpy array a = np.empty(100, dtype='f4') - z3 = empty_like(a) + z3 = empty_like(a, **kwargs) assert a.shape == z3.shape assert (100,) == z3.chunks assert a.dtype == z3.dtype assert z3.fill_value is None + assert z3._store._store_version == expected_zarr_version # something slightly silly a = [0] * 100 - z3 = empty_like(a, shape=200) + z3 = empty_like(a, shape=200, **kwargs) assert (200,) == z3.shape # other array-likes b = np.arange(1000).reshape(100, 10) c = MockBcolzArray(b, 10) - z = empty_like(c) + z = empty_like(c, **kwargs) assert b.shape == z.shape assert (10, 10) == z.chunks c = MockH5pyDataset(b, chunks=(10, 2)) - z = empty_like(c) + z = empty_like(c, **kwargs) assert b.shape == z.shape assert (10, 2) == z.chunks c = MockH5pyDataset(b, chunks=None) - z = empty_like(c) + z = empty_like(c, **kwargs) assert b.shape == z.shape assert isinstance(z.chunks, tuple) -def test_zeros_like(): +@pytest.mark.parametrize('zarr_version', [None, 2, 3]) +def test_zeros_like(zarr_version): + + kwargs = _init_creation_kwargs(zarr_version) + expected_zarr_version = 2 if zarr_version is None else zarr_version + # zarr array z = zeros(100, chunks=10, dtype='f4', compressor=Zlib(5), - order='F') - z2 = zeros_like(z) + order='F', **kwargs) + z2 = 
zeros_like(z, path=kwargs.get('path', None)) assert z.shape == z2.shape assert z.chunks == z2.chunks assert z.dtype == z2.dtype assert z.compressor.get_config() == z2.compressor.get_config() assert z.fill_value == z2.fill_value assert z.order == z2.order + assert (z._store._store_version == z2._store._store_version == + expected_zarr_version) # numpy array a = np.empty(100, dtype='f4') - z3 = zeros_like(a, chunks=10) + z3 = zeros_like(a, chunks=10, **kwargs) assert a.shape == z3.shape assert (10,) == z3.chunks assert a.dtype == z3.dtype assert 0 == z3.fill_value -def test_ones_like(): +@pytest.mark.parametrize('zarr_version', [None, 2, 3]) +def test_ones_like(zarr_version): + + kwargs = _init_creation_kwargs(zarr_version) + expected_zarr_version = 2 if zarr_version is None else zarr_version + # zarr array z = ones(100, chunks=10, dtype='f4', compressor=Zlib(5), - order='F') - z2 = ones_like(z) + order='F', **kwargs) + z2 = ones_like(z, path=kwargs.get('path', None)) assert z.shape == z2.shape assert z.chunks == z2.chunks assert z.dtype == z2.dtype assert z.compressor.get_config() == z2.compressor.get_config() assert z.fill_value == z2.fill_value assert z.order == z2.order + assert (z._store._store_version == z2._store._store_version == + expected_zarr_version) # numpy array a = np.empty(100, dtype='f4') - z3 = ones_like(a, chunks=10) + z3 = ones_like(a, chunks=10, **kwargs) assert a.shape == z3.shape assert (10,) == z3.chunks assert a.dtype == z3.dtype assert 1 == z3.fill_value + assert z3._store._store_version == expected_zarr_version + +@pytest.mark.parametrize('zarr_version', [None, 2, 3]) +def test_full_like(zarr_version): + + kwargs = _init_creation_kwargs(zarr_version) + expected_zarr_version = 2 if zarr_version is None else zarr_version -def test_full_like(): z = full(100, chunks=10, dtype='f4', compressor=Zlib(5), - fill_value=42, order='F') - z2 = full_like(z) + fill_value=42, order='F', **kwargs) + z2 = full_like(z, path=kwargs.get('path', None)) assert z.shape == z2.shape assert z.chunks == z2.chunks assert z.dtype == z2.dtype assert z.compressor.get_config() == z2.compressor.get_config() assert z.fill_value == z2.fill_value assert z.order == z2.order + assert (z._store._store_version == z2._store._store_version == + expected_zarr_version) # numpy array a = np.empty(100, dtype='f4') - z3 = full_like(a, chunks=10, fill_value=42) + z3 = full_like(a, chunks=10, fill_value=42, **kwargs) assert a.shape == z3.shape assert (10,) == z3.chunks assert a.dtype == z3.dtype assert 42 == z3.fill_value + assert z3._store._store_version == expected_zarr_version with pytest.raises(TypeError): # fill_value missing - full_like(a, chunks=10) + full_like(a, chunks=10, **kwargs) + +@pytest.mark.parametrize('zarr_version', [None, 2, 3]) +def test_open_like(zarr_version): + kwargs = _init_creation_kwargs(zarr_version) + expected_zarr_version = 2 if zarr_version is None else zarr_version -def test_open_like(): # zarr array path = tempfile.mktemp() atexit.register(shutil.rmtree, path) z = full(100, chunks=10, dtype='f4', compressor=Zlib(5), - fill_value=42, order='F') + fill_value=42, order='F', **kwargs) z2 = open_like(z, path) assert z.shape == z2.shape assert z.chunks == z2.chunks @@ -413,31 +520,38 @@ def test_open_like(): assert z.compressor.get_config() == z2.compressor.get_config() assert z.fill_value == z2.fill_value assert z.order == z2.order + assert (z._store._store_version == z2._store._store_version == + expected_zarr_version) # numpy array path = tempfile.mktemp() 
atexit.register(shutil.rmtree, path) a = np.empty(100, dtype='f4') - z3 = open_like(a, path, chunks=10) + z3 = open_like(a, path, chunks=10, zarr_version=zarr_version) assert a.shape == z3.shape assert (10,) == z3.chunks assert a.dtype == z3.dtype assert 0 == z3.fill_value + assert z3._store._store_version == expected_zarr_version -def test_create(): +@pytest.mark.parametrize('zarr_version', [None, 2, 3]) +def test_create(zarr_version): + kwargs = _init_creation_kwargs(zarr_version) + expected_zarr_version = 2 if zarr_version is None else zarr_version # defaults - z = create(100) + z = create(100, **kwargs) assert isinstance(z, Array) assert (100,) == z.shape assert (100,) == z.chunks # auto-chunks assert np.dtype(None) == z.dtype assert 'blosc' == z.compressor.codec_id assert 0 == z.fill_value + assert z._store._store_version == expected_zarr_version # all specified z = create(100, chunks=10, dtype='i4', compressor=Zlib(1), - fill_value=42, order='F') + fill_value=42, order='F', **kwargs) assert isinstance(z, Array) assert (100,) == z.shape assert (10,) == z.chunks @@ -446,82 +560,91 @@ def test_create(): assert 1 == z.compressor.level assert 42 == z.fill_value assert 'F' == z.order + assert z._store._store_version == expected_zarr_version # with synchronizer synchronizer = ThreadSynchronizer() - z = create(100, chunks=10, synchronizer=synchronizer) + z = create(100, chunks=10, synchronizer=synchronizer, **kwargs) assert isinstance(z, Array) assert (100,) == z.shape assert (10,) == z.chunks assert synchronizer is z.synchronizer + assert z._store._store_version == expected_zarr_version # don't allow string as compressor arg with pytest.raises(ValueError): - create(100, chunks=10, compressor='zlib') + create(100, chunks=10, compressor='zlib', **kwargs) # h5py compatibility - z = create(100, compression='zlib', compression_opts=9) + z = create(100, compression='zlib', compression_opts=9, **kwargs) assert 'zlib' == z.compressor.codec_id assert 9 == z.compressor.level - z = create(100, compression='default') + z = create(100, compression='default', **kwargs) assert 'blosc' == z.compressor.codec_id # errors with pytest.raises(ValueError): # bad compression argument - create(100, compression=1) + create(100, compression=1, **kwargs) with pytest.raises(ValueError): # bad fill value - create(100, dtype='i4', fill_value='foo') + create(100, dtype='i4', fill_value='foo', **kwargs) # auto chunks - z = create(1000000000, chunks=True) + z = create(1000000000, chunks=True, **kwargs) assert z.chunks[0] < z.shape[0] - z = create(1000000000, chunks=None) # backwards-compatibility + z = create(1000000000, chunks=None, **kwargs) # backwards-compatibility assert z.chunks[0] < z.shape[0] # no chunks - z = create(1000000000, chunks=False) + z = create(1000000000, chunks=False, **kwargs) assert z.chunks == z.shape -def test_compression_args(): +@pytest.mark.parametrize('zarr_version', [None, 2, 3]) +def test_compression_args(zarr_version): + kwargs = _init_creation_kwargs(zarr_version) with warnings.catch_warnings(): warnings.simplefilter("default") - z = create(100, compression="zlib", compression_opts=9) + z = create(100, compression="zlib", compression_opts=9, **kwargs) assert isinstance(z, Array) assert "zlib" == z.compressor.codec_id assert 9 == z.compressor.level # 'compressor' overrides 'compression' with pytest.warns(UserWarning): - z = create(100, compressor=Zlib(9), compression="bz2", compression_opts=1) + z = create(100, compressor=Zlib(9), compression="bz2", + compression_opts=1, **kwargs) assert 
isinstance(z, Array)
     assert "zlib" == z.compressor.codec_id
     assert 9 == z.compressor.level
 
     # 'compressor' ignores 'compression_opts'
     with pytest.warns(UserWarning):
-        z = create(100, compressor=Zlib(9), compression_opts=1)
+        z = create(100, compressor=Zlib(9), compression_opts=1, **kwargs)
     assert isinstance(z, Array)
     assert "zlib" == z.compressor.codec_id
     assert 9 == z.compressor.level
 
     with pytest.warns(UserWarning):
         # 'compressor' overrides 'compression'
-        create(100, compressor=Zlib(9), compression="bz2", compression_opts=1)
+        create(100, compressor=Zlib(9), compression="bz2",
+               compression_opts=1, **kwargs)
     with pytest.warns(UserWarning):
         # 'compressor' ignores 'compression_opts'
-        create(100, compressor=Zlib(9), compression_opts=1)
+        create(100, compressor=Zlib(9), compression_opts=1, **kwargs)
 
 
-def test_create_read_only():
+@pytest.mark.parametrize('zarr_version', [None, 2, 3])
+def test_create_read_only(zarr_version):
     # https://github.com/alimanfoo/zarr/issues/151
 
+    kwargs = _init_creation_kwargs(zarr_version)
+
     # create an array initially read-only, then enable writing
-    z = create(100, read_only=True)
+    z = create(100, read_only=True, **kwargs)
     assert z.read_only
     with pytest.raises(PermissionError):
         z[:] = 42
@@ -535,7 +658,7 @@ def test_create_read_only():
     # this is subtly different, but here we want to create an array with data, and then
     # have it be read-only
     a = np.arange(100)
-    z = array(a, read_only=True)
+    z = array(a, read_only=True, **kwargs)
     assert_array_equal(a, z[...])
     assert z.read_only
     with pytest.raises(PermissionError):

From 504913ae4be41f594a65eeb78bae49fe575e3d65 Mon Sep 17 00:00:00 2001
From: Gregory Lee
Date: Thu, 16 Dec 2021 15:18:45 -0500
Subject: [PATCH 022/109] Handle dimension_separator appropriately in
 open_array

Specifically, we want to be able to infer the dimension_separator from the
store if possible

---
 zarr/creation.py | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/zarr/creation.py b/zarr/creation.py
index 8bce8673d9..da84347760 100644
--- a/zarr/creation.py
+++ b/zarr/creation.py
@@ -400,6 +400,7 @@ def open_array(
     write_empty_chunks=True,
     *,
     zarr_version=None,
+    dimension_separator=None,
     **kwargs
 ):
     """Open an array using file-mode-like semantics.
@@ -466,6 +467,11 @@ def open_array(
         The zarr protocol version of the array to be opened. If None, it will
         be inferred from ``store`` or ``chunk_store`` if they are provided,
         otherwise defaulting to 2.
+    dimension_separator : {None, '.', '/'}, optional
+        Can be used to specify whether the array is in a flat ('.') or nested
+        ('/') format. If None, the appropriate value will be read from `store`
+        when present. Otherwise, defaults to '.' when ``zarr_version == 2``
+        and `/` otherwise.
 
     Returns
     -------
@@ -513,6 +519,13 @@ def open_array(
         storage_options=storage_options,
         zarr_version=zarr_version)
 
+    # respect the dimension separator specified in a store, if present
+    if dimension_separator is None:
+        if hasattr(store, '_dimension_separator'):
+            dimension_separator = store._dimension_separator
+        else:
+            dimension_separator = '.' if zarr_version == 2 else '/'
+
     if zarr_version == 3 and path is None:
         path = 'array'  # TODO: raise ValueError instead?
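
# A small sketch of the inference order introduced here (the store
# location is hypothetical): an explicit dimension_separator argument
# wins, then a separator recorded on the store itself, then the
# version-dependent default ('.' for v2, '/' for v3).
#
from zarr.creation import open_array
from zarr.storage import DirectoryStore

# the store was created with an explicit separator, so open_array adopts
# it instead of applying the v2 default of '.'
store = DirectoryStore('data/example.zarr', dimension_separator='/')
z = open_array(store, mode='w', shape=100, chunks=10)
assert z._dimension_separator == '/'
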
path = normalize_storage_path(path)
 
     # API compatibility with h5py
@@ -536,7 +549,8 @@ def open_array(
         init_array(store, shape=shape, chunks=chunks, dtype=dtype,
                    compressor=compressor, fill_value=fill_value,
                    order=order, filters=filters, overwrite=True, path=path,
-                   object_codec=object_codec, chunk_store=chunk_store)
+                   object_codec=object_codec, chunk_store=chunk_store,
+                   dimension_separator=dimension_separator)
 
     elif mode == 'a':
         if not contains_array(store, path=path):
@@ -545,7 +559,8 @@ def open_array(
             init_array(store, shape=shape, chunks=chunks, dtype=dtype,
                        compressor=compressor, fill_value=fill_value,
                        order=order, filters=filters, path=path,
-                       object_codec=object_codec, chunk_store=chunk_store)
+                       object_codec=object_codec, chunk_store=chunk_store,
+                       dimension_separator=dimension_separator)
 
     elif mode in ['w-', 'x']:
         if contains_group(store, path=path):
@@ -556,7 +571,8 @@ def open_array(
             init_array(store, shape=shape, chunks=chunks, dtype=dtype,
                        compressor=compressor, fill_value=fill_value,
                        order=order, filters=filters, path=path,
-                       object_codec=object_codec, chunk_store=chunk_store)
+                       object_codec=object_codec, chunk_store=chunk_store,
+                       dimension_separator=dimension_separator)
 
     # determine read only status
     read_only = mode == 'r'

From 71429746d0d09ab859e2aeaf52038b12c42d8b2b Mon Sep 17 00:00:00 2001
From: Gregory Lee
Date: Thu, 16 Dec 2021 15:42:19 -0500
Subject: [PATCH 023/109] TST: add tests for open_array and dimension_separator

---
 zarr/tests/test_creation.py | 42 +++++++++++++++++++++++++++++++++++--
 1 file changed, 40 insertions(+), 2 deletions(-)

diff --git a/zarr/tests/test_creation.py b/zarr/tests/test_creation.py
index 1d27077176..e92a9caf62 100644
--- a/zarr/tests/test_creation.py
+++ b/zarr/tests/test_creation.py
@@ -187,14 +187,16 @@ def test_full_additional_dtypes(zarr_version):
         full(100, chunks=10, fill_value=v, dtype='U3')
 
 
+@pytest.mark.parametrize('dimension_separator', ['.', '/', None])
 @pytest.mark.parametrize('zarr_version', [None, 2, 3])
-def test_open_array(zarr_version):
+def test_open_array(zarr_version, dimension_separator):
 
     store = 'data/array.zarr'
     kwargs = _init_creation_kwargs(zarr_version)
 
     # mode == 'w'
-    z = open_array(store, mode='w', shape=100, chunks=10, **kwargs)
+    z = open_array(store, mode='w', shape=100, chunks=10,
+                   dimension_separator=dimension_separator, **kwargs)
     z[:] = 42
     assert isinstance(z, Array)
     if z._store._store_version == 2:
         assert isinstance(z.store, DirectoryStore)
     else:
         assert isinstance(z.store, DirectoryStoreV3)
     assert (100,) == z.shape
     assert (10,) == z.chunks
     assert_array_equal(np.full(100, fill_value=42), z[:])
 
+    if dimension_separator is None:
+        assert z._dimension_separator == ('/' if zarr_version == 3 else '.')
+    else:
+        assert z._dimension_separator == dimension_separator
+
     # mode in 'r', 'r+'
     group_kwargs = kwargs.copy()
     if zarr_version == 3:
@@ -299,6 +306,37 @@
     assert os.path.abspath(chunk_store) == z.chunk_store.path
 
 
+@pytest.mark.parametrize('dimension_separator', ['.', '/', None])
+@pytest.mark.parametrize('zarr_version', [2, 3])
+def test_open_array_infer_separator_from_store(zarr_version, dimension_separator):
+
+    if zarr_version == 3:
+        StoreClass = DirectoryStoreV3
+        path = 'data'
+    else:
+        StoreClass = DirectoryStore
+        path = None
+    store = StoreClass('data/array.zarr', dimension_separator=dimension_separator)
+
+    # Note: no dimension_separator kwarg to open_array
+    # we are testing here that it gets inferred from store
+    z = open_array(store, path=path, mode='w', shape=100, chunks=10)
+    z[:] = 42
+    assert isinstance(z, Array)
+    if z._store._store_version == 2:
+        assert isinstance(z.store, DirectoryStore)
+    else:
+        assert isinstance(z.store, DirectoryStoreV3)
+    assert (100,) == z.shape
+    assert (10,) == z.chunks
+    assert_array_equal(np.full(100, fill_value=42), z[:])
+
+    if dimension_separator is None:
+        assert z._dimension_separator == ('/' if zarr_version == 3 else '.')
+    else:
+        assert z._dimension_separator == dimension_separator
+
+
 # TODO: N5 support for v3
 @pytest.mark.parametrize('zarr_version', [None, 2])
 def test_open_array_n5(zarr_version):

From e4ce79f5d9a4641d0ab62fa5d9d8a1e99b2e387f Mon Sep 17 00:00:00 2001
From: Gregory Lee
Date: Fri, 17 Dec 2021 17:32:27 -0500
Subject: [PATCH 024/109] only allow Codec not a simple str as compressor
 during array initialization

---
 zarr/meta.py    |  1 +
 zarr/storage.py | 17 ++++++++++++-----
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/zarr/meta.py b/zarr/meta.py
index f187c90102..02730f0c01 100644
--- a/zarr/meta.py
+++ b/zarr/meta.py
@@ -512,6 +512,7 @@ def encode_array_metadata(cls, meta: MappingType[str, Any]) -> bytes:
             object_codec = numcodecs.get_codec(meta["attributes"]["filters"][0])
         else:
             object_codec = None
+        compressor = cls._encode_codec_metadata(meta.get("compressor", None))
 
         extensions = meta.get("extensions", [])
         meta = dict(
diff --git a/zarr/storage.py b/zarr/storage.py
index abb27e991c..ef5479c8fa 100644
--- a/zarr/storage.py
+++ b/zarr/storage.py
@@ -35,6 +35,7 @@
 import uuid
 import time
 
+from numcodecs.abc import Codec
 from numcodecs.compat import (
     ensure_bytes,
     ensure_text,
@@ -565,11 +566,17 @@ def _init_array_metadata(
 
     # obtain compressor config
     compressor_config = None
-    if store_version == 2 and compressor:
-        try:
-            compressor_config = compressor.get_config()
-        except AttributeError as e:
-            raise BadCompressorError(compressor) from e
+    if compressor:
+        if store_version == 2:
+            try:
+                compressor_config = compressor.get_config()
+            except AttributeError as e:
+                raise BadCompressorError(compressor) from e
+        elif not isinstance(compressor, Codec):
+            raise ValueError("expected a numcodecs Codec for compressor")
+            # TODO: alternatively, could autoconvert str to a Codec
+            # e.g.
'zlib' -> numcodec.Zlib object + # compressor = numcodecs.get_codec({'id': compressor}) # obtain filters config if filters: From 8bef4fcf09256c55e2a2e13f6a0a74d19eb0822c Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 30 Nov 2021 23:04:01 -0500 Subject: [PATCH 025/109] add StoreV3 support to most convenience routines consolidated metadata functions haven't been updated yet --- zarr/convenience.py | 191 +++++++++++++++++++++------------ zarr/tests/test_convenience.py | 190 +++++++++++++++++++++++++++----- 2 files changed, 290 insertions(+), 91 deletions(-) diff --git a/zarr/convenience.py b/zarr/convenience.py index 20afb496b7..d27629e4a4 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -7,13 +7,13 @@ from zarr.core import Array from zarr.creation import array as _create_array -from zarr.creation import normalize_store_arg, open_array +from zarr.creation import open_array from zarr.errors import CopyError, PathNotFoundError from zarr.hierarchy import Group from zarr.hierarchy import group as _create_group from zarr.hierarchy import open_group from zarr.meta import json_dumps, json_loads -from zarr.storage import contains_array, contains_group, BaseStore +from zarr.storage import contains_array, contains_group, normalize_store_arg, BaseStore from zarr.util import TreeViewer, buffer_size, normalize_storage_path from typing import Union @@ -21,8 +21,14 @@ StoreLike = Union[BaseStore, MutableMapping, str, None] +def _check_and_update_path(store: BaseStore, path): + if getattr(store, '_store_version', 2) > 2 and not path: + raise ValueError("path must be provided for v3 stores") + return normalize_storage_path(path) + + # noinspection PyShadowingBuiltins -def open(store: StoreLike = None, mode: str = "a", **kwargs): +def open(store: StoreLike = None, mode: str = "a", *, zarr_version=2, path=None, **kwargs): """Convenience function to open a group or array using file-mode-like semantics. Parameters @@ -34,6 +40,10 @@ def open(store: StoreLike = None, mode: str = "a", **kwargs): read/write (must exist); 'a' means read/write (create if doesn't exist); 'w' means create (overwrite if exists); 'w-' means create (fail if exists). + zarr_version : {2, 3} + The zarr protocol version to use. + path : str + The path within the store to open. **kwargs Additional parameters are passed through to :func:`zarr.creation.open_array` or :func:`zarr.hierarchy.open_group`. @@ -75,15 +85,16 @@ def open(store: StoreLike = None, mode: str = "a", **kwargs): """ - path = kwargs.get('path') # handle polymorphic store arg clobber = mode == 'w' # we pass storage options explicitly, since normalize_store_arg might construct # a store if the input is a fsspec-compatible URL _store: BaseStore = normalize_store_arg( - store, clobber=clobber, storage_options=kwargs.pop("storage_options", {}) + store, clobber=clobber, storage_options=kwargs.pop("storage_options", {}), + zarr_version=zarr_version, ) - path = normalize_storage_path(path) + path = _check_and_update_path(_store, path) + kwargs['path'] = path if mode in {'w', 'w-', 'x'}: if 'shape' in kwargs: @@ -110,7 +121,7 @@ def _might_close(path): return isinstance(path, (str, os.PathLike)) -def save_array(store: StoreLike, arr, **kwargs): +def save_array(store: StoreLike, arr, *, zarr_version=2, path=None, **kwargs): """Convenience function to save a NumPy array to the local file system, following a similar API to the NumPy save() function. 
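
A short sketch of the path check enforced by _check_and_update_path (the
store location 'data/example.zr3' is hypothetical; this mirrors the
behavior exercised by the new tests below):

    import zarr

    # without a path, a v3 store is rejected up front...
    try:
        zarr.open('data/example.zr3', mode='w', zarr_version=3)
    except ValueError:
        pass

    # ...but opens as usual once a path is given
    g = zarr.open('data/example.zr3', mode='w', zarr_version=3, path='group1')
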
@@ -120,6 +131,10 @@ def save_array(store: StoreLike, arr, **kwargs): Store or path to directory in file system or name of zip file. arr : ndarray NumPy array with data to save. + zarr_version : {2, 3} + The zarr protocol version to use when saving. + path : str + The path within the store where the array will be saved. kwargs Passed through to :func:`create`, e.g., compressor. @@ -142,16 +157,18 @@ def save_array(store: StoreLike, arr, **kwargs): """ may_need_closing = _might_close(store) - _store: BaseStore = normalize_store_arg(store, clobber=True) + _store: BaseStore = normalize_store_arg(store, clobber=True, zarr_version=zarr_version) + path = _check_and_update_path(_store, path) try: - _create_array(arr, store=_store, overwrite=True, **kwargs) + _create_array(arr, store=_store, overwrite=True, zarr_version=zarr_version, path=path, + **kwargs) finally: if may_need_closing: # needed to ensure zip file records are written _store.close() -def save_group(store: StoreLike, *args, **kwargs): +def save_group(store: StoreLike, *args, zarr_version=2, path=None, **kwargs): """Convenience function to save several NumPy arrays to the local file system, following a similar API to the NumPy savez()/savez_compressed() functions. @@ -161,6 +178,10 @@ def save_group(store: StoreLike, *args, **kwargs): Store or path to directory in file system or name of zip file. args : ndarray NumPy arrays with data to save. + zarr_version : {2, 3} + The zarr protocol version to use when saving. + path : str + Path within the store where the group will be saved. kwargs NumPy arrays with data to save. @@ -213,21 +234,22 @@ def save_group(store: StoreLike, *args, **kwargs): raise ValueError('at least one array must be provided') # handle polymorphic store arg may_need_closing = _might_close(store) - _store: BaseStore = normalize_store_arg(store, clobber=True) + _store: BaseStore = normalize_store_arg(store, clobber=True, zarr_version=zarr_version) + path = _check_and_update_path(_store, path) try: - grp = _create_group(_store, overwrite=True) + grp = _create_group(_store, path=path, overwrite=True, zarr_version=zarr_version) for i, arr in enumerate(args): k = 'arr_{}'.format(i) - grp.create_dataset(k, data=arr, overwrite=True) + grp.create_dataset(k, data=arr, overwrite=True, zarr_version=zarr_version) for k, arr in kwargs.items(): - grp.create_dataset(k, data=arr, overwrite=True) + grp.create_dataset(k, data=arr, overwrite=True, zarr_version=zarr_version) finally: if may_need_closing: # needed to ensure zip file records are written _store.close() -def save(store: StoreLike, *args, **kwargs): +def save(store: StoreLike, *args, zarr_version=2, path=None, **kwargs): """Convenience function to save an array or group of arrays to the local file system. Parameters @@ -236,6 +258,10 @@ def save(store: StoreLike, *args, **kwargs): Store or path to directory in file system or name of zip file. args : ndarray NumPy arrays with data to save. + zarr_version : {2, 3} + The zarr protocol version to use when saving. + path : str + The path within the group where the arrays will be saved. kwargs NumPy arrays with data to save. 
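
A matching sketch for the save helpers (file names hypothetical): a single
positional array dispatches to save_array, several arrays to save_group,
and in both cases a v3 store needs an explicit path:

    import numpy as np
    import zarr

    a = np.arange(100)
    b = np.arange(100, 0, -1)

    zarr.save('data/arrays.zr3', a, zarr_version=3, path='arr')
    zarr.save('data/arrays2.zr3', foo=a, bar=b, zarr_version=3, path='group1')
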
@@ -302,9 +328,10 @@ def save(store: StoreLike, *args, **kwargs): if len(args) == 0 and len(kwargs) == 0: raise ValueError('at least one array must be provided') if len(args) == 1 and len(kwargs) == 0: - save_array(store, args[0]) + save_array(store, args[0], zarr_version=zarr_version, path=path) else: - save_group(store, *args, **kwargs) + save_group(store, *args, zarr_version=zarr_version, path=path, + **kwargs) class LazyLoader(Mapping): @@ -337,7 +364,7 @@ def __repr__(self): return r -def load(store: StoreLike): +def load(store: StoreLike, zarr_version=2, path=None): """Load data from an array or group into memory. Parameters @@ -363,11 +390,12 @@ def load(store: StoreLike): """ # handle polymorphic store arg - _store = normalize_store_arg(store) - if contains_array(_store, path=None): - return Array(store=_store, path=None)[...] - elif contains_group(_store, path=None): - grp = Group(store=_store, path=None) + _store = normalize_store_arg(store, zarr_version=zarr_version) + path = _check_and_update_path(_store, path) + if contains_array(_store, path=path): + return Array(store=_store, path=path)[...] + elif contains_group(_store, path=path): + grp = Group(store=_store, path=path) return LazyLoader(grp) @@ -601,6 +629,15 @@ def copy_store(source, dest, source_path='', dest_path='', excludes=None, # setup counting variables n_copied = n_skipped = n_bytes_copied = 0 + source_store_version = getattr(source, '_store_version', 2) + dest_store_version = getattr(dest, '_store_version', 2) + if source_store_version != dest_store_version: + raise ValueError("zarr stores must share the same protocol version") + if source_store_version > 2: + if not source_path or not dest_path: + raise ValueError("v3 stores require specifying a non-empty " + "source_path and dest_path") + # setup logging with _LogWriter(log) as log: @@ -608,52 +645,63 @@ def copy_store(source, dest, source_path='', dest_path='', excludes=None, for source_key in sorted(source.keys()): # filter to keys under source path - if source_key.startswith(source_path): + if source_store_version == 2: + if not source_key.startswith(source_path): + continue + elif source_store_version == 3: + # 'meta/root/' or 'data/root/' have length 10 + if not source_key[10:].startswith(source_path): + continue - # process excludes and includes - exclude = False - for prog in excludes: + # process excludes and includes + exclude = False + for prog in excludes: + if prog.search(source_key): + exclude = True + break + if exclude: + for prog in includes: if prog.search(source_key): - exclude = True + exclude = False break - if exclude: - for prog in includes: - if prog.search(source_key): - exclude = False - break - if exclude: - continue + if exclude: + continue - # map key to destination path + # map key to destination path + if source_store_version == 2: key_suffix = source_key[len(source_path):] dest_key = dest_path + key_suffix - - # create a descriptive label for this operation - descr = source_key - if dest_key != source_key: - descr = descr + ' -> ' + dest_key - - # decide what to do - do_copy = True - if if_exists != 'replace': - if dest_key in dest: - if if_exists == 'raise': - raise CopyError('key {!r} exists in destination' - .format(dest_key)) - elif if_exists == 'skip': - do_copy = False - - # take action - if do_copy: - log('copy {}'.format(descr)) - if not dry_run: - data = source[source_key] - n_bytes_copied += buffer_size(data) - dest[dest_key] = data - n_copied += 1 - else: - log('skip {}'.format(descr)) - n_skipped += 1 + elif 
source_store_version == 3: + # 10 is length of 'meta/root/' or 'data/root/' + key_suffix = source_key[10 + len(source_path):] + dest_key = source_key[:10] + dest_path + key_suffix + + # create a descriptive label for this operation + descr = source_key + if dest_key != source_key: + descr = descr + ' -> ' + dest_key + + # decide what to do + do_copy = True + if if_exists != 'replace': + if dest_key in dest: + if if_exists == 'raise': + raise CopyError('key {!r} exists in destination' + .format(dest_key)) + elif if_exists == 'skip': + do_copy = False + + # take action + if do_copy: + log('copy {}'.format(descr)) + if not dry_run: + data = source[source_key] + n_bytes_copied += buffer_size(data) + dest[dest_key] = data + n_copied += 1 + else: + log('skip {}'.format(descr)) + n_skipped += 1 # log a final message with a summary of what happened _log_copy_summary(log, dry_run, n_copied, n_skipped, n_bytes_copied) @@ -908,7 +956,15 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, # copy attributes if not without_attrs: - ds.attrs.update(source.attrs) + if dest_h5py and 'filters' in source.attrs: + # No filters key in v3 metadata so it was stored in the + # attributes instead. We cannot copy this key to + # HDF5 attrs, though! + source_attrs = source.attrs.asdict().copy() + source_attrs.pop('filters', None) + else: + source_attrs = source.attrs + ds.attrs.update(source_attrs) n_copied += 1 @@ -1064,6 +1120,8 @@ def copy_all(source, dest, shallow=False, without_attrs=False, log=None, # setup counting variables n_copied = n_skipped = n_bytes_copied = 0 + zarr_version = getattr(source, '_version', 2) + # setup logging with _LogWriter(log) as log: @@ -1075,7 +1133,8 @@ def copy_all(source, dest, shallow=False, without_attrs=False, log=None, n_copied += c n_skipped += s n_bytes_copied += b - dest.attrs.update(**source.attrs) + if zarr_version == 2: + dest.attrs.update(**source.attrs) # log a final message with a summary of what happened _log_copy_summary(log, dry_run, n_copied, n_skipped, n_bytes_copied) @@ -1083,7 +1142,7 @@ def copy_all(source, dest, shallow=False, without_attrs=False, log=None, return n_copied, n_skipped, n_bytes_copied -def consolidate_metadata(store: StoreLike, metadata_key=".zmetadata"): +def consolidate_metadata(store: BaseStore, metadata_key=".zmetadata"): """ Consolidate all metadata for groups and arrays within the given store into a single resource and put it under the given key. 
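
To make the v3 key handling above concrete, here is a sketch with a
hand-written metadata key (the key name is hypothetical): the 'meta/root/'
prefix survives the copy, and only the path portion after it is remapped:

    from zarr.convenience import copy_store
    from zarr.storage import KVStoreV3

    source = KVStoreV3(dict())
    source['meta/root/foo/baz.array.json'] = b'{}'
    dest = KVStoreV3(dict())

    # v3 stores require non-empty source and destination paths
    copy_store(source, dest, source_path='foo', dest_path='foo2')
    assert 'meta/root/foo2/baz.array.json' in dest
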
diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index e5ccbd494d..f253ec5d05 100644 --- a/zarr/tests/test_convenience.py +++ b/zarr/tests/test_convenience.py @@ -27,29 +27,44 @@ from zarr.storage import ( ConsolidatedMetadataStore, MemoryStore, + MemoryStoreV3, atexit_rmtree, getsize, ) -def test_open_array(path_type): +def _init_creation_kwargs(zarr_version): + kwargs = {'zarr_version': zarr_version} + if zarr_version == 3: + kwargs['path'] = 'dataset' + return kwargs + + +@pytest.mark.parametrize('zarr_version', [2, 3]) +def test_open_array(path_type, zarr_version): store = tempfile.mkdtemp() atexit.register(atexit_rmtree, store) store = path_type(store) + kwargs = _init_creation_kwargs(zarr_version) # open array, create if doesn't exist - z = open(store, mode='a', shape=100) + z = open(store, mode='a', shape=100, **kwargs) assert isinstance(z, Array) assert z.shape == (100,) # open array, overwrite - z = open(store, mode='w', shape=200) + z = open(store, mode='w', shape=200, **kwargs) assert isinstance(z, Array) assert z.shape == (200,) + if zarr_version == 3: + # cannot open a v3 array without path + with pytest.raises(ValueError): + open(store, mode='w', shape=200, zarr_version=3) + # open array, read-only - z = open(store, mode='r') + z = open(store, mode='r', **kwargs) assert isinstance(z, Array) assert z.shape == (200,) assert z.read_only @@ -59,44 +74,70 @@ def test_open_array(path_type): open('doesnotexist', mode='r') -def test_open_group(path_type): +@pytest.mark.parametrize("zarr_version", [2, 3]) +def test_open_group(path_type, zarr_version): store = tempfile.mkdtemp() atexit.register(atexit_rmtree, store) store = path_type(store) + kwargs = _init_creation_kwargs(zarr_version) # open group, create if doesn't exist - g = open(store, mode='a') + g = open(store, mode='a', **kwargs) g.create_group('foo') assert isinstance(g, Group) assert 'foo' in g # open group, overwrite - g = open(store, mode='w') + g = open(store, mode='w', **kwargs) assert isinstance(g, Group) assert 'foo' not in g + if zarr_version == 3: + # cannot open a v3 group without path + with pytest.raises(ValueError): + open(store, mode='w', zarr_version=3) + # open group, read-only - g = open(store, mode='r') + g = open(store, mode='r', **kwargs) assert isinstance(g, Group) assert g.read_only -def test_save_errors(): +@pytest.mark.parametrize("zarr_version", [2, 3]) +def test_save_errors(zarr_version): with pytest.raises(ValueError): # no arrays provided - save_group('data/group.zarr') + save_group('data/group.zarr', zarr_version=zarr_version) + with pytest.raises(TypeError): + # no array provided + save_array('data/group.zarr', zarr_version=zarr_version) with pytest.raises(ValueError): # no arrays provided - save('data/group.zarr') + save('data/group.zarr', zarr_version=zarr_version) + + +def test_zarr_v3_save_errors(): + x = np.ones(8) + with pytest.raises(ValueError): + # no path provided + save_group('data/group.zr3', x, zarr_version=3) + with pytest.raises(ValueError): + # no path provided + save_array('data/group.zr3', x, zarr_version=3) + with pytest.raises(ValueError): + # no path provided + save('data/group.zr3', x, zarr_version=3) -def test_lazy_loader(): +@pytest.mark.parametrize("zarr_version", [2, 3]) +def test_lazy_loader(zarr_version): foo = np.arange(100) bar = np.arange(100, 0, -1) - store = 'data/group.zarr' - save(store, foo=foo, bar=bar) - loader = load(store) + store = 'data/group.zarr' if zarr_version == 2 else 'data/group.zr3' + kwargs = 
_init_creation_kwargs(zarr_version) + save(store, foo=foo, bar=bar, **kwargs) + loader = load(store, **kwargs) assert 'foo' in loader assert 'bar' in loader assert 'baz' not in loader @@ -106,6 +147,8 @@ def test_lazy_loader(): assert_array_equal(bar, loader['bar']) +# TODO: consolidated metadata currently only supported for v2 + def test_consolidate_metadata(): # setup initial data @@ -250,9 +293,12 @@ def setUp(self): source['bar/qux'] = b'zzz' self.source = source + def _get_dest_store(self): + return dict() + def test_no_paths(self): source = self.source - dest = dict() + dest = self._get_dest_store() copy_store(source, dest) assert len(source) == len(dest) for key in source: @@ -262,7 +308,7 @@ def test_source_path(self): source = self.source # paths should be normalized for source_path in 'bar', 'bar/', '/bar', '/bar/': - dest = dict() + dest = self._get_dest_store() copy_store(source, dest, source_path=source_path) assert 2 == len(dest) for key in source: @@ -276,7 +322,7 @@ def test_dest_path(self): source = self.source # paths should be normalized for dest_path in 'new', 'new/', '/new', '/new/': - dest = dict() + dest = self._get_dest_store() copy_store(source, dest, dest_path=dest_path) assert len(source) == len(dest) for key in source: @@ -288,7 +334,7 @@ def test_source_dest_path(self): # paths should be normalized for source_path in 'bar', 'bar/', '/bar', '/bar/': for dest_path in 'new', 'new/', '/new', '/new/': - dest = dict() + dest = self._get_dest_store() copy_store(source, dest, source_path=source_path, dest_path=dest_path) assert 2 == len(dest) @@ -304,14 +350,14 @@ def test_excludes_includes(self): source = self.source # single excludes - dest = dict() + dest = self._get_dest_store() excludes = 'f.*' copy_store(source, dest, excludes=excludes) assert len(dest) == 2 assert 'foo' not in dest # multiple excludes - dest = dict() + dest = self._get_dest_store() excludes = 'b.z', '.*x' copy_store(source, dest, excludes=excludes) assert len(dest) == 1 @@ -320,7 +366,7 @@ def test_excludes_includes(self): assert 'bar/qux' not in dest # excludes and includes - dest = dict() + dest = self._get_dest_store() excludes = 'b.*' includes = '.*x' copy_store(source, dest, excludes=excludes, includes=includes) @@ -331,13 +377,13 @@ def test_excludes_includes(self): def test_dry_run(self): source = self.source - dest = dict() + dest = self._get_dest_store() copy_store(source, dest, dry_run=True) assert 0 == len(dest) def test_if_exists(self): source = self.source - dest = dict() + dest = self._get_dest_store() dest['bar/baz'] = b'mmm' # default ('raise') @@ -415,7 +461,14 @@ def check_copied_array(original, copied, without_attrs=False, for k in original.attrs.keys(): assert k not in copied.attrs else: - assert sorted(original.attrs.items()) == sorted(copied.attrs.items()) + if dest_h5py and 'filters' in original.attrs: + # special case in v3 (storing filters metadata under attributes) + # we explicitly do not copy this info over to HDF5 + original_attrs = original.attrs.asdict().copy() + original_attrs.pop('filters') + else: + original_attrs = original.attrs + assert sorted(original_attrs.items()) == sorted(copied.attrs.items()) def check_copied_group(original, copied, without_attrs=False, expect_props=None, @@ -469,10 +522,32 @@ def test_copy_all(): dry_run=False, ) + assert 'subgroup' in destination_group assert destination_group.attrs["info"] == "group attrs" assert destination_group.subgroup.attrs["info"] == "sub attrs" +def test_copy_all_v3(): + """ + 
https://github.com/zarr-developers/zarr-python/issues/269 + + copy_all used to not copy attributes as `.keys()` + + """ + original_group = zarr.group(store=MemoryStoreV3(), path='group1', overwrite=True) + original_group.create_group("subgroup") + + destination_group = zarr.group(store=MemoryStoreV3(), path='group2', overwrite=True) + + # copy from memory to directory store + copy_all( + original_group, + destination_group, + dry_run=False, + ) + assert 'subgroup' in destination_group + + class TestCopy: @pytest.fixture(params=[False, True], ids=['zarr', 'hdf5']) def source(self, request, tmpdir): @@ -715,3 +790,68 @@ def test_logging(self, source, dest, tmpdir): # bad option with pytest.raises(TypeError): copy(source['foo'], dest, dry_run=True, log=True) + + +class TestCopyV3(TestCopy): + + @pytest.fixture(params=[False, True], ids=['zarr', 'hdf5']) + def source(self, request, tmpdir): + def prep_source(source): + foo = source.create_group('foo') + foo.attrs['experiment'] = 'weird science' + baz = foo.create_dataset('bar/baz', data=np.arange(100), chunks=(50,)) + baz.attrs['units'] = 'metres' + if request.param: + extra_kws = dict(compression='gzip', compression_opts=3, fillvalue=84, + shuffle=True, fletcher32=True) + else: + extra_kws = dict(compressor=Zlib(3), order='F', fill_value=42, filters=[Adler32()]) + source.create_dataset('spam', data=np.arange(100, 200).reshape(20, 5), + chunks=(10, 2), dtype='i2', **extra_kws) + return source + + if request.param: + h5py = pytest.importorskip('h5py') + fn = tmpdir.join('source.h5') + with h5py.File(str(fn), mode='w') as h5f: + yield prep_source(h5f) + else: + yield prep_source(group(path='group1', zarr_version=3)) + + @pytest.fixture(params=[False, True], ids=['zarr', 'hdf5']) + def dest(self, request, tmpdir): + if request.param: + h5py = pytest.importorskip('h5py') + fn = tmpdir.join('dest.h5') + with h5py.File(str(fn), mode='w') as h5f: + yield h5f + else: + yield group(path='group2', zarr_version=3) + + def test_copy_array_create_options(self, source, dest): + dest_h5py = dest.__module__.startswith('h5py.') + + # copy array, provide creation options + compressor = Zlib(9) + create_kws = dict(chunks=(10,)) + if dest_h5py: + create_kws.update(compression='gzip', compression_opts=9, + shuffle=True, fletcher32=True, fillvalue=42) + else: + # v3 case has no filters argument in zarr create_kws + create_kws.update(compressor=compressor, fill_value=42, order='F') + copy(source['foo/bar/baz'], dest, without_attrs=True, **create_kws) + check_copied_array(source['foo/bar/baz'], dest['baz'], + without_attrs=True, expect_props=create_kws) + + def test_copy_group_no_name(self, source, dest): + if source.__module__.startswith('h5py'): + with pytest.raises(TypeError): + copy(source, dest) + else: + # For v3, dest.name will be inferred from source.name + copy(source, dest) + check_copied_group(source, dest[source.name.lstrip('/')]) + + copy(source, dest, name='root') + check_copied_group(source, dest['root']) From 9478ea305f00c54084f934d0b95e33f6c0dda6b0 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 15 Dec 2021 20:36:51 -0500 Subject: [PATCH 026/109] set convenience routines default to zarr_version=None This will infer the version from the store if it is a BaseStore. 
Otherwise it will use 2 for backwards compatibility --- zarr/convenience.py | 51 +++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/zarr/convenience.py b/zarr/convenience.py index d27629e4a4..b2e60b80d8 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -28,7 +28,7 @@ def _check_and_update_path(store: BaseStore, path): # noinspection PyShadowingBuiltins -def open(store: StoreLike = None, mode: str = "a", *, zarr_version=2, path=None, **kwargs): +def open(store: StoreLike = None, mode: str = "a", *, zarr_version=None, path=None, **kwargs): """Convenience function to open a group or array using file-mode-like semantics. Parameters @@ -40,9 +40,11 @@ def open(store: StoreLike = None, mode: str = "a", *, zarr_version=2, path=None, read/write (must exist); 'a' means read/write (create if doesn't exist); 'w' means create (overwrite if exists); 'w-' means create (fail if exists). - zarr_version : {2, 3} - The zarr protocol version to use. - path : str + zarr_version : {2, 3, None}, optional + The zarr protocol version to use. The default value of None will attempt + to infer the version from `store` if possible, otherwise it will fall + back to 2. + path : str or None, optional The path within the store to open. **kwargs Additional parameters are passed through to :func:`zarr.creation.open_array` or @@ -93,7 +95,8 @@ def open(store: StoreLike = None, mode: str = "a", *, zarr_version=2, path=None, store, clobber=clobber, storage_options=kwargs.pop("storage_options", {}), zarr_version=zarr_version, ) - path = _check_and_update_path(_store, path) + # path = _check_and_update_path(_store, path) + path = normalize_storage_path(path) kwargs['path'] = path if mode in {'w', 'w-', 'x'}: @@ -121,7 +124,7 @@ def _might_close(path): return isinstance(path, (str, os.PathLike)) -def save_array(store: StoreLike, arr, *, zarr_version=2, path=None, **kwargs): +def save_array(store: StoreLike, arr, *, zarr_version=None, path=None, **kwargs): """Convenience function to save a NumPy array to the local file system, following a similar API to the NumPy save() function. @@ -131,9 +134,11 @@ def save_array(store: StoreLike, arr, *, zarr_version=2, path=None, **kwargs): Store or path to directory in file system or name of zip file. arr : ndarray NumPy array with data to save. - zarr_version : {2, 3} - The zarr protocol version to use when saving. - path : str + zarr_version : {2, 3, None}, optional + The zarr protocol version to use when saving. The default value of None + will attempt to infer the version from `store` if possible, otherwise + it will fall back to 2. + path : str or None, optional The path within the store where the array will be saved. kwargs Passed through to :func:`create`, e.g., compressor. @@ -168,7 +173,7 @@ def save_array(store: StoreLike, arr, *, zarr_version=2, path=None, **kwargs): _store.close() -def save_group(store: StoreLike, *args, zarr_version=2, path=None, **kwargs): +def save_group(store: StoreLike, *args, zarr_version=None, path=None, **kwargs): """Convenience function to save several NumPy arrays to the local file system, following a similar API to the NumPy savez()/savez_compressed() functions. @@ -178,9 +183,11 @@ def save_group(store: StoreLike, *args, zarr_version=2, path=None, **kwargs): Store or path to directory in file system or name of zip file. args : ndarray NumPy arrays with data to save. - zarr_version : {2, 3} - The zarr protocol version to use when saving. 
- path : str + zarr_version : {2, 3, None}, optional + The zarr protocol version to use when saving. The default value of None + will attempt to infer the version from `store` if possible, otherwise + it will fall back to 2. + path : str or None, optional Path within the store where the group will be saved. kwargs NumPy arrays with data to save. @@ -249,7 +256,7 @@ def save_group(store: StoreLike, *args, zarr_version=2, path=None, **kwargs): _store.close() -def save(store: StoreLike, *args, zarr_version=2, path=None, **kwargs): +def save(store: StoreLike, *args, zarr_version=None, path=None, **kwargs): """Convenience function to save an array or group of arrays to the local file system. Parameters @@ -258,9 +265,11 @@ def save(store: StoreLike, *args, zarr_version=2, path=None, **kwargs): Store or path to directory in file system or name of zip file. args : ndarray NumPy arrays with data to save. - zarr_version : {2, 3} - The zarr protocol version to use when saving. - path : str + zarr_version : {2, 3, None}, optional + The zarr protocol version to use when saving. The default value of None + will attempt to infer the version from `store` if possible, otherwise + it will fall back to 2. + path : str or None, optional The path within the group where the arrays will be saved. kwargs NumPy arrays with data to save. @@ -364,13 +373,19 @@ def __repr__(self): return r -def load(store: StoreLike, zarr_version=2, path=None): +def load(store: StoreLike, zarr_version=None, path=None): """Load data from an array or group into memory. Parameters ---------- store : MutableMapping or string Store or path to directory in file system or name of zip file. + zarr_version : {2, 3, None}, optional + The zarr protocol version to use when loading. The default value of + None will attempt to infer the version from `store` if possible, + otherwise it will fall back to 2. + path : str or None, optional + The path within the store from which to load. 
     Returns
     -------

From 2c21761bc633aef74774e2fa3686dc81ae6ab0bf Mon Sep 17 00:00:00 2001
From: Gregory Lee
Date: Fri, 17 Dec 2021 17:49:42 -0500
Subject: [PATCH 027/109] adjust test: the dimension_separator key was removed from v3 metadata

---
 zarr/tests/test_storage_v3.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py
index 64fbb26843..37b626bbfe 100644
--- a/zarr/tests/test_storage_v3.py
+++ b/zarr/tests/test_storage_v3.py
@@ -98,7 +98,6 @@ def test_init_array(self, dimension_separator_fixture_v3):
         assert default_compressor == meta['compressor']
         assert meta['fill_value'] is None
         # Missing MUST be assumed to be "/"
-        assert meta.get('dimension_separator', "/") is want_dim_sep
         assert meta['chunk_grid']['separator'] is want_dim_sep
         store.close()

From 8783f4426f7199e45883e276ef358b7a393aaef2 Mon Sep 17 00:00:00 2001
From: Gregory Lee
Date: Fri, 17 Dec 2021 18:21:43 -0500
Subject: [PATCH 028/109] add underscores to imported test classes in test_storage_v3.py

avoids these tests running a second time when this file is called

---
 zarr/tests/test_storage_v3.py | 56 +++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 23 deletions(-)

diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py
index 37b626bbfe..87244f0b7f 100644
--- a/zarr/tests/test_storage_v3.py
+++ b/zarr/tests/test_storage_v3.py
@@ -17,13 +17,23 @@
                           LMDBStoreV3, SQLiteStoreV3, LRUStoreCacheV3)
 from zarr.tests.util import CountingDictV3, have_fsspec, skip_test_env_var

-from .test_storage import (StoreTests, TestMemoryStore, TestDirectoryStore,
-                           TestFSStore, TestNestedDirectoryStore, TestZipStore,
-                           TestDBMStore, TestDBMStoreDumb, TestDBMStoreGnu,
-                           TestDBMStoreNDBM, TestDBMStoreBerkeleyDB,
-                           TestLMDBStore, TestSQLiteStore,
-                           TestSQLiteStoreInMemory, TestLRUStoreCache,
-                           skip_if_nested_chunks)
+from .test_storage import (
+    StoreTests,
+    TestMemoryStore as _TestMemoryStore,
+    TestDirectoryStore as _TestDirectoryStore,
+    TestFSStore as _TestFSStore,
+    TestNestedDirectoryStore as _TestNestedDirectoryStore,
+    TestZipStore as _TestZipStore,
+    TestDBMStore as _TestDBMStore,
+    TestDBMStoreDumb as _TestDBMStoreDumb,
+    TestDBMStoreGnu as _TestDBMStoreGnu,
+    TestDBMStoreNDBM as _TestDBMStoreNDBM,
+    TestDBMStoreBerkeleyDB as _TestDBMStoreBerkeleyDB,
+    TestLMDBStore as _TestLMDBStore,
+    TestSQLiteStore as _TestSQLiteStore,
+    TestSQLiteStoreInMemory as _TestSQLiteStoreInMemory,
+    TestLRUStoreCache as _TestLRUStoreCache,
+    skip_if_nested_chunks)

 # pytest will fail to run if the following fixtures aren't imported here
 from .test_storage import dimension_separator_fixture, s3  # noqa

@@ -363,14 +373,14 @@ def test_set_invalid_content(self):
         pass


-class TestMemoryStoreV3(TestMemoryStore, StoreV3Tests):
+class TestMemoryStoreV3(_TestMemoryStore, StoreV3Tests):

     def create_store(self, **kwargs):
         skip_if_nested_chunks(**kwargs)
         return MemoryStoreV3(**kwargs)


-class TestDirectoryStoreV3(TestDirectoryStore, StoreV3Tests):
+class TestDirectoryStoreV3(_TestDirectoryStore, StoreV3Tests):

     def create_store(self, normalize_keys=False, **kwargs):
         # For v3, don't have to skip if nested.
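
As background for the aliased imports above: pytest collects any class whose
name matches its default Test* pattern from a test module's namespace,
including classes that are merely imported. A minimal sketch of the idea,
reusing names from the patch (illustrative, not additional patch content):

    # imported under its original name, the base class would be collected
    # in this module too and all of its tests would run a second time
    from .test_storage import TestMemoryStore as _TestMemoryStore

    # the leading underscore keeps the alias outside pytest's Test* pattern,
    # so only the v3 subclass defined here is collected and run
    class TestMemoryStoreV3(_TestMemoryStore, StoreV3Tests):
        pass
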
@@ -383,7 +393,7 @@ def create_store(self, normalize_keys=False, **kwargs): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -class TestFSStoreV3(TestFSStore, StoreV3Tests): +class TestFSStoreV3(_TestFSStore, StoreV3Tests): def create_store(self, normalize_keys=False, dimension_separator=".", @@ -451,7 +461,7 @@ def create_store(self, normalize_keys=False, key_separator=".", **kwargs): # TODO: remove NestedDirectoryStoreV3? -class TestNestedDirectoryStoreV3(TestNestedDirectoryStore, +class TestNestedDirectoryStoreV3(_TestNestedDirectoryStore, TestDirectoryStoreV3): def create_store(self, normalize_keys=False, **kwargs): @@ -478,10 +488,10 @@ def test_init_array(self): # TODO: enable once N5StoreV3 has been implemented # @pytest.mark.skipif(True, reason="N5StoreV3 not yet fully implemented") -# class TestN5StoreV3(TestN5Store, TestNestedDirectoryStoreV3, StoreV3Tests): +# class TestN5StoreV3(_TestN5Store, TestNestedDirectoryStoreV3, StoreV3Tests): -class TestZipStoreV3(TestZipStore, StoreV3Tests): +class TestZipStoreV3(_TestZipStore, StoreV3Tests): def create_store(self, **kwargs): path = tempfile.mktemp(suffix='.zip') @@ -499,7 +509,7 @@ def test_mode(self): store.clear() -class TestDBMStoreV3(TestDBMStore, StoreV3Tests): +class TestDBMStoreV3(_TestDBMStore, StoreV3Tests): def create_store(self, dimension_separator=None): path = tempfile.mktemp(suffix='.anydbm') @@ -509,7 +519,7 @@ def create_store(self, dimension_separator=None): return store -class TestDBMStoreV3Dumb(TestDBMStoreDumb, StoreV3Tests): +class TestDBMStoreV3Dumb(_TestDBMStoreDumb, StoreV3Tests): def create_store(self, **kwargs): path = tempfile.mktemp(suffix='.dumbdbm') @@ -520,7 +530,7 @@ def create_store(self, **kwargs): return store -class TestDBMStoreV3Gnu(TestDBMStoreGnu, StoreV3Tests): +class TestDBMStoreV3Gnu(_TestDBMStoreGnu, StoreV3Tests): def create_store(self, **kwargs): gdbm = pytest.importorskip("dbm.gnu") @@ -532,7 +542,7 @@ def create_store(self, **kwargs): return store # pragma: no cover -class TestDBMStoreV3NDBM(TestDBMStoreNDBM, StoreV3Tests): +class TestDBMStoreV3NDBM(_TestDBMStoreNDBM, StoreV3Tests): def create_store(self, **kwargs): ndbm = pytest.importorskip("dbm.ndbm") @@ -542,7 +552,7 @@ def create_store(self, **kwargs): return store # pragma: no cover -class TestDBMStoreV3BerkeleyDB(TestDBMStoreBerkeleyDB, StoreV3Tests): +class TestDBMStoreV3BerkeleyDB(_TestDBMStoreBerkeleyDB, StoreV3Tests): def create_store(self, **kwargs): bsddb3 = pytest.importorskip("bsddb3") @@ -552,7 +562,7 @@ def create_store(self, **kwargs): return store -class TestLMDBStoreV3(TestLMDBStore, StoreV3Tests): +class TestLMDBStoreV3(_TestLMDBStore, StoreV3Tests): def create_store(self, **kwargs): pytest.importorskip("lmdb") @@ -563,7 +573,7 @@ def create_store(self, **kwargs): return store -class TestSQLiteStoreV3(TestSQLiteStore, StoreV3Tests): +class TestSQLiteStoreV3(_TestSQLiteStore, StoreV3Tests): def create_store(self, **kwargs): pytest.importorskip("sqlite3") @@ -573,7 +583,7 @@ def create_store(self, **kwargs): return store -class TestSQLiteStoreV3InMemory(TestSQLiteStoreInMemory, StoreV3Tests): +class TestSQLiteStoreV3InMemory(_TestSQLiteStoreInMemory, StoreV3Tests): def create_store(self, **kwargs): pytest.importorskip("sqlite3") @@ -606,7 +616,7 @@ def create_store(self, **kwargs): return store -class TestLRUStoreCacheV3(TestLRUStoreCache, StoreV3Tests): +class TestLRUStoreCacheV3(_TestLRUStoreCache, StoreV3Tests): def create_store(self, **kwargs): # wrapper therefore no dimension_separator argument @@ 
-834,4 +844,4 @@ def test_cache_keys(self):

 # TODO: implement ABSStoreV3
 # @skip_test_env_var("ZARR_TEST_ABS")
-# class TestABSStoreV3(TestABSStore, StoreV3Tests):
+# class TestABSStoreV3(_TestABSStore, StoreV3Tests):

From 981c6b9654a0e9ec98875ad6eb942f04cdf1a462 Mon Sep 17 00:00:00 2001
From: Gregory Lee
Date: Fri, 17 Dec 2021 18:33:11 -0500
Subject: [PATCH 029/109] add underscore to imported TestArrayWithPath in test_core_v3.py

avoids this test class being run a second time

---
 zarr/tests/test_core_v3.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/zarr/tests/test_core_v3.py b/zarr/tests/test_core_v3.py
index d0a51088b0..b93db7e350 100644
--- a/zarr/tests/test_core_v3.py
+++ b/zarr/tests/test_core_v3.py
@@ -28,14 +28,14 @@
     init_array,
     init_group,
 )
-from zarr.tests.test_core import TestArrayWithPath
+from zarr.tests.test_core import TestArrayWithPath as _TestArrayWithPath
 from zarr.tests.util import have_fsspec
 from zarr.util import buffer_size


 # Start with TestArrayWithPathV3 not TestArrayV3 since path must be supplied
-class TestArrayWithPathV3(TestArrayWithPath):
+class TestArrayWithPathV3(_TestArrayWithPath):

     _version = 3

From fc5c1c13a8bcd28973d4004e0587d46e21013ba3 Mon Sep 17 00:00:00 2001
From: Gregory Lee
Date: Sun, 19 Dec 2021 10:38:04 -0500
Subject: [PATCH 030/109] refactor _valid_key and add tests

test _ensure_store(None)

---
 zarr/_storage/store.py        | 80 +++++++++++++++++++----------
 zarr/tests/test_storage.py    |  4 +-
 zarr/tests/test_storage_v3.py | 53 ++++++++++++++++++++++-
 3 files changed, 98 insertions(+), 39 deletions(-)

diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py
index 2c6d7b3978..fe6cebb363 100644
--- a/zarr/_storage/store.py
+++ b/zarr/_storage/store.py
@@ -133,6 +133,9 @@ def rmdir(self, path: str = "") -> None:
         _rmdir_from_keys(self, path)


+_valid_key_characters = set(ascii_letters + digits + "/.-_")
+
+
 class StoreV3(BaseStore):
     _store_version = 3
     _metadata_class = Metadata3
@@ -145,17 +148,32 @@ def _valid_key(key: str) -> bool:
         """
         Verify that a key conforms to the specification.

         A key is any string containing only character in the range a-z, A-Z,
         0-9, or in the set /.-_ it will return True if that's the case, False
         otherwise.
-
-        In addition, in spec v3, keys can only start with the prefix meta/,
-        data/ or be exactly zarr.json and should not end with /. This should
-        not be exposed to the user, and is a store implementation detail, so
-        this method will raise a ValueError in that case.
+        """
+        if not isinstance(key, str) or not key.isascii():
+            return False
+        if set(key) - _valid_key_characters:
             return False
+        return True
+
+    @staticmethod
+    def _validate_key(key: str):
+        """
+        Verify that a key conforms to the v3 specification.
+
+        A key is any string containing only characters in the range a-z, A-Z,
+        0-9, or in the set /.-_; keys containing anything else fail
+        validation.
+
+        In spec v3, keys can only start with the prefix meta/, data/ or be
+        exactly zarr.json and should not end with /. This should not be exposed
+        to the user, and is a store implementation detail, so this method will
+        raise a ValueError in that case.
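+
+        For example, under these rules (hypothetical keys, for illustration):
+
+            zarr.json                     -> valid
+            meta/root/foo/bar.array.json  -> valid
+            data/root/foo/bar/c0/0        -> valid
+            foo/bar                       -> ValueError (unexpected prefix)
+            meta/root/foo/                -> ValueError (trailing /)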
+ """ + if not StoreV3._valid_key(key): + raise ValueError( + f"Keys must be ascii strings and may only contain the " + f"characters {''.join(sorted(_valid_key_characters))}" + ) if ( not key.startswith("data/") @@ -167,8 +185,6 @@ def _valid_key(key: str) -> bool: if key.endswith('/'): raise ValueError("keys may not end in /") - return True - def list_prefix(self, prefix): if prefix.startswith('/'): raise ValueError("prefix must not begin with /") @@ -214,35 +230,25 @@ def list(self): "The list method has not been implemented for this store type." ) - # TODO: Remove listdir? This method is just to match the current V2 stores - # The v3 spec mentions: list, list_dir, list_prefix - def listdir(self, path: str = ""): - if path and not path.endswith("/"): - path = path + "/" - keys, prefixes = self.list_dir(path) - prefixes = [p[len(path):].rstrip("/") for p in prefixes] - keys = [k[len(path):] for k in keys] - return keys + prefixes - - # TODO: rmdir here is identical to the rmdir on Store so could potentially - # move to BaseStore instead. - def rmdir(self, path: str = "") -> None: - if not self.is_erasable(): - raise NotImplementedError( - f'{type(self)} is not erasable, cannot call "rmdir"' - ) # pragma: no cover - path = normalize_storage_path(path) - _rmdir_from_keys(self, path) - def __contains__(self, key): - # TODO: re-enable this check? - # if not key.startswith(("meta/", "data/")): - # raise ValueError( - # f'Key must start with either "meta/" or "data/". ' - # f'Got {key}' - # ) return key in self.list() + def __setitem__(self, key, value): + """Set a value. + + Here we validate the key name prior to calling __setitem__ + """ + self._validate_key(key) + return super().__setitem__(key, value) + + def __getitem__(self, key): + """Get a value. 
+ + Here we validate the key name prior to calling __getitem__ + """ + self._validate_key(key) + return super().__getitem__(key) + def clear(self): """Remove all items from store.""" self.erase_prefix("/") diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index b168016d0c..ed879cce9f 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -67,13 +67,15 @@ def test_kvstore_repr(): repr(KVStore(dict())) -def test_invalid_store(): +def test_ensure_store(): class InvalidStore: pass with pytest.raises(ValueError): Store._ensure_store(InvalidStore()) + assert Store._ensure_store(None) is None + def test_capabilities(): s = KVStore(dict()) diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index 87244f0b7f..4b86e34afb 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -6,6 +6,7 @@ import numpy as np import pytest +from zarr._storage.store import _valid_key_characters from zarr.codecs import Zlib from zarr.errors import ContainsArrayError, ContainsGroupError from zarr.meta import ZARR_FORMAT @@ -14,7 +15,8 @@ from zarr.storage import (KVStoreV3, MemoryStoreV3, ZipStoreV3, FSStoreV3, DirectoryStoreV3, NestedDirectoryStoreV3, RedisStoreV3, MongoDBStoreV3, DBMStoreV3, - LMDBStoreV3, SQLiteStoreV3, LRUStoreCacheV3) + LMDBStoreV3, SQLiteStoreV3, LRUStoreCacheV3, + StoreV3) from zarr.tests.util import CountingDictV3, have_fsspec, skip_test_env_var from .test_storage import ( @@ -48,6 +50,55 @@ def dimension_separator_fixture_v3(request): return request.param +def test_ensure_store_v3(): + class InvalidStore: + pass + + with pytest.raises(ValueError): + StoreV3._ensure_store(InvalidStore()) + + assert StoreV3._ensure_store(None) is None + + +def test_valid_key(): + store = KVStoreV3(dict) + + # only ascii keys are valid + assert not store._valid_key(5) + assert not store._valid_key(2.8) + + for key in _valid_key_characters: + assert store._valid_key(key) + + # other characters not in _valid_key_characters are not allowed + assert not store._valid_key('*') + assert not store._valid_key('~') + assert not store._valid_key('^') + + +def test_validate_key(): + store = KVStoreV3(dict) + + # zarr.json is a valid key + store._validate_key('zarr.json') + # but other keys not starting with meta/ or data/ are not + with pytest.raises(ValueError): + store._validate_key('zar.json') + + # valid ascii keys + for valid in ['meta/root/arr1.array.json', + 'data/root/arr1.array.json', + 'meta/root/subfolder/item_1-0.group.json']: + store._validate_key(valid) + # but otherwise valid keys cannot end in / + with pytest.raises(ValueError): + assert store._validate_key(valid + '/') + + for invalid in [0, '*', '~', '^', '&']: + with pytest.raises(ValueError): + store._validate_key(invalid) + + class StoreV3Tests(StoreTests): def test_getsize(self): From 83cf3457a804eab2825153d35ad9c2b5f03f8d9b Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Sun, 19 Dec 2021 10:40:32 -0500 Subject: [PATCH 031/109] move KVStoreV3 logic from StoreV3.__eq__ to KVStoreV3.__eq__ --- zarr/_storage/store.py | 6 +----- zarr/storage.py | 5 +++++ 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index fe6cebb363..c230700fe2 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -254,11 +254,7 @@ def clear(self): self.erase_prefix("/") def __eq__(self, other): - from zarr.storage import KVStoreV3 # avoid circular import - if isinstance(other, KVStoreV3): - return self._mutable_mapping == 
other._mutable_mapping - else: - return NotImplemented + return NotImplemented @staticmethod def _ensure_store(store): diff --git a/zarr/storage.py b/zarr/storage.py index ef5479c8fa..66b2348bff 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2943,6 +2943,11 @@ class KVStoreV3(KVStore, StoreV3): def list(self): return list(self._mutable_mapping.keys()) + def __eq__(self, other): + return ( + isinstance(other, KVStoreV3) and + self._mutable_mapping == other._mutable_mapping + ) KVStoreV3.__doc__ = KVStore.__doc__ From 1dbce4acd5533a4680b3305e6434ffcbf8e9100f Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Sun, 19 Dec 2021 10:48:21 -0500 Subject: [PATCH 032/109] expand tests for _ensure_store --- zarr/tests/test_storage_v3.py | 38 +++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index 4b86e34afb..4f0f553965 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -50,6 +50,37 @@ def dimension_separator_fixture_v3(request): return request.param +class DummyStore(): + # contains all methods expected of Mutable Mapping + + def keys(self): + pass + + def values(self): + pass + + def get(self, value, default=None): + pass + + def __setitem__(self, key, value): + pass + + def __getitem__(self, key): + pass + + def __delitem__(self, key): + pass + + def __contains__(self, key): + pass + + +class InvalidDummyStore(): + # does not contain expected methods of a MutableMapping + + def keys(self): + pass + def test_ensure_store_v3(): class InvalidStore: pass @@ -59,6 +90,13 @@ class InvalidStore: assert StoreV3._ensure_store(None) is None + # class with all methods of a MutableMapping will become a KVStoreV3 + assert isinstance(StoreV3._ensure_store(DummyStore), KVStoreV3) + + with pytest.raises(ValueError): + # does not have the methods expected of a MutableMapping + StoreV3._ensure_store(InvalidDummyStore) + def test_valid_key(): store = KVStoreV3(dict) From 5391c46a9025a6c39b8775bd0a5437661494f0ee Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Sun, 19 Dec 2021 10:53:30 -0500 Subject: [PATCH 033/109] test exception for v2 store input to _get_hierarchy_metadata --- zarr/tests/test_storage.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index ed879cce9f..94993326bb 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -18,6 +18,7 @@ from numcodecs.compat import ensure_bytes import zarr +from zarr._storage.store import _get_hierarchy_metadata from zarr.codecs import BZ2, AsType, Blosc, Zlib from zarr.errors import MetadataError from zarr.hierarchy import group @@ -2215,3 +2216,9 @@ def test_fill_value_change(): assert a[0, 0] == 1 assert json.loads(a.store[".zarray"])["fill_value"] == 1 + + +def test_get_hierarchy_metadata_v2(): + # v2 stores do not have hierarchy metadata (i.e. 
zarr.json) + with pytest.raises(ValueError): + _get_hierarchy_metadata(KVStore(dict)) From 6feacdb3eabb97e5e2f15ed27a974988417a4e2e Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Sun, 19 Dec 2021 10:57:10 -0500 Subject: [PATCH 034/109] test exception for init_array with path=None --- zarr/tests/test_core_v3.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/zarr/tests/test_core_v3.py b/zarr/tests/test_core_v3.py index b93db7e350..a1f5e20d1c 100644 --- a/zarr/tests/test_core_v3.py +++ b/zarr/tests/test_core_v3.py @@ -95,6 +95,11 @@ def test_array_init(self): assert group_key not in store assert ('meta/root/' + path + '.array.json') in store + def test_array_no_path(self): + # passing path=None to init_array will raise an exception + with pytest.raises(ValueError): + self.create_array(shape=1000, chunks=100, array_path=None) + def test_nbytes_stored(self): # dict as store From bf244df76ae82a445164673d0ddcdae3b2cb4c79 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Sun, 19 Dec 2021 11:07:11 -0500 Subject: [PATCH 035/109] remove unneeded checks from Attributes The store can reject invalid v3 keys. _update_nosync calls _get_nosync which will add the 'attributes' key if missing --- zarr/attrs.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/zarr/attrs.py b/zarr/attrs.py index 78c26461c4..ff3caea958 100644 --- a/zarr/attrs.py +++ b/zarr/attrs.py @@ -30,9 +30,6 @@ def __init__(self, store, key='.zattrs', read_only=False, cache=True, self._version = getattr(store, '_store_version', 2) assert key - if self._version == 3 and '.z' in key: - raise ValueError('invalid v3 key') - _Store = Store if self._version == 2 else StoreV3 self.store = _Store._ensure_store(store) self.key = key @@ -170,8 +167,6 @@ def _update_nosync(self, *args, **kwargs): if self._version == 2: d.update(*args, **kwargs) else: - if 'attributes' not in d: - d['attributes'] = {} d['attributes'].update(*args, **kwargs) # _put modified data From 097459b0404d5d6eb07dd6c17997ef7821a9f284 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Sun, 19 Dec 2021 12:01:42 -0500 Subject: [PATCH 036/109] tests __repr__ of LazyLoader --- zarr/tests/test_convenience.py | 1 + 1 file changed, 1 insertion(+) diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index f253ec5d05..0edc1a8ebf 100644 --- a/zarr/tests/test_convenience.py +++ b/zarr/tests/test_convenience.py @@ -145,6 +145,7 @@ def test_lazy_loader(zarr_version): assert sorted(loader) == ['bar', 'foo'] assert_array_equal(foo, loader['foo']) assert_array_equal(bar, loader['bar']) + assert 'LazyLoader: ' in repr(loader) # TODO: consolidated metadata currently only supported for v2 From 2f1519566a6fced625fd9b51be71fc6a09527139 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Sun, 19 Dec 2021 12:14:16 -0500 Subject: [PATCH 037/109] test load of individual array --- zarr/tests/test_convenience.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index 0edc1a8ebf..6f4dbe5937 100644 --- a/zarr/tests/test_convenience.py +++ b/zarr/tests/test_convenience.py @@ -148,6 +148,25 @@ def test_lazy_loader(zarr_version): assert 'LazyLoader: ' in repr(loader) +@pytest.mark.parametrize("zarr_version", [2, 3]) +def test_load_array(zarr_version): + foo = np.arange(100) + bar = np.arange(100, 0, -1) + store = 'data/group.zarr' if zarr_version == 2 else 'data/group.zr3' + kwargs = _init_creation_kwargs(zarr_version) + save(store, foo=foo, bar=bar, **kwargs) + + # can also load arrays directly 
into a numpy array + for array_name in ['foo', 'bar']: + array_path = 'dataset/' + array_name if zarr_version == 3 else array_name + array = load(store, path=array_path, zarr_version=zarr_version) + assert isinstance(array, np.ndarray) + if array_name == 'foo': + assert_array_equal(foo, array) + else: + assert_array_equal(bar, array) + + # TODO: consolidated metadata currently only supported for v2 def test_consolidate_metadata(): From 235b4451aad8d4418e53bd0106701863f030f150 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Sun, 19 Dec 2021 12:19:36 -0500 Subject: [PATCH 038/109] Add simple test case for zarr.tree convenience method --- zarr/tests/test_convenience.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index 6f4dbe5937..b0aa370bdb 100644 --- a/zarr/tests/test_convenience.py +++ b/zarr/tests/test_convenience.py @@ -167,6 +167,19 @@ def test_load_array(zarr_version): assert_array_equal(bar, array) +@pytest.mark.parametrize("zarr_version", [2, 3]) +def test_tree(zarr_version): + kwargs = _init_creation_kwargs(zarr_version) + g1 = zarr.group(**kwargs) + g2 = g1.create_group('foo') + g3 = g1.create_group('bar') + g4 = g3.create_group('baz') + g5 = g3.create_group('qux') + d1 = g5.create_dataset('baz', shape=100, chunks=10) + assert repr(zarr.tree(g1)) == repr(g1.tree()) + assert str(zarr.tree(g1)) == str(g1.tree()) + + # TODO: consolidated metadata currently only supported for v2 def test_consolidate_metadata(): From 0b82bdacfb94cdd2ece23c6bc82a35fc30c252d8 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Sun, 19 Dec 2021 13:03:40 -0500 Subject: [PATCH 039/109] add tests for copy_store with a V3 store class --- zarr/convenience.py | 4 --- zarr/tests/test_convenience.py | 61 +++++++++++++++++++++++++--------- 2 files changed, 46 insertions(+), 19 deletions(-) diff --git a/zarr/convenience.py b/zarr/convenience.py index b2e60b80d8..4b26f3ed35 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -648,10 +648,6 @@ def copy_store(source, dest, source_path='', dest_path='', excludes=None, dest_store_version = getattr(dest, '_store_version', 2) if source_store_version != dest_store_version: raise ValueError("zarr stores must share the same protocol version") - if source_store_version > 2: - if not source_path or not dest_path: - raise ValueError("v3 stores require specifying a non-empty " - "source_path and dest_path") # setup logging with _LogWriter(log) as log: diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index b0aa370bdb..59e6a34ed1 100644 --- a/zarr/tests/test_convenience.py +++ b/zarr/tests/test_convenience.py @@ -28,6 +28,8 @@ ConsolidatedMetadataStore, MemoryStore, MemoryStoreV3, + KVStore, + KVStoreV3, atexit_rmtree, getsize, ) @@ -319,6 +321,8 @@ def test_save_array_separator(tmpdir, options): class TestCopyStore(unittest.TestCase): + _version = 2 + def setUp(self): source = dict() source['foo'] = b'xxx' @@ -359,7 +363,10 @@ def test_dest_path(self): copy_store(source, dest, dest_path=dest_path) assert len(source) == len(dest) for key in source: - dest_key = 'new/' + key + if self._version == 3: + dest_key = key[:10] + 'new/' + key[10:] + else: + dest_key = 'new/' + key assert source[key] == dest[dest_key] def test_source_dest_path(self): @@ -387,16 +394,18 @@ def test_excludes_includes(self): excludes = 'f.*' copy_store(source, dest, excludes=excludes) assert len(dest) == 2 - assert 'foo' not in dest + + root = '' if self._version == 2 else 'meta/root/' + 
assert root + 'foo' not in dest

         # multiple excludes
         dest = self._get_dest_store()
         excludes = 'b.z', '.*x'
         copy_store(source, dest, excludes=excludes)
         assert len(dest) == 1
         assert root + 'foo' in dest
         assert root + 'bar/baz' not in dest
         assert root + 'bar/qux' not in dest

         # excludes and includes
         dest = self._get_dest_store()
@@ -404,9 +413,9 @@ def test_excludes_includes(self):
         includes = '.*x'
         copy_store(source, dest, excludes=excludes, includes=includes)
         assert len(dest) == 2
-        assert 'foo' in dest
-        assert 'bar/baz' not in dest
-        assert 'bar/qux' in dest
+        assert root + 'foo' in dest
+        assert root + 'bar/baz' not in dest
+        assert root + 'bar/qux' in dest

     def test_dry_run(self):
@@ -417,7 +426,8 @@ def test_dry_run(self):
     def test_if_exists(self):
         source = self.source
         dest = self._get_dest_store()
-        dest['bar/baz'] = b'mmm'
+        root = '' if self._version == 2 else 'meta/root/'
+        dest[root + 'bar/baz'] = b'mmm'

         # default ('raise')
         with pytest.raises(CopyError):
@@ -430,22 +440,43 @@ def test_if_exists(self):
         # skip
         copy_store(source, dest, if_exists='skip')
         assert 3 == len(dest)
-        assert dest['foo'] == b'xxx'
-        assert dest['bar/baz'] == b'mmm'
-        assert dest['bar/qux'] == b'zzz'
+        assert dest[root + 'foo'] == b'xxx'
+        assert dest[root + 'bar/baz'] == b'mmm'
+        assert dest[root + 'bar/qux'] == b'zzz'

         # replace
         copy_store(source, dest, if_exists='replace')
         assert 3 == len(dest)
-        assert dest['foo'] == b'xxx'
-        assert dest['bar/baz'] == b'yyy'
-        assert dest['bar/qux'] == b'zzz'
+        assert dest[root + 'foo'] == b'xxx'
+        assert dest[root + 'bar/baz'] == b'yyy'
+        assert dest[root + 'bar/qux'] == b'zzz'

         # invalid option
         with pytest.raises(ValueError):
             copy_store(source, dest, if_exists='foobar')


+class TestCopyStoreV3(TestCopyStore):
+
+    _version = 3
+
+    def setUp(self):
+        source = KVStoreV3(dict())
+        source['meta/root/foo'] = b'xxx'
+        source['meta/root/bar/baz'] = b'yyy'
+        source['meta/root/bar/qux'] = b'zzz'
+        self.source = source
+
+    def _get_dest_store(self):
+        return KVStoreV3(dict())
+
+    def test_mismatched_store_versions(self):
+        # cannot copy between stores of mixed Zarr versions
+        dest = KVStore(dict())
+        with pytest.raises(ValueError):
+            copy_store(self.source, dest)
+
+
 def check_copied_array(original, copied, without_attrs=False,
                        expect_props=None):

From ab71ed5ad88d686a340cc01f5c52a707db96e068 Mon Sep 17 00:00:00 2001
From: Gregory Lee
Date: Sun, 19 Dec 2021 13:20:28 -0500
Subject: [PATCH 040/109] test raising of exception on initialization with mismatched store and chunk_store protocol versions

---
 zarr/core.py               |  6 +-----
 zarr/storage.py            |  6 ++++++
 zarr/tests/test_core_v3.py | 20 ++++++++++++++++++++
 3 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/zarr/core.py b/zarr/core.py
index e70d5591f7..be9178bdda 100644
--- a/zarr/core.py
+++ b/zarr/core.py
@@ -169,15 +169,11 @@ def __init__(
         store = normalize_store_arg(store, zarr_version=zarr_version)

         if zarr_version is None:
-            zarr_version = getattr(store, '_store_version', 2)
+            zarr_version = store._store_version

         if chunk_store is not None:
             chunk_store = normalize_store_arg(chunk_store,
                                               zarr_version=zarr_version)
-            if not getattr(chunk_store, '_store_version', 2) == zarr_version:
-                raise ValueError(
-                    "zarr_version of store and chunk_store must match"
-                )

         self._store = store
         self._chunk_store = chunk_store

diff --git a/zarr/storage.py b/zarr/storage.py
index 66b2348bff..b56f2493e1 100644
--- a/zarr/storage.py
+++
b/zarr/storage.py @@ -137,6 +137,12 @@ def normalize_store_arg(store, clobber=False, storage_options=None, mode="w", # add default zarr.json metadata store['zarr.json'] = store._metadata_class.encode_hierarchy_metadata(None) return store + elif hasattr(store, '_store_version') and store._store_version != zarr_version: + raise ValueError( + f"store is a zarr v{store._store_version} store which conflicts " + f"with the specified zarr_version ({zarr_version})." + ) + if isinstance(store, os.PathLike): store = os.fspath(store) if isinstance(store, str): diff --git a/zarr/tests/test_core_v3.py b/zarr/tests/test_core_v3.py index a1f5e20d1c..8f8f8b5298 100644 --- a/zarr/tests/test_core_v3.py +++ b/zarr/tests/test_core_v3.py @@ -17,6 +17,7 @@ DBMStoreV3, DirectoryStoreV3, FSStoreV3, + KVStore, KVStoreV3, LMDBStoreV3, LRUStoreCacheV3, @@ -901,3 +902,22 @@ def test_read_from_all_blocks(self): z[2:99_000] = 1 b = Array(z.store, path=z.path, read_only=True, partial_decompress=True) assert (b[2:99_000] == 1).all() + + +def test_array_mismatched_store_versions(): + store_v3 = KVStoreV3(dict()) + store_v2 = KVStore(dict()) + + # separate chunk store + chunk_store_v2 = KVStore(dict()) + chunk_store_v3 = KVStoreV3(dict()) + + init_kwargs = dict(shape=100, chunks=10, dtype=" Date: Sun, 19 Dec 2021 23:13:29 -0500 Subject: [PATCH 041/109] add key validation on setitem in v3 stores enable missing test_hierarchy for v3 stores. This required fixes to a number of the rename and rmdir methods for the V3 stores --- zarr/_storage/store.py | 38 ++-- zarr/storage.py | 167 ++++++++++---- zarr/tests/test_storage.py | 406 ++++++++++++++++++---------------- zarr/tests/test_storage_v3.py | 390 ++++++++++++++------------------ 4 files changed, 521 insertions(+), 480 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index c230700fe2..e60f768f83 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -1,3 +1,4 @@ +import abc import sys from collections.abc import MutableMapping from string import ascii_letters, digits @@ -233,21 +234,15 @@ def list(self): def __contains__(self, key): return key in self.list() + @abc.abstractmethod def __setitem__(self, key, value): - """Set a value. - - Here we validate the key name prior to calling __setitem__ - """ - self._validate_key(key) - return super().__setitem__(key, value) + """Set a value.""" + return + @abc.abstractmethod def __getitem__(self, key): - """Get a value. 
- - Here we validate the key name prior to calling __getitem__ - """ - self._validate_key(key) - return super().__getitem__(key) + """Get a value.""" + return def clear(self): """Remove all items from store.""" @@ -327,17 +322,18 @@ def _rename_from_keys(store: BaseStore, src_path: str, dst_path: str) -> None: dst_prefix = _path_to_prefix(dst_path) version = getattr(store, '_store_version', 2) if version == 2: - root_prefixes = [''] - elif version == 3: - root_prefixes = ['meta/root/', 'data/root/'] - for root_prefix in root_prefixes: - _src_prefix = root_prefix + src_prefix - _dst_prefix = root_prefix + dst_prefix for key in list(store.keys()): - if key.startswith(_src_prefix): - new_key = _dst_prefix + key.lstrip(_src_prefix) + if key.startswith(src_prefix): + new_key = dst_prefix + key.lstrip(src_prefix) store[new_key] = store.pop(key) - if version == 3: + else: + for root_prefix in ['meta/root/', 'data/root/']: + _src_prefix = root_prefix + src_prefix + _dst_prefix = root_prefix + dst_prefix + for key in store.list_prefix(_src_prefix): + new_key = _dst_prefix + key[len(_src_prefix):] + store[new_key] = store.pop(key) + sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] _src_array_json = 'meta/root/' + src_prefix[:-1] + '.array' + sfx if _src_array_json in store: diff --git a/zarr/storage.py b/zarr/storage.py index b56f2493e1..52a4ed2b4b 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2667,10 +2667,11 @@ def getsize(self, path=None): def rmdir(self, path=None): path = normalize_storage_path(path) if path: - with self.lock: - self.cursor.execute( - 'DELETE FROM zarr WHERE k LIKE (? || "/%")', (path,) - ) + for base in ['meta/root/', 'data/root/']: + with self.lock: + self.cursor.execute( + 'DELETE FROM zarr WHERE k LIKE (? || "/%")', (base + path,) + ) else: self.clear() @@ -2949,6 +2950,10 @@ class KVStoreV3(KVStore, StoreV3): def list(self): return list(self._mutable_mapping.keys()) + def __setitem__(self, key, value): + self._validate_key(key) + super().__setitem__(key, value) + def __eq__(self, other): return ( isinstance(other, KVStoreV3) and @@ -2963,6 +2968,10 @@ class FSStoreV3(FSStore, StoreV3): # FSStoreV3 doesn't use this (FSStore uses it within _normalize_key) _META_KEYS = () + def __setitem__(self, key, value): + self._validate_key(key) + super().__setitem__(key, value) + def _default_key_separator(self): if self.key_separator is None: self.key_separator = "/" @@ -3007,6 +3016,19 @@ def setitems(self, values): self.map.setitems(values) + def rmdir(self, path=None): + if self.mode == 'r': + raise ReadOnlyError() + if path: + for base in ['meta/root/', 'data/root/']: + store_path = self.dir_path(base + path) + if self.fs.isdir(store_path): + self.fs.rm(store_path, recursive=True) + else: + store_path = self.dir_path(path) + if self.fs.isdir(store_path): + self.fs.rm(store_path, recursive=True) + class MemoryStoreV3(MemoryStore, StoreV3): @@ -3026,6 +3048,10 @@ def __eq__(self, other): self.cls == other.cls ) + def __setitem__(self, key, value): + self._validate_key(key) + super().__setitem__(key, value) + def list(self): return list(self.keys()) @@ -3046,6 +3072,36 @@ def getsize(self, path: Path = None): return -1 return size + def rename(self, src_path: Path, dst_path: Path): + src_path = normalize_storage_path(src_path) + dst_path = normalize_storage_path(dst_path) + + any_renamed = False + for base in ['meta/root/', 'data/root/']: + if self.list_prefix(base + src_path): + src_parent, src_key = self._get_parent(base + src_path) + dst_parent, dst_key = 
self._require_parent(base + dst_path) + + dst_parent[dst_key] = src_parent.pop(src_key) + any_renamed = True + if not any_renamed: + raise ValueError(f"no item {src_path} found to rename") + + def rmdir(self, path: Path = None): + path = normalize_storage_path(path) + if path: + for base in ['meta/root/', 'data/root/']: + try: + parent, key = self._get_parent(base + path) + value = parent[key] + except KeyError: + return + else: + if isinstance(value, self.cls): + del parent[key] + else: + # clear out root + self.root = self.cls() MemoryStoreV3.__doc__ = MemoryStore.__doc__ @@ -3061,26 +3117,9 @@ def __eq__(self, other): self.path == other.path ) - # def getsize(self, path=None): - # size = 0 - # if path is None or path == '': - # # add array and group folders if present - # dirs = [] - # for d in ['data/root', 'meta/root']: - # dir_path = os.path.join(self.path, d) - # if os.path.exists(dir_path): - # dirs.append(dir_path) - # print(f"dirs={dirs}") - # else: - # files, dirs = _get_files_and_dirs_from_path(self, path) - # for file in files: - # size += os.path.getsize(file) - # for d in dirs: - # for child in scandir(d): - # print(f"child={child}") - # if child.is_file(): - # size += child.stat().st_size - # return size + def __setitem__(self, key, value): + self._validate_key(key) + super().__setitem__(key, value) def getsize(self, path: Path = None): size = 0 @@ -3125,6 +3164,17 @@ def rename(self, src_path, dst_path, metadata_key_suffix='.json'): if not any_existed: raise FileNotFoundError("nothing found at src_path") + def rmdir(self, path=None): + store_path = normalize_storage_path(path) + dir_path = self.path + if store_path: + for base in ['meta/root/', 'data/root/']: + dir_path = os.path.join(dir_path, base + store_path) + if os.path.isdir(dir_path): + shutil.rmtree(dir_path) + # TODO: also remove any residual .array.json or .group.json files? 
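+            # For orientation (illustrative v3 key layout, not part of the
+            # original patch): a node at logical path 'foo/bar' keeps its
+            # metadata and its chunks under two separate prefixes, which is
+            # why both subtrees are removed in the loop above:
+            #   meta/root/foo/bar.array.json   (array metadata document)
+            #   data/root/foo/bar/c0/0         (a chunk key, '/' separator)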
+ elif os.path.isdir(dir_path): + shutil.rmtree(dir_path) DirectoryStoreV3.__doc__ = DirectoryStore.__doc__ @@ -3142,6 +3192,10 @@ def __eq__(self, other): self.allowZip64 == other.allowZip64 ) + def __setitem__(self, key, value): + self._validate_key(key) + super().__setitem__(key, value) + def getsize(self, path=None): path = normalize_storage_path(path) with self.mutex: @@ -3181,6 +3235,10 @@ def __eq__(self, other): self.path == other.path ) + def __setitem__(self, key, value): + self._validate_key(key) + super().__setitem__(key, value) + NestedDirectoryStoreV3.__doc__ = NestedDirectoryStore.__doc__ @@ -3190,6 +3248,10 @@ class RedisStoreV3(RedisStore, StoreV3): def list(self): return list(self.keys()) + def __setitem__(self, key, value): + self._validate_key(key) + super().__setitem__(key, value) + RedisStoreV3.__doc__ = RedisStore.__doc__ @@ -3199,6 +3261,10 @@ class MongoDBStoreV3(MongoDBStore, StoreV3): def list(self): return list(self.keys()) + def __setitem__(self, key, value): + self._validate_key(key) + super().__setitem__(key, value) + MongoDBStoreV3.__doc__ = MongoDBStore.__doc__ @@ -3208,6 +3274,10 @@ class DBMStoreV3(DBMStore, StoreV3): def list(self): return list(self.keys()) + def __setitem__(self, key, value): + self._validate_key(key) + super().__setitem__(key, value) + DBMStoreV3.__doc__ = DBMStore.__doc__ @@ -3217,6 +3287,10 @@ class LMDBStoreV3(LMDBStore, StoreV3): def list(self): return list(self.keys()) + def __setitem__(self, key, value): + self._validate_key(key) + super().__setitem__(key, value) + LMDBStoreV3.__doc__ = LMDBStore.__doc__ @@ -3227,25 +3301,28 @@ def list(self): return list(self.keys()) def getsize(self, path=None): - if path is None or path == '': - # TODO: why does the query below not work in this case? - # For now fall back to the default _getsize implementation - return _getsize(self, path) - else: - path = normalize_storage_path(path) - size = 0 - for _path in ['data/root/' + path, 'meta/root/' + path]: - c = self.cursor.execute( - ''' - SELECT COALESCE(SUM(LENGTH(v)), 0) FROM zarr - WHERE k LIKE (? || "%") AND - 0 == INSTR(LTRIM(SUBSTR(k, LENGTH(?) + 1), "/"), "/") - ''', - (_path, _path) - ) - for item_size, in c: - size += item_size - return size + # TODO: why does the query below not work in this case? + # For now fall back to the default _getsize implementation + # size = 0 + # for _path in ['data/root/' + path, 'meta/root/' + path]: + # c = self.cursor.execute( + # ''' + # SELECT COALESCE(SUM(LENGTH(v)), 0) FROM zarr + # WHERE k LIKE (? || "%") AND + # 0 == INSTR(LTRIM(SUBSTR(k, LENGTH(?) 
+ 1), "/"), "/") + # ''', + # (_path, _path) + # ) + # for item_size, in c: + # size += item_size + # return size + + # fallback to default implementation for now + return _getsize(self, path) + + def __setitem__(self, key, value): + self._validate_key(key) + super().__setitem__(key, value) SQLiteStoreV3.__doc__ = SQLiteStore.__doc__ @@ -3267,5 +3344,9 @@ def __init__(self, store, max_size: int): def list(self): return list(self.keys()) + def __setitem__(self, key, value): + self._validate_key(key) + super().__setitem__(key, value) + LRUStoreCacheV3.__doc__ = LRUStoreCache.__doc__ diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 94993326bb..9015cc99c7 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -109,6 +109,9 @@ def test_deprecated_listdir_nosotre(): class StoreTests: """Abstract store tests.""" + version = 2 + root = '' + def create_store(self, **kwargs): # pragma: no cover # implement in sub-class raise NotImplementedError @@ -121,27 +124,28 @@ def test_get_set_del_contains(self): store = self.create_store() # test __contains__, __getitem__, __setitem__ - assert 'foo' not in store + key = self.root + 'foo' + assert key not in store with pytest.raises(KeyError): # noinspection PyStatementEffect - store['foo'] - store['foo'] = b'bar' - assert 'foo' in store - assert b'bar' == ensure_bytes(store['foo']) + store[key] + store[key] = b'bar' + assert key in store + assert b'bar' == ensure_bytes(store[key]) # test __delitem__ (optional) try: - del store['foo'] + del store[key] except NotImplementedError: pass else: - assert 'foo' not in store + assert key not in store with pytest.raises(KeyError): # noinspection PyStatementEffect - store['foo'] + store[key] with pytest.raises(KeyError): # noinspection PyStatementEffect - del store['foo'] + del store[key] store.close() @@ -149,49 +153,49 @@ def test_set_invalid_content(self): store = self.create_store() with pytest.raises(TypeError): - store['baz'] = list(range(5)) + store[self.root + 'baz'] = list(range(5)) store.close() def test_clear(self): store = self.create_store() - store['foo'] = b'bar' - store['baz'] = b'qux' + store[self.root + 'foo'] = b'bar' + store[self.root + 'baz'] = b'qux' assert len(store) == 2 store.clear() assert len(store) == 0 - assert 'foo' not in store - assert 'baz' not in store + assert self.root + 'foo' not in store + assert self.root + 'baz' not in store store.close() def test_pop(self): store = self.create_store() - store['foo'] = b'bar' - store['baz'] = b'qux' + store[self.root + 'foo'] = b'bar' + store[self.root + 'baz'] = b'qux' assert len(store) == 2 - v = store.pop('foo') + v = store.pop(self.root + 'foo') assert ensure_bytes(v) == b'bar' assert len(store) == 1 - v = store.pop('baz') + v = store.pop(self.root + 'baz') assert ensure_bytes(v) == b'qux' assert len(store) == 0 with pytest.raises(KeyError): - store.pop('xxx') - v = store.pop('xxx', b'default') + store.pop(self.root + 'xxx') + v = store.pop(self.root + 'xxx', b'default') assert v == b'default' - v = store.pop('xxx', b'') + v = store.pop(self.root + 'xxx', b'') assert v == b'' - v = store.pop('xxx', None) + v = store.pop(self.root + 'xxx', None) assert v is None store.close() def test_popitem(self): store = self.create_store() - store['foo'] = b'bar' + store[self.root + 'foo'] = b'bar' k, v = store.popitem() - assert k == 'foo' + assert k == self.root + 'foo' assert ensure_bytes(v) == b'bar' assert len(store) == 0 with pytest.raises(KeyError): @@ -203,20 +207,27 @@ def test_writeable_values(self): 
store = self.create_store() # __setitem__ should accept any value that implements buffer interface - store['foo1'] = b'bar' - store['foo2'] = bytearray(b'bar') - store['foo3'] = array.array('B', b'bar') - store['foo4'] = np.frombuffer(b'bar', dtype='u1') + store[self.root + 'foo1'] = b'bar' + store[self.root + 'foo2'] = bytearray(b'bar') + store[self.root + 'foo3'] = array.array('B', b'bar') + store[self.root + 'foo4'] = np.frombuffer(b'bar', dtype='u1') store.close() def test_update(self): store = self.create_store() - assert 'foo' not in store - assert 'baz' not in store - store.update(foo=b'bar', baz=b'quux') - assert b'bar' == ensure_bytes(store['foo']) - assert b'quux' == ensure_bytes(store['baz']) + assert self.root + 'foo' not in store + assert self.root + 'baz' not in store + + if self.version == 2: + store.update(foo=b'bar', baz=b'quux') + else: + kv = {self.root + 'foo': b'bar', + self.root + 'baz': b'quux'} + store.update(kv) + + assert b'bar' == ensure_bytes(store[self.root + 'foo']) + assert b'quux' == ensure_bytes(store[self.root + 'baz']) store.close() @@ -231,18 +242,23 @@ def test_iterators(self): assert set() == set(store.items()) # setup some values - store['a'] = b'aaa' - store['b'] = b'bbb' - store['c/d'] = b'ddd' - store['c/e/f'] = b'fff' + store[self.root + 'a'] = b'aaa' + store[self.root + 'b'] = b'bbb' + store[self.root + 'c/d'] = b'ddd' + store[self.root + 'c/e/f'] = b'fff' # test iterators on store with data assert 4 == len(store) - assert {'a', 'b', 'c/d', 'c/e/f'} == set(store) - assert {'a', 'b', 'c/d', 'c/e/f'} == set(store.keys()) + expected = set(self.root + k for k in ['a', 'b', 'c/d', 'c/e/f']) + assert expected == set(store) + assert expected == set(store.keys()) assert {b'aaa', b'bbb', b'ddd', b'fff'} == set(map(ensure_bytes, store.values())) - assert ({('a', b'aaa'), ('b', b'bbb'), ('c/d', b'ddd'), ('c/e/f', b'fff')} == - set(map(lambda kv: (kv[0], ensure_bytes(kv[1])), store.items()))) + assert ({(self.root + 'a', b'aaa'), + (self.root + 'b', b'bbb'), + (self.root + 'c/d', b'ddd'), + (self.root + 'c/e/f', b'fff')} == set( + map(lambda kv: (kv[0], ensure_bytes(kv[1])), store.items())) + ) store.close() @@ -250,8 +266,8 @@ def test_pickle(self): # setup store store = self.create_store() - store['foo'] = b'bar' - store['baz'] = b'quux' + store[self.root + 'foo'] = b'bar' + store[self.root + 'baz'] = b'quux' n = len(store) keys = sorted(store.keys()) @@ -267,8 +283,8 @@ def test_pickle(self): # verify assert n == len(store2) assert keys == sorted(store2.keys()) - assert b'bar' == ensure_bytes(store2['foo']) - assert b'quux' == ensure_bytes(store2['baz']) + assert b'bar' == ensure_bytes(store2[self.root + 'foo']) + assert b'quux' == ensure_bytes(store2[self.root + 'baz']) store2.close() @@ -761,41 +777,41 @@ def test_set_invalid_content(self): pass -def setdel_hierarchy_checks(store): +def setdel_hierarchy_checks(store, root=''): # these tests are for stores that are aware of hierarchy levels; this # behaviour is not strictly required by Zarr but these tests are included # to define behaviour of MemoryStore and DirectoryStore classes # check __setitem__ and __delitem__ blocked by leaf - store['a/b'] = b'aaa' + store[root + 'a/b'] = b'aaa' with pytest.raises(KeyError): - store['a/b/c'] = b'xxx' + store[root + 'a/b/c'] = b'xxx' with pytest.raises(KeyError): - del store['a/b/c'] + del store[root + 'a/b/c'] - store['d'] = b'ddd' + store[root + 'd'] = b'ddd' with pytest.raises(KeyError): - store['d/e/f'] = b'xxx' + store[root + 'd/e/f'] = b'xxx' with 
pytest.raises(KeyError): - del store['d/e/f'] + del store[root + 'd/e/f'] # test __setitem__ overwrite level - store['x/y/z'] = b'xxx' - store['x/y'] = b'yyy' - assert b'yyy' == ensure_bytes(store['x/y']) - assert 'x/y/z' not in store - store['x'] = b'zzz' - assert b'zzz' == ensure_bytes(store['x']) - assert 'x/y' not in store + store[root + 'x/y/z'] = b'xxx' + store[root + 'x/y'] = b'yyy' + assert b'yyy' == ensure_bytes(store[root + 'x/y']) + assert root + 'x/y/z' not in store + store[root + 'x'] = b'zzz' + assert b'zzz' == ensure_bytes(store[root + 'x']) + assert root + 'x/y' not in store # test __delitem__ overwrite level - store['r/s/t'] = b'xxx' - del store['r/s'] - assert 'r/s/t' not in store - store['r/s'] = b'xxx' - del store['r'] - assert 'r/s' not in store + store[root + 'r/s/t'] = b'xxx' + del store[root + 'r/s'] + assert root + 'r/s/t' not in store + store[root + 'r/s'] = b'xxx' + del store[root + 'r'] + assert root + 'r/s' not in store class TestMemoryStore(StoreTests): @@ -806,12 +822,12 @@ def create_store(self, **kwargs): def test_store_contains_bytes(self): store = self.create_store() - store['foo'] = np.array([97, 98, 99, 100, 101], dtype=np.uint8) - assert store['foo'] == b'abcde' + store[self.root + 'foo'] = np.array([97, 98, 99, 100, 101], dtype=np.uint8) + assert store[self.root + 'foo'] == b'abcde' def test_setdel(self): store = self.create_store() - setdel_hierarchy_checks(store) + setdel_hierarchy_checks(store, self.root) class TestDictStore(StoreTests): @@ -884,19 +900,19 @@ def test_pickle_ext(self): assert store.path == store2.path # check point to same underlying directory - assert 'xxx' not in store - store2['xxx'] = b'yyy' - assert b'yyy' == ensure_bytes(store['xxx']) + assert self.root + 'xxx' not in store + store2[self.root + 'xxx'] = b'yyy' + assert b'yyy' == ensure_bytes(store[self.root + 'xxx']) def test_setdel(self): store = self.create_store() - setdel_hierarchy_checks(store) + setdel_hierarchy_checks(store, self.root) def test_normalize_keys(self): store = self.create_store(normalize_keys=True) - store['FOO'] = b'bar' - assert 'FOO' in store - assert 'foo' in store + store[self.root + 'FOO'] = b'bar' + assert self.root + 'FOO' in store + assert self.root + 'foo' in store def test_listing_keys_slash(self): @@ -980,12 +996,12 @@ def test_complex(self): assert not store assert not os.listdir(path1) assert not os.listdir(path2) - store['foo'] = b"hello" - assert 'foo' in os.listdir(path1) - assert 'foo' in store - assert not os.listdir(path2) - assert store["foo"] == b"hello" - assert 'foo' in os.listdir(path2) + store[self.root + 'foo'] = b"hello" + assert 'foo' in os.listdir(str(path1) + '/' + self.root) + assert self.root + 'foo' in store + assert not os.listdir(str(path2)) + assert store[self.root + "foo"] == b"hello" + assert 'foo' in os.listdir(str(path2)) def test_deep_ndim(self): import zarr @@ -1032,31 +1048,31 @@ def test_read_only(self): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) store = self.create_store(path=path) - store['foo'] = b"bar" + store[self.root + 'foo'] = b"bar" store = self.create_store(path=path, mode='r') with pytest.raises(PermissionError): - store['foo'] = b"hex" + store[self.root + 'foo'] = b"hex" with pytest.raises(PermissionError): - del store['foo'] + del store[self.root + 'foo'] with pytest.raises(PermissionError): - store.delitems(['foo']) + store.delitems([self.root + 'foo']) with pytest.raises(PermissionError): - store.setitems({'foo': b'baz'}) + store.setitems({self.root + 'foo': b'baz'}) with 
pytest.raises(PermissionError): store.clear() with pytest.raises(PermissionError): - store.rmdir("anydir") + store.rmdir(self.root + "anydir") - assert store['foo'] == b"bar" + assert store[self.root + 'foo'] == b"bar" - filepath = os.path.join(path, "foo") + filepath = os.path.join(path, self.root + "foo") with pytest.raises(ValueError): self.create_store(path=filepath, mode='r') @@ -1212,14 +1228,14 @@ def test_init_array(self): def test_chunk_nesting(self): store = self.create_store() # any path where last segment looks like a chunk key gets special handling - store['0.0'] = b'xxx' - assert b'xxx' == store['0.0'] + store[self.root + '0.0'] = b'xxx' + assert b'xxx' == store[self.root + '0.0'] # assert b'xxx' == store['0/0'] - store['foo/10.20.30'] = b'yyy' - assert b'yyy' == store['foo/10.20.30'] + store[self.root + 'foo/10.20.30'] = b'yyy' + assert b'yyy' == store[self.root + 'foo/10.20.30'] # assert b'yyy' == store['foo/10/20/30'] - store['42'] = b'zzz' - assert b'zzz' == store['42'] + store[self.root + '42'] = b'zzz' + assert b'zzz' == store[self.root + '42'] class TestNestedDirectoryStoreNone: @@ -1494,7 +1510,7 @@ def create_store(self, **kwargs): def test_setdel(self): store = self.create_store() - setdel_hierarchy_checks(store) + setdel_hierarchy_checks(store, self.root) class TestZipStore(StoreTests): @@ -1526,8 +1542,8 @@ def test_flush(self): def test_context_manager(self): with self.create_store() as store: - store['foo'] = b'bar' - store['baz'] = b'qux' + store[self.root + 'foo'] = b'bar' + store[self.root + 'baz'] = b'qux' assert 2 == len(store) def test_pop(self): @@ -1573,8 +1589,8 @@ def create_store(self, dimension_separator=None): def test_context_manager(self): with self.create_store() as store: - store['foo'] = b'bar' - store['baz'] = b'qux' + store[self.root + 'foo'] = b'bar' + store[self.root + 'baz'] = b'qux' assert 2 == len(store) @@ -1633,8 +1649,8 @@ def create_store(self, **kwargs): def test_context_manager(self): with self.create_store() as store: - store['foo'] = b'bar' - store['baz'] = b'qux' + store[self.root + 'foo'] = b'bar' + store[self.root + 'baz'] = b'qux' assert 2 == len(store) @@ -1668,8 +1684,8 @@ def test_pickle(self): # setup store store = self.create_store() - store['foo'] = b'bar' - store['baz'] = b'quux' + store[self.root + 'foo'] = b'bar' + store[self.root + 'baz'] = b'quux' # round-trip through pickle with pytest.raises(PicklingError): @@ -1703,199 +1719,209 @@ def create_store(self, **kwargs): class TestLRUStoreCache(StoreTests): + CountingClass = CountingDict + LRUStoreClass = LRUStoreCache + def create_store(self, **kwargs): # wrapper therefore no dimension_separator argument skip_if_nested_chunks(**kwargs) - return LRUStoreCache(dict(), max_size=2**27) + return self.LRUStoreClass(dict(), max_size=2**27) def test_cache_values_no_max_size(self): # setup store - store = CountingDict() - store['foo'] = b'xxx' - store['bar'] = b'yyy' - assert 0 == store.counter['__getitem__', 'foo'] - assert 1 == store.counter['__setitem__', 'foo'] - assert 0 == store.counter['__getitem__', 'bar'] - assert 1 == store.counter['__setitem__', 'bar'] + store = self.CountingClass() + foo_key = self.root + 'foo' + bar_key = self.root + 'bar' + store[foo_key] = b'xxx' + store[bar_key] = b'yyy' + assert 0 == store.counter['__getitem__', foo_key] + assert 1 == store.counter['__setitem__', foo_key] + assert 0 == store.counter['__getitem__', bar_key] + assert 1 == store.counter['__setitem__', bar_key] # setup cache - cache = LRUStoreCache(store, max_size=None) + cache = 
self.LRUStoreClass(store, max_size=None) assert 0 == cache.hits assert 0 == cache.misses # test first __getitem__, cache miss - assert b'xxx' == cache['foo'] - assert 1 == store.counter['__getitem__', 'foo'] - assert 1 == store.counter['__setitem__', 'foo'] + assert b'xxx' == cache[foo_key] + assert 1 == store.counter['__getitem__', foo_key] + assert 1 == store.counter['__setitem__', foo_key] assert 0 == cache.hits assert 1 == cache.misses # test second __getitem__, cache hit - assert b'xxx' == cache['foo'] - assert 1 == store.counter['__getitem__', 'foo'] - assert 1 == store.counter['__setitem__', 'foo'] + assert b'xxx' == cache[foo_key] + assert 1 == store.counter['__getitem__', foo_key] + assert 1 == store.counter['__setitem__', foo_key] assert 1 == cache.hits assert 1 == cache.misses # test __setitem__, __getitem__ - cache['foo'] = b'zzz' - assert 1 == store.counter['__getitem__', 'foo'] - assert 2 == store.counter['__setitem__', 'foo'] + cache[foo_key] = b'zzz' + assert 1 == store.counter['__getitem__', foo_key] + assert 2 == store.counter['__setitem__', foo_key] # should be a cache hit - assert b'zzz' == cache['foo'] - assert 1 == store.counter['__getitem__', 'foo'] - assert 2 == store.counter['__setitem__', 'foo'] + assert b'zzz' == cache[foo_key] + assert 1 == store.counter['__getitem__', foo_key] + assert 2 == store.counter['__setitem__', foo_key] assert 2 == cache.hits assert 1 == cache.misses # manually invalidate all cached values cache.invalidate_values() - assert b'zzz' == cache['foo'] - assert 2 == store.counter['__getitem__', 'foo'] - assert 2 == store.counter['__setitem__', 'foo'] + assert b'zzz' == cache[foo_key] + assert 2 == store.counter['__getitem__', foo_key] + assert 2 == store.counter['__setitem__', foo_key] cache.invalidate() - assert b'zzz' == cache['foo'] - assert 3 == store.counter['__getitem__', 'foo'] - assert 2 == store.counter['__setitem__', 'foo'] + assert b'zzz' == cache[foo_key] + assert 3 == store.counter['__getitem__', foo_key] + assert 2 == store.counter['__setitem__', foo_key] # test __delitem__ - del cache['foo'] + del cache[foo_key] with pytest.raises(KeyError): # noinspection PyStatementEffect - cache['foo'] + cache[foo_key] with pytest.raises(KeyError): # noinspection PyStatementEffect - store['foo'] + store[foo_key] # verify other keys untouched - assert 0 == store.counter['__getitem__', 'bar'] - assert 1 == store.counter['__setitem__', 'bar'] + assert 0 == store.counter['__getitem__', bar_key] + assert 1 == store.counter['__setitem__', bar_key] def test_cache_values_with_max_size(self): # setup store - store = CountingDict() - store['foo'] = b'xxx' - store['bar'] = b'yyy' - assert 0 == store.counter['__getitem__', 'foo'] - assert 0 == store.counter['__getitem__', 'bar'] + store = self.CountingClass() + foo_key = self.root + 'foo' + bar_key = self.root + 'bar' + store[foo_key] = b'xxx' + store[bar_key] = b'yyy' + assert 0 == store.counter['__getitem__', foo_key] + assert 0 == store.counter['__getitem__', bar_key] # setup cache - can only hold one item - cache = LRUStoreCache(store, max_size=5) + cache = self.LRUStoreClass(store, max_size=5) assert 0 == cache.hits assert 0 == cache.misses # test first 'foo' __getitem__, cache miss - assert b'xxx' == cache['foo'] - assert 1 == store.counter['__getitem__', 'foo'] + assert b'xxx' == cache[foo_key] + assert 1 == store.counter['__getitem__', foo_key] assert 0 == cache.hits assert 1 == cache.misses # test second 'foo' __getitem__, cache hit - assert b'xxx' == cache['foo'] - assert 1 == 
store.counter['__getitem__', 'foo'] + assert b'xxx' == cache[foo_key] + assert 1 == store.counter['__getitem__', foo_key] assert 1 == cache.hits assert 1 == cache.misses # test first 'bar' __getitem__, cache miss - assert b'yyy' == cache['bar'] - assert 1 == store.counter['__getitem__', 'bar'] + assert b'yyy' == cache[bar_key] + assert 1 == store.counter['__getitem__', bar_key] assert 1 == cache.hits assert 2 == cache.misses # test second 'bar' __getitem__, cache hit - assert b'yyy' == cache['bar'] - assert 1 == store.counter['__getitem__', 'bar'] + assert b'yyy' == cache[bar_key] + assert 1 == store.counter['__getitem__', bar_key] assert 2 == cache.hits assert 2 == cache.misses # test 'foo' __getitem__, should have been evicted, cache miss - assert b'xxx' == cache['foo'] - assert 2 == store.counter['__getitem__', 'foo'] + assert b'xxx' == cache[foo_key] + assert 2 == store.counter['__getitem__', foo_key] assert 2 == cache.hits assert 3 == cache.misses # test 'bar' __getitem__, should have been evicted, cache miss - assert b'yyy' == cache['bar'] - assert 2 == store.counter['__getitem__', 'bar'] + assert b'yyy' == cache[bar_key] + assert 2 == store.counter['__getitem__', bar_key] assert 2 == cache.hits assert 4 == cache.misses # setup store - store = CountingDict() - store['foo'] = b'xxx' - store['bar'] = b'yyy' - assert 0 == store.counter['__getitem__', 'foo'] - assert 0 == store.counter['__getitem__', 'bar'] + store = self.CountingClass() + store[foo_key] = b'xxx' + store[bar_key] = b'yyy' + assert 0 == store.counter['__getitem__', foo_key] + assert 0 == store.counter['__getitem__', bar_key] # setup cache - can hold two items - cache = LRUStoreCache(store, max_size=6) + cache = self.LRUStoreClass(store, max_size=6) assert 0 == cache.hits assert 0 == cache.misses # test first 'foo' __getitem__, cache miss - assert b'xxx' == cache['foo'] - assert 1 == store.counter['__getitem__', 'foo'] + assert b'xxx' == cache[foo_key] + assert 1 == store.counter['__getitem__', foo_key] assert 0 == cache.hits assert 1 == cache.misses # test second 'foo' __getitem__, cache hit - assert b'xxx' == cache['foo'] - assert 1 == store.counter['__getitem__', 'foo'] + assert b'xxx' == cache[foo_key] + assert 1 == store.counter['__getitem__', foo_key] assert 1 == cache.hits assert 1 == cache.misses # test first 'bar' __getitem__, cache miss - assert b'yyy' == cache['bar'] - assert 1 == store.counter['__getitem__', 'bar'] + assert b'yyy' == cache[bar_key] + assert 1 == store.counter['__getitem__', bar_key] assert 1 == cache.hits assert 2 == cache.misses # test second 'bar' __getitem__, cache hit - assert b'yyy' == cache['bar'] - assert 1 == store.counter['__getitem__', 'bar'] + assert b'yyy' == cache[bar_key] + assert 1 == store.counter['__getitem__', bar_key] assert 2 == cache.hits assert 2 == cache.misses # test 'foo' __getitem__, should still be cached - assert b'xxx' == cache['foo'] - assert 1 == store.counter['__getitem__', 'foo'] + assert b'xxx' == cache[foo_key] + assert 1 == store.counter['__getitem__', foo_key] assert 3 == cache.hits assert 2 == cache.misses # test 'bar' __getitem__, should still be cached - assert b'yyy' == cache['bar'] - assert 1 == store.counter['__getitem__', 'bar'] + assert b'yyy' == cache[bar_key] + assert 1 == store.counter['__getitem__', bar_key] assert 4 == cache.hits assert 2 == cache.misses def test_cache_keys(self): # setup - store = CountingDict() - store['foo'] = b'xxx' - store['bar'] = b'yyy' - assert 0 == store.counter['__contains__', 'foo'] + store = self.CountingClass() + 
foo_key = self.root + 'foo' + bar_key = self.root + 'bar' + baz_key = self.root + 'baz' + store[foo_key] = b'xxx' + store[bar_key] = b'yyy' + assert 0 == store.counter['__contains__', foo_key] assert 0 == store.counter['__iter__'] assert 0 == store.counter['keys'] - cache = LRUStoreCache(store, max_size=None) + cache = self.LRUStoreClass(store, max_size=None) # keys should be cached on first call keys = sorted(cache.keys()) - assert keys == ['bar', 'foo'] + assert keys == [bar_key, foo_key] assert 1 == store.counter['keys'] # keys should now be cached assert keys == sorted(cache.keys()) assert 1 == store.counter['keys'] - assert 'foo' in cache - assert 0 == store.counter['__contains__', 'foo'] + assert foo_key in cache + assert 0 == store.counter['__contains__', foo_key] assert keys == sorted(cache) assert 0 == store.counter['__iter__'] assert 1 == store.counter['keys'] # cache should be cleared if store is modified - crude but simple for now - cache['baz'] = b'zzz' + cache[baz_key] = b'zzz' keys = sorted(cache.keys()) - assert keys == ['bar', 'baz', 'foo'] + assert keys == [bar_key, baz_key, foo_key] assert 2 == store.counter['keys'] # keys should now be cached assert keys == sorted(cache.keys()) @@ -1904,25 +1930,25 @@ def test_cache_keys(self): # manually invalidate keys cache.invalidate_keys() keys = sorted(cache.keys()) - assert keys == ['bar', 'baz', 'foo'] + assert keys == [bar_key, baz_key, foo_key] assert 3 == store.counter['keys'] - assert 0 == store.counter['__contains__', 'foo'] + assert 0 == store.counter['__contains__', foo_key] assert 0 == store.counter['__iter__'] cache.invalidate_keys() keys = sorted(cache) - assert keys == ['bar', 'baz', 'foo'] + assert keys == [bar_key, baz_key, foo_key] assert 4 == store.counter['keys'] - assert 0 == store.counter['__contains__', 'foo'] + assert 0 == store.counter['__contains__', foo_key] assert 0 == store.counter['__iter__'] cache.invalidate_keys() - assert 'foo' in cache + assert foo_key in cache assert 5 == store.counter['keys'] - assert 0 == store.counter['__contains__', 'foo'] + assert 0 == store.counter['__contains__', foo_key] assert 0 == store.counter['__iter__'] # check these would get counted if called directly - assert 'foo' in store - assert 1 == store.counter['__contains__', 'foo'] + assert foo_key in store + assert 1 == store.counter['__contains__', foo_key] assert keys == sorted(store) assert 1 == store.counter['__iter__'] diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index 4f0f553965..f10d49c060 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -20,7 +20,7 @@ from zarr.tests.util import CountingDictV3, have_fsspec, skip_test_env_var from .test_storage import ( - StoreTests, + StoreTests as _StoreTests, TestMemoryStore as _TestMemoryStore, TestDirectoryStore as _TestDirectoryStore, TestFSStore as _TestFSStore, @@ -137,7 +137,10 @@ def test_validate_key(): store._validate_key(invalid) -class StoreV3Tests(StoreTests): +class StoreV3Tests(_StoreTests): + + version = 3 + root = 'meta/root/' def test_getsize(self): # TODO: determine proper getsize() behavior for v3 @@ -172,9 +175,147 @@ def test_getsize(self): store.close() + # Note: Except for a couple of getsize results this method could be moved + # back to the v2 class. Determine what is the correct behavior here. 
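Before the adapted test, the v3 getsize semantics at issue are easier to see in isolation. A minimal sketch, assuming the MemoryStoreV3 class added earlier in this series and the meta/root/ prefix these tests use; the numbers mirror the assertions below:

    from zarr.storage import MemoryStoreV3

    store = MemoryStoreV3()
    store['meta/root/c/d'] = b'ddd'    # 3 bytes
    store['meta/root/c/e/f'] = b'fff'  # 3 bytes
    store['meta/root/c/e/g'] = b'ggg'  # 3 bytes
    # v3 sums every value nested under a path, not just its direct children,
    # so 'c' counts c/d, c/e/f and c/e/g together (v2 semantics would give 3)
    assert store.getsize('c') == 9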
+ # noinspection PyStatementEffect def test_hierarchy(self): - pytest.skip("TODO: adapt v2 test_hierarchy tests to v3") + # setup + + store = self.create_store() + store[self.root + 'a'] = b'aaa' + store[self.root + 'b'] = b'bbb' + store[self.root + 'c/d'] = b'ddd' + store[self.root + 'c/e/f'] = b'fff' + store[self.root + 'c/e/g'] = b'ggg' + + # check keys + assert self.root + 'a' in store + assert self.root + 'b' in store + assert self.root + 'c/d' in store + assert self.root + 'c/e/f' in store + assert self.root + 'c/e/g' in store + assert self.root + 'c' not in store + assert self.root + 'c/' not in store + assert self.root + 'c/e' not in store + assert self.root + 'c/e/' not in store + assert self.root + 'c/d/x' not in store + + # check __getitem__ + with pytest.raises(KeyError): + store[self.root + 'c'] + with pytest.raises(KeyError): + store[self.root + 'c/e'] + with pytest.raises(KeyError): + store[self.root + 'c/d/x'] + + # test getsize (optional) + if hasattr(store, 'getsize'): + # TODO: proper behavior of getsize? + # v3 returns size of all nested arrays, not just the + # size of the arrays in the current folder. + assert 15 == store.getsize() # == 6 for v2 + assert 3 == store.getsize('a') + assert 3 == store.getsize('b') + assert 9 == store.getsize('c') # == 3 for v2 + assert 3 == store.getsize('c/d') + assert 6 == store.getsize('c/e') + assert 3 == store.getsize('c/e/f') + assert 3 == store.getsize('c/e/g') + # non-existent paths + assert 0 == store.getsize('x') + assert 0 == store.getsize('a/x') + assert 0 == store.getsize('c/x') + assert 0 == store.getsize('c/x/y') + assert 0 == store.getsize('c/d/y') + assert 0 == store.getsize('c/d/y/z') + + # test listdir (optional) + if hasattr(store, 'listdir'): + assert {'a', 'b', 'c'} == set(store.listdir(self.root)) + assert {'d', 'e'} == set(store.listdir(self.root + 'c')) + assert {'f', 'g'} == set(store.listdir(self.root + 'c/e')) + # no exception raised if path does not exist or is leaf + assert [] == store.listdir(self.root + 'x') + assert [] == store.listdir(self.root + 'a/x') + assert [] == store.listdir(self.root + 'c/x') + assert [] == store.listdir(self.root + 'c/x/y') + assert [] == store.listdir(self.root + 'c/d/y') + assert [] == store.listdir(self.root + 'c/d/y/z') + assert [] == store.listdir(self.root + 'c/e/f') + + # test rename (optional) + if store.is_erasable(): + store.rename("c/e", "c/e2") + assert self.root + "c/d" in store + assert self.root + "c/e" not in store + assert self.root + "c/e/f" not in store + assert self.root + "c/e/g" not in store + assert self.root + "c/e2" not in store + assert self.root + "c/e2/f" in store + assert self.root + "c/e2/g" in store + store.rename("c/e2", "c/e") + assert self.root + "c/d" in store + assert self.root + "c/e2" not in store + assert self.root + "c/e2/f" not in store + assert self.root + "c/e2/g" not in store + assert self.root + "c/e" not in store + assert self.root + "c/e/f" in store + assert self.root + "c/e/g" in store + store.rename("c", "c1/c2/c3") + assert self.root + "a" in store + assert self.root + "c" not in store + assert self.root + "c/d" not in store + assert self.root + "c/e" not in store + assert self.root + "c/e/f" not in store + assert self.root + "c/e/g" not in store + assert self.root + "c1" not in store + assert self.root + "c1/c2" not in store + assert self.root + "c1/c2/c3" not in store + assert self.root + "c1/c2/c3/d" in store + assert self.root + "c1/c2/c3/e" not in store + assert self.root + "c1/c2/c3/e/f" in store + assert self.root + 
"c1/c2/c3/e/g" in store + store.rename("c1/c2/c3", "c") + assert self.root + "c" not in store + assert self.root + "c/d" in store + assert self.root + "c/e" not in store + assert self.root + "c/e/f" in store + assert self.root + "c/e/g" in store + assert self.root + "c1" not in store + assert self.root + "c1/c2" not in store + assert self.root + "c1/c2/c3" not in store + assert self.root + "c1/c2/c3/d" not in store + assert self.root + "c1/c2/c3/e" not in store + assert self.root + "c1/c2/c3/e/f" not in store + assert self.root + "c1/c2/c3/e/g" not in store + + # test rmdir (optional) + store.rmdir("c/e") + assert self.root + "c/d" in store + assert self.root + "c/e/f" not in store + assert self.root + "c/e/g" not in store + store.rmdir("c") + assert self.root + "c/d" not in store + store.rmdir() + assert self.root + 'a' not in store + assert self.root + 'b' not in store + store[self.root + 'a'] = b'aaa' + store[self.root + 'c/d'] = b'ddd' + store[self.root + 'c/e/f'] = b'fff' + # no exceptions raised if path does not exist or is leaf + store.rmdir('x') + store.rmdir('a/x') + store.rmdir('c/x') + store.rmdir('c/x/y') + store.rmdir('c/d/y') + store.rmdir('c/d/y/z') + store.rmdir('c/e/f') + assert self.root + 'a' in store + assert self.root + 'c/d' in store + assert self.root + 'c/e/f' in store + + store.close() def test_init_array(self, dimension_separator_fixture_v3): @@ -451,6 +592,23 @@ def _test_init_group_overwrite_chunk_store(self, order): "In v3 array and group names cannot overlap" ) + def test_list_prefix(self): + + store = self.create_store() + path = 'arr1' + init_array(store, path=path, shape=1000, chunks=100) + + expected = ['meta/root/arr1.array.json', 'zarr.json'] + assert sorted(store.list_prefix('')) == expected + + expected = ['meta/root/arr1.array.json'] + assert sorted(store.list_prefix('meta/root')) == expected + + # cannot start prefix with '/' + with pytest.raises(ValueError): + store.list_prefix(prefix='/meta/root') + + class TestMappingStoreV3(StoreV3Tests): @@ -566,7 +724,7 @@ def test_init_array(self): init_array(store, path=path, shape=1000, chunks=100) # check metadata - array_meta_key = 'meta/root/' + path + '.array.json' + array_meta_key = self.root + path + '.array.json' assert array_meta_key in store meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) assert (1000,) == meta['shape'] @@ -707,228 +865,8 @@ def create_store(self, **kwargs): class TestLRUStoreCacheV3(_TestLRUStoreCache, StoreV3Tests): - def create_store(self, **kwargs): - # wrapper therefore no dimension_separator argument - skip_if_nested_chunks(**kwargs) - return LRUStoreCacheV3(dict(), max_size=2**27) - - def test_cache_values_no_max_size(self): - - # setup store - store = CountingDictV3() - store['foo'] = b'xxx' - store['bar'] = b'yyy' - assert 0 == store.counter['__getitem__', 'foo'] - assert 1 == store.counter['__setitem__', 'foo'] - assert 0 == store.counter['__getitem__', 'bar'] - assert 1 == store.counter['__setitem__', 'bar'] - - # setup cache - cache = LRUStoreCacheV3(store, max_size=None) - assert 0 == cache.hits - assert 0 == cache.misses - - # test first __getitem__, cache miss - assert b'xxx' == cache['foo'] - assert 1 == store.counter['__getitem__', 'foo'] - assert 1 == store.counter['__setitem__', 'foo'] - assert 0 == cache.hits - assert 1 == cache.misses - - # test second __getitem__, cache hit - assert b'xxx' == cache['foo'] - assert 1 == store.counter['__getitem__', 'foo'] - assert 1 == store.counter['__setitem__', 'foo'] - assert 1 == cache.hits - 
assert 1 == cache.misses - - # test __setitem__, __getitem__ - cache['foo'] = b'zzz' - assert 1 == store.counter['__getitem__', 'foo'] - assert 2 == store.counter['__setitem__', 'foo'] - # should be a cache hit - assert b'zzz' == cache['foo'] - assert 1 == store.counter['__getitem__', 'foo'] - assert 2 == store.counter['__setitem__', 'foo'] - assert 2 == cache.hits - assert 1 == cache.misses - - # manually invalidate all cached values - cache.invalidate_values() - assert b'zzz' == cache['foo'] - assert 2 == store.counter['__getitem__', 'foo'] - assert 2 == store.counter['__setitem__', 'foo'] - cache.invalidate() - assert b'zzz' == cache['foo'] - assert 3 == store.counter['__getitem__', 'foo'] - assert 2 == store.counter['__setitem__', 'foo'] - - # test __delitem__ - del cache['foo'] - with pytest.raises(KeyError): - # noinspection PyStatementEffect - cache['foo'] - with pytest.raises(KeyError): - # noinspection PyStatementEffect - store['foo'] - - # verify other keys untouched - assert 0 == store.counter['__getitem__', 'bar'] - assert 1 == store.counter['__setitem__', 'bar'] - - def test_cache_values_with_max_size(self): - - # setup store - store = CountingDictV3() - store['foo'] = b'xxx' - store['bar'] = b'yyy' - assert 0 == store.counter['__getitem__', 'foo'] - assert 0 == store.counter['__getitem__', 'bar'] - # setup cache - can only hold one item - cache = LRUStoreCacheV3(store, max_size=5) - assert 0 == cache.hits - assert 0 == cache.misses - - # test first 'foo' __getitem__, cache miss - assert b'xxx' == cache['foo'] - assert 1 == store.counter['__getitem__', 'foo'] - assert 0 == cache.hits - assert 1 == cache.misses - - # test second 'foo' __getitem__, cache hit - assert b'xxx' == cache['foo'] - assert 1 == store.counter['__getitem__', 'foo'] - assert 1 == cache.hits - assert 1 == cache.misses - - # test first 'bar' __getitem__, cache miss - assert b'yyy' == cache['bar'] - assert 1 == store.counter['__getitem__', 'bar'] - assert 1 == cache.hits - assert 2 == cache.misses - - # test second 'bar' __getitem__, cache hit - assert b'yyy' == cache['bar'] - assert 1 == store.counter['__getitem__', 'bar'] - assert 2 == cache.hits - assert 2 == cache.misses - - # test 'foo' __getitem__, should have been evicted, cache miss - assert b'xxx' == cache['foo'] - assert 2 == store.counter['__getitem__', 'foo'] - assert 2 == cache.hits - assert 3 == cache.misses - - # test 'bar' __getitem__, should have been evicted, cache miss - assert b'yyy' == cache['bar'] - assert 2 == store.counter['__getitem__', 'bar'] - assert 2 == cache.hits - assert 4 == cache.misses - - # setup store - store = CountingDictV3() - store['foo'] = b'xxx' - store['bar'] = b'yyy' - assert 0 == store.counter['__getitem__', 'foo'] - assert 0 == store.counter['__getitem__', 'bar'] - # setup cache - can hold two items - cache = LRUStoreCacheV3(store, max_size=6) - assert 0 == cache.hits - assert 0 == cache.misses - - # test first 'foo' __getitem__, cache miss - assert b'xxx' == cache['foo'] - assert 1 == store.counter['__getitem__', 'foo'] - assert 0 == cache.hits - assert 1 == cache.misses - - # test second 'foo' __getitem__, cache hit - assert b'xxx' == cache['foo'] - assert 1 == store.counter['__getitem__', 'foo'] - assert 1 == cache.hits - assert 1 == cache.misses - - # test first 'bar' __getitem__, cache miss - assert b'yyy' == cache['bar'] - assert 1 == store.counter['__getitem__', 'bar'] - assert 1 == cache.hits - assert 2 == cache.misses - - # test second 'bar' __getitem__, cache hit - assert b'yyy' == cache['bar'] - assert 
1 == store.counter['__getitem__', 'bar'] - assert 2 == cache.hits - assert 2 == cache.misses - - # test 'foo' __getitem__, should still be cached - assert b'xxx' == cache['foo'] - assert 1 == store.counter['__getitem__', 'foo'] - assert 3 == cache.hits - assert 2 == cache.misses - - # test 'bar' __getitem__, should still be cached - assert b'yyy' == cache['bar'] - assert 1 == store.counter['__getitem__', 'bar'] - assert 4 == cache.hits - assert 2 == cache.misses - - def test_cache_keys(self): - - # setup - store = CountingDictV3() - store['foo'] = b'xxx' - store['bar'] = b'yyy' - assert 0 == store.counter['__contains__', 'foo'] - assert 0 == store.counter['__iter__'] - assert 0 == store.counter['keys'] - cache = LRUStoreCacheV3(store, max_size=None) - - # keys should be cached on first call - keys = sorted(cache.keys()) - assert keys == ['bar', 'foo'] - assert 1 == store.counter['keys'] - # keys should now be cached - assert keys == sorted(cache.keys()) - assert 1 == store.counter['keys'] - assert 'foo' in cache - assert 0 == store.counter['__contains__', 'foo'] - assert keys == sorted(cache) - assert 0 == store.counter['__iter__'] - assert 1 == store.counter['keys'] - - # cache should be cleared if store is modified - crude but simple for now - cache['baz'] = b'zzz' - keys = sorted(cache.keys()) - assert keys == ['bar', 'baz', 'foo'] - assert 2 == store.counter['keys'] - # keys should now be cached - assert keys == sorted(cache.keys()) - assert 2 == store.counter['keys'] - - # manually invalidate keys - cache.invalidate_keys() - keys = sorted(cache.keys()) - assert keys == ['bar', 'baz', 'foo'] - assert 3 == store.counter['keys'] - assert 0 == store.counter['__contains__', 'foo'] - assert 0 == store.counter['__iter__'] - cache.invalidate_keys() - keys = sorted(cache) - assert keys == ['bar', 'baz', 'foo'] - assert 4 == store.counter['keys'] - assert 0 == store.counter['__contains__', 'foo'] - assert 0 == store.counter['__iter__'] - cache.invalidate_keys() - assert 'foo' in cache - assert 5 == store.counter['keys'] - assert 0 == store.counter['__contains__', 'foo'] - assert 0 == store.counter['__iter__'] - - # check these would get counted if called directly - assert 'foo' in store - assert 1 == store.counter['__contains__', 'foo'] - assert keys == sorted(store) - assert 1 == store.counter['__iter__'] + CountingClass = CountingDictV3 + LRUStoreClass = LRUStoreCacheV3 # TODO: implement ABSStoreV3 From f4bedd6f33ee730a52cf12cdc095078fe26b5d33 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Sun, 19 Dec 2021 23:14:03 -0500 Subject: [PATCH 042/109] Fix core V3 tests now that keys are validated on __setitem__ --- zarr/tests/test_core_v3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/tests/test_core_v3.py b/zarr/tests/test_core_v3.py index 8f8f8b5298..bdbef54d5d 100644 --- a/zarr/tests/test_core_v3.py +++ b/zarr/tests/test_core_v3.py @@ -240,7 +240,7 @@ def test_nchunks_initialized(self): z = self.create_array(shape=100, chunks=10) assert 0 == z.nchunks_initialized # manually put something into the store to confuse matters - z.store['foo'] = b'bar' + z.store['meta/root/foo'] = b'bar' assert 0 == z.nchunks_initialized z[:] = 42 assert 10 == z.nchunks_initialized From 95c0a2218e7477790d964a4e4049999be36f6d14 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Sun, 19 Dec 2021 23:15:35 -0500 Subject: [PATCH 043/109] pep8 in storage_v3 tests --- zarr/tests/test_storage_v3.py | 1 + 1 file changed, 1 insertion(+) diff --git a/zarr/tests/test_storage_v3.py 
b/zarr/tests/test_storage_v3.py index f10d49c060..8f608bb564 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -81,6 +81,7 @@ class InvalidDummyStore(): def keys(self): pass + def test_ensure_store_v3(): class InvalidStore: pass From e321c12dfa60db483b371807bdebb85ccb0b91f8 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Sun, 19 Dec 2021 23:16:27 -0500 Subject: [PATCH 044/109] flake8 in test_convenience.py --- zarr/tests/test_convenience.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index 59e6a34ed1..265ebc0c5d 100644 --- a/zarr/tests/test_convenience.py +++ b/zarr/tests/test_convenience.py @@ -173,11 +173,11 @@ def test_load_array(zarr_version): def test_tree(zarr_version): kwargs = _init_creation_kwargs(zarr_version) g1 = zarr.group(**kwargs) - g2 = g1.create_group('foo') + g1.create_group('foo') g3 = g1.create_group('bar') - g4 = g3.create_group('baz') + g3.create_group('baz') g5 = g3.create_group('qux') - d1 = g5.create_dataset('baz', shape=100, chunks=10) + g5.create_dataset('baz', shape=100, chunks=10) assert repr(zarr.tree(g1)) == repr(g1.tree()) assert str(zarr.tree(g1)) == str(g1.tree()) From 2d9058e01c6b51914e1c670825d5948cf66bdba7 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Sun, 19 Dec 2021 23:18:11 -0500 Subject: [PATCH 045/109] pep8 --- zarr/_storage/store.py | 1 - zarr/storage.py | 11 +++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index e60f768f83..9fc8c9fa17 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -1,5 +1,4 @@ import abc -import sys from collections.abc import MutableMapping from string import ascii_letters, digits from typing import Any, List, Optional, Union diff --git a/zarr/storage.py b/zarr/storage.py index 52a4ed2b4b..f15e03f598 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -138,10 +138,10 @@ def normalize_store_arg(store, clobber=False, storage_options=None, mode="w", store['zarr.json'] = store._metadata_class.encode_hierarchy_metadata(None) return store elif hasattr(store, '_store_version') and store._store_version != zarr_version: - raise ValueError( - f"store is a zarr v{store._store_version} store which conflicts " - f"with the specified zarr_version ({zarr_version})." - ) + raise ValueError( + f"store is a zarr v{store._store_version} store which conflicts " + f"with the specified zarr_version ({zarr_version})." 
+ ) if isinstance(store, os.PathLike): store = os.fspath(store) @@ -2960,6 +2960,7 @@ def __eq__(self, other): self._mutable_mapping == other._mutable_mapping ) + KVStoreV3.__doc__ = KVStore.__doc__ @@ -3103,6 +3104,7 @@ def rmdir(self, path: Path = None): # clear out root self.root = self.cls() + MemoryStoreV3.__doc__ = MemoryStore.__doc__ @@ -3176,6 +3178,7 @@ def rmdir(self, path=None): elif os.path.isdir(dir_path): shutil.rmtree(dir_path) + DirectoryStoreV3.__doc__ = DirectoryStore.__doc__ From 63d8d27fc914eb6e55edfc8dd766cc0a31526831 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Mon, 20 Dec 2021 10:36:30 -0500 Subject: [PATCH 046/109] fix test_attrs.py validate_key requires attr key to start with meta/ or data/ in v3 --- zarr/attrs.py | 8 +-- zarr/tests/test_attrs.py | 139 ++++++++++++++++++++------------------- 2 files changed, 75 insertions(+), 72 deletions(-) diff --git a/zarr/attrs.py b/zarr/attrs.py index ff3caea958..39683d45d9 100644 --- a/zarr/attrs.py +++ b/zarr/attrs.py @@ -28,8 +28,6 @@ def __init__(self, store, key='.zattrs', read_only=False, cache=True, synchronizer=None): self._version = getattr(store, '_store_version', 2) - assert key - _Store = Store if self._version == 2 else StoreV3 self.store = _Store._ensure_store(store) self.key = key @@ -63,10 +61,10 @@ def asdict(self): def refresh(self): """Refresh cached attributes from the store.""" if self.cache: - if self._version == 3: - self._cached_asdict = self._get_nosync()['attributes'] - else: + if self._version == 2: self._cached_asdict = self._get_nosync() + else: + self._cached_asdict = self._get_nosync()['attributes'] def __contains__(self, x): return x in self.asdict() diff --git a/zarr/tests/test_attrs.py b/zarr/tests/test_attrs.py index 62faf662da..03625c9bae 100644 --- a/zarr/tests/test_attrs.py +++ b/zarr/tests/test_attrs.py @@ -21,13 +21,16 @@ def _init_store(version): class TestAttributes(): - def init_attributes(self, store, read_only=False, cache=True): - return Attributes(store, key='attrs', read_only=read_only, cache=cache) + def init_attributes(self, store, read_only=False, cache=True, zarr_version=2): + root = '.z' if zarr_version == 2 else 'meta/root/' + return Attributes(store, key=root + 'attrs', read_only=read_only, cache=cache) def test_storage(self, zarr_version): store = _init_store(zarr_version) - a = Attributes(store=store, key='attrs') + root = '.z' if zarr_version == 2 else 'meta/root/' + attrs_key = root + 'attrs' + a = Attributes(store=store, key=attrs_key) assert isinstance(a.store, KVStore) assert 'foo' not in a assert 'bar' not in a @@ -35,9 +38,9 @@ def test_storage(self, zarr_version): a['foo'] = 'bar' a['baz'] = 42 - assert 'attrs' in store - assert isinstance(store['attrs'], bytes) - d = json.loads(str(store['attrs'], 'ascii')) + assert attrs_key in store + assert isinstance(store[attrs_key], bytes) + d = json.loads(str(store[attrs_key], 'ascii')) if zarr_version == 3: d = d['attributes'] assert dict(foo='bar', baz=42) == d @@ -45,7 +48,7 @@ def test_storage(self, zarr_version): def test_get_set_del_contains(self, zarr_version): store = _init_store(zarr_version) - a = self.init_attributes(store) + a = self.init_attributes(store, zarr_version=zarr_version) assert 'foo' not in a a['foo'] = 'bar' a['baz'] = 42 @@ -62,7 +65,7 @@ def test_get_set_del_contains(self, zarr_version): def test_update_put(self, zarr_version): store = _init_store(zarr_version) - a = self.init_attributes(store) + a = self.init_attributes(store, zarr_version=zarr_version) assert 'foo' not in a assert 'bar' 
not in a assert 'baz' not in a @@ -80,7 +83,7 @@ def test_update_put(self, zarr_version): def test_iterators(self, zarr_version): store = _init_store(zarr_version) - a = self.init_attributes(store) + a = self.init_attributes(store, zarr_version=zarr_version) assert 0 == len(a) assert set() == set(a) assert set() == set(a.keys()) @@ -98,11 +101,11 @@ def test_iterators(self, zarr_version): def test_read_only(self, zarr_version): store = _init_store(zarr_version) - a = self.init_attributes(store, read_only=True) + a = self.init_attributes(store, read_only=True, zarr_version=zarr_version) if zarr_version == 2: - store['attrs'] = json.dumps(dict(foo='bar', baz=42)).encode('ascii') + store['.zattrs'] = json.dumps(dict(foo='bar', baz=42)).encode('ascii') else: - store['attrs'] = json.dumps(dict(attributes=dict(foo='bar', baz=42))).encode('ascii') + store['meta/root/attrs'] = json.dumps(dict(attributes=dict(foo='bar', baz=42))).encode('ascii') assert a['foo'] == 'bar' assert a['baz'] == 42 with pytest.raises(PermissionError): @@ -114,7 +117,7 @@ def test_read_only(self, zarr_version): def test_key_completions(self, zarr_version): store = _init_store(zarr_version) - a = self.init_attributes(store) + a = self.init_attributes(store, zarr_version=zarr_version) d = a._ipython_key_completions_() assert 'foo' not in d assert '123' not in d @@ -134,128 +137,130 @@ def test_caching_on(self, zarr_version): # setup store store = CountingDict() if zarr_version == 2 else CountingDictV3() - assert 0 == store.counter['__getitem__', 'attrs'] - assert 0 == store.counter['__setitem__', 'attrs'] + attrs_key = '.zattrs' if zarr_version == 2 else 'meta/root/attrs' + assert 0 == store.counter['__getitem__', attrs_key] + assert 0 == store.counter['__setitem__', attrs_key] if zarr_version == 2: - store['attrs'] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii') + store[attrs_key] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii') else: - store['attrs'] = json.dumps(dict(attributes=dict(foo='xxx', bar=42))).encode('ascii') - assert 0 == store.counter['__getitem__', 'attrs'] - assert 1 == store.counter['__setitem__', 'attrs'] + store[attrs_key] = json.dumps(dict(attributes=dict(foo='xxx', bar=42))).encode('ascii') + assert 0 == store.counter['__getitem__', attrs_key] + assert 1 == store.counter['__setitem__', attrs_key] # setup attributes - a = self.init_attributes(store) + a = self.init_attributes(store, zarr_version=zarr_version) # test __getitem__ causes all attributes to be cached assert a['foo'] == 'xxx' - assert 1 == store.counter['__getitem__', 'attrs'] + assert 1 == store.counter['__getitem__', attrs_key] assert a['bar'] == 42 - assert 1 == store.counter['__getitem__', 'attrs'] + assert 1 == store.counter['__getitem__', attrs_key] assert a['foo'] == 'xxx' - assert 1 == store.counter['__getitem__', 'attrs'] + assert 1 == store.counter['__getitem__', attrs_key] # test __setitem__ updates the cache a['foo'] = 'yyy' get_cnt = 2 if zarr_version == 2 else 3 - assert get_cnt == store.counter['__getitem__', 'attrs'] - assert 2 == store.counter['__setitem__', 'attrs'] + assert get_cnt == store.counter['__getitem__', attrs_key] + assert 2 == store.counter['__setitem__', attrs_key] assert a['foo'] == 'yyy' - assert get_cnt == store.counter['__getitem__', 'attrs'] - assert 2 == store.counter['__setitem__', 'attrs'] + assert get_cnt == store.counter['__getitem__', attrs_key] + assert 2 == store.counter['__setitem__', attrs_key] # test update() updates the cache a.update(foo='zzz', bar=84) get_cnt = 3 if zarr_version == 2 
else 5 - assert get_cnt == store.counter['__getitem__', 'attrs'] - assert 3 == store.counter['__setitem__', 'attrs'] + assert get_cnt == store.counter['__getitem__', attrs_key] + assert 3 == store.counter['__setitem__', attrs_key] assert a['foo'] == 'zzz' assert a['bar'] == 84 - assert get_cnt == store.counter['__getitem__', 'attrs'] - assert 3 == store.counter['__setitem__', 'attrs'] + assert get_cnt == store.counter['__getitem__', attrs_key] + assert 3 == store.counter['__setitem__', attrs_key] # test __contains__ uses the cache assert 'foo' in a - assert get_cnt == store.counter['__getitem__', 'attrs'] - assert 3 == store.counter['__setitem__', 'attrs'] + assert get_cnt == store.counter['__getitem__', attrs_key] + assert 3 == store.counter['__setitem__', attrs_key] assert 'spam' not in a - assert get_cnt == store.counter['__getitem__', 'attrs'] - assert 3 == store.counter['__setitem__', 'attrs'] + assert get_cnt == store.counter['__getitem__', attrs_key] + assert 3 == store.counter['__setitem__', attrs_key] # test __delitem__ updates the cache del a['bar'] get_cnt = 4 if zarr_version == 2 else 7 - assert get_cnt == store.counter['__getitem__', 'attrs'] - assert 4 == store.counter['__setitem__', 'attrs'] + assert get_cnt == store.counter['__getitem__', attrs_key] + assert 4 == store.counter['__setitem__', attrs_key] assert 'bar' not in a - assert get_cnt == store.counter['__getitem__', 'attrs'] - assert 4 == store.counter['__setitem__', 'attrs'] + assert get_cnt == store.counter['__getitem__', attrs_key] + assert 4 == store.counter['__setitem__', attrs_key] # test refresh() if zarr_version == 2: - store['attrs'] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii') + store[attrs_key] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii') else: - store['attrs'] = json.dumps(dict(attributes=dict(foo='xxx', bar=42))).encode('ascii') - assert get_cnt == store.counter['__getitem__', 'attrs'] + store[attrs_key] = json.dumps(dict(attributes=dict(foo='xxx', bar=42))).encode('ascii') + assert get_cnt == store.counter['__getitem__', attrs_key] a.refresh() get_cnt = 5 if zarr_version == 2 else 8 - assert get_cnt == store.counter['__getitem__', 'attrs'] + assert get_cnt == store.counter['__getitem__', attrs_key] assert a['foo'] == 'xxx' - assert get_cnt == store.counter['__getitem__', 'attrs'] + assert get_cnt == store.counter['__getitem__', attrs_key] assert a['bar'] == 42 - assert get_cnt == store.counter['__getitem__', 'attrs'] + assert get_cnt == store.counter['__getitem__', attrs_key] def test_caching_off(self, zarr_version): # setup store store = CountingDict() if zarr_version == 2 else CountingDictV3() - assert 0 == store.counter['__getitem__', 'attrs'] - assert 0 == store.counter['__setitem__', 'attrs'] + attrs_key = '.zattrs' if zarr_version == 2 else 'meta/root/attrs' + assert 0 == store.counter['__getitem__', attrs_key] + assert 0 == store.counter['__setitem__', attrs_key] if zarr_version == 2: - store['attrs'] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii') + store[attrs_key] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii') else: - store['attrs'] = json.dumps(dict(attributes=dict(foo='xxx', bar=42))).encode('ascii') - assert 0 == store.counter['__getitem__', 'attrs'] - assert 1 == store.counter['__setitem__', 'attrs'] + store[attrs_key] = json.dumps(dict(attributes=dict(foo='xxx', bar=42))).encode('ascii') + assert 0 == store.counter['__getitem__', attrs_key] + assert 1 == store.counter['__setitem__', attrs_key] # setup attributes - a = self.init_attributes(store, cache=False) 
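The cache=False counts asserted in this hunk reduce to one behavior: with the cache disabled, every attribute read goes back to the store. A minimal sketch, assuming the CountingDict helper from zarr.tests.util and the v2 '.zattrs' key used in this file:

    from zarr.attrs import Attributes
    from zarr.tests.util import CountingDict

    store = CountingDict()
    a = Attributes(store, key='.zattrs', cache=False)
    a['foo'] = 'bar'   # one store read (to merge) plus one store write
    a['foo']
    a['foo']
    # no caching, so the two reads above each hit the underlying store;
    # with cache=True they would be served from the cached dict instead
    assert store.counter['__getitem__', '.zattrs'] >= 2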
+ a = self.init_attributes(store, cache=False, zarr_version=zarr_version)

 # test __getitem__
 assert a['foo'] == 'xxx'
- assert 1 == store.counter['__getitem__', 'attrs']
+ assert 1 == store.counter['__getitem__', attrs_key]
 assert a['bar'] == 42
- assert 2 == store.counter['__getitem__', 'attrs']
+ assert 2 == store.counter['__getitem__', attrs_key]
 assert a['foo'] == 'xxx'
- assert 3 == store.counter['__getitem__', 'attrs']
+ assert 3 == store.counter['__getitem__', attrs_key]

 # test __setitem__
 a['foo'] = 'yyy'
 get_cnt = 4 if zarr_version == 2 else 5
- assert get_cnt == store.counter['__getitem__', 'attrs']
- assert 2 == store.counter['__setitem__', 'attrs']
+ assert get_cnt == store.counter['__getitem__', attrs_key]
+ assert 2 == store.counter['__setitem__', attrs_key]
 assert a['foo'] == 'yyy'
 get_cnt = 5 if zarr_version == 2 else 6
- assert get_cnt == store.counter['__getitem__', 'attrs']
- assert 2 == store.counter['__setitem__', 'attrs']
+ assert get_cnt == store.counter['__getitem__', attrs_key]
+ assert 2 == store.counter['__setitem__', attrs_key]

 # test update()
 a.update(foo='zzz', bar=84)
 get_cnt = 6 if zarr_version == 2 else 8
- assert get_cnt == store.counter['__getitem__', 'attrs']
- assert 3 == store.counter['__setitem__', 'attrs']
+ assert get_cnt == store.counter['__getitem__', attrs_key]
+ assert 3 == store.counter['__setitem__', attrs_key]
 assert a['foo'] == 'zzz'
 assert a['bar'] == 84
 get_cnt = 8 if zarr_version == 2 else 10
- assert get_cnt == store.counter['__getitem__', 'attrs']
- assert 3 == store.counter['__setitem__', 'attrs']
+ assert get_cnt == store.counter['__getitem__', attrs_key]
+ assert 3 == store.counter['__setitem__', attrs_key]

 # test __contains__
 assert 'foo' in a
 get_cnt = 9 if zarr_version == 2 else 11
- assert get_cnt == store.counter['__getitem__', 'attrs']
- assert 3 == store.counter['__setitem__', 'attrs']
+ assert get_cnt == store.counter['__getitem__', attrs_key]
+ assert 3 == store.counter['__setitem__', attrs_key]
 assert 'spam' not in a
 get_cnt = 10 if zarr_version == 2 else 12
- assert get_cnt == store.counter['__getitem__', 'attrs']
- assert 3 == store.counter['__setitem__', 'attrs']
+ assert get_cnt == store.counter['__getitem__', attrs_key]
+ assert 3 == store.counter['__setitem__', attrs_key]

From 8417dc314b72250fef8e797c6cb8d2fe75c93b62 Mon Sep 17 00:00:00 2001
From: Gregory Lee
Date: Mon, 20 Dec 2021 10:47:14 -0500
Subject: [PATCH 047/109] Fix SQLiteStore

changes to rmdir were intended for SQLiteStoreV3, not SQLiteStore

---
 zarr/storage.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/zarr/storage.py b/zarr/storage.py
index f15e03f598..4e4cfd2c51 100644
--- a/zarr/storage.py
+++ b/zarr/storage.py
@@ -2667,11 +2667,10 @@ def getsize(self, path=None):
 def rmdir(self, path=None):
 path = normalize_storage_path(path)
 if path:
- for base in ['meta/root/', 'data/root/']:
- with self.lock:
- self.cursor.execute(
- 'DELETE FROM zarr WHERE k LIKE (? || "/%")', (base + path,)
- )
+ with self.lock:
+ self.cursor.execute(
+ 'DELETE FROM zarr WHERE k LIKE (? || "/%")', (path,)
+ )
 else:
 self.clear()

@@ -3327,6 +3326,17 @@ def __setitem__(self, key, value):
 self._validate_key(key)
 super().__setitem__(key, value)

+ def rmdir(self, path=None):
+ path = normalize_storage_path(path)
+ if path:
+ for base in ['meta/root/', 'data/root/']:
+ with self.lock:
+ self.cursor.execute(
+ 'DELETE FROM zarr WHERE k LIKE (?
|| "/%")', (base + path,) + ) + else: + self.clear() + SQLiteStoreV3.__doc__ = SQLiteStore.__doc__ From b9c9ed4b506ec72a5ed230bb1653068431a96e9b Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Mon, 20 Dec 2021 10:53:14 -0500 Subject: [PATCH 048/109] fix failing hierarchy test --- zarr/tests/test_hierarchy.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 046995fd3e..b54f3211cc 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -1629,7 +1629,12 @@ def test_group_key_completions(zarr_version): g.zeros('yyy', shape=100) g.zeros('zzz', shape=100) g.zeros('456', shape=100) - g.zeros('asdf;', shape=100) + if zarr_version == 2: + g.zeros('asdf;', shape=100) + else: + # cannot have ; in key name for v3 + with pytest.raises(ValueError): + g.zeros('asdf;', shape=100) d = dir(g) # noinspection PyProtectedMember @@ -1644,7 +1649,8 @@ def test_group_key_completions(zarr_version): assert 'zzz' in d assert '123' not in d # not valid identifier assert '456' not in d # not valid identifier - assert 'asdf;' not in d # not valid identifier + if zarr_version == 2: + assert 'asdf;' not in d # not valid identifier assert 'foo' in k assert 'bar' in k @@ -1655,7 +1661,8 @@ def test_group_key_completions(zarr_version): assert 'zzz' in k assert '123' in k assert '456' in k - assert 'asdf;' in k + if zarr_version == 2: + assert 'asdf;' in k def _check_tree(g, expect_bytes, expect_text): From efb0b38d144e1083822be7abe108a906c011c8a6 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Mon, 20 Dec 2021 11:59:33 -0500 Subject: [PATCH 049/109] update ZipStore tests to make sure they all run on V3 --- zarr/_storage/store.py | 30 ++++++++++++++++++++++- zarr/storage.py | 40 +++++++++++-------------------- zarr/tests/test_storage.py | 45 ++++++++++++++++++++++------------- zarr/tests/test_storage_v3.py | 11 ++------- 4 files changed, 73 insertions(+), 53 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 9fc8c9fa17..2d07027866 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -132,7 +132,6 @@ def rmdir(self, path: str = "") -> None: path = normalize_storage_path(path) _rmdir_from_keys(self, path) - _valid_key_characters = set(ascii_letters + digits + "/.-_") @@ -287,6 +286,14 @@ def _ensure_store(store): f"Zarr.storage.KVStoreV3. 
Got {store}" ) + def rmdir(self, path: str = "") -> None: + if not self.is_erasable(): + raise NotImplementedError( + f'{type(self)} is not erasable, cannot call "rmdir"' + ) # pragma: no cover + path = normalize_storage_path(path) + _rmdir_from_keys_v3(self, path) + # allow MutableMapping for backwards compatibility StoreLike = Union[BaseStore, MutableMapping] @@ -352,6 +359,27 @@ def _rmdir_from_keys(store: StoreLike, path: Optional[str] = None) -> None: del store[key] +def _rmdir_from_keys_v3(store: BaseStore, path: Optional[str]="") -> None: + + meta_dir = 'meta/root/' + path + meta_dir = meta_dir.rstrip('/') + _rmdir_from_keys(store, meta_dir) + + # remove data folder + data_dir = 'data/root/' + path + data_dir = data_dir.rstrip('/') + _rmdir_from_keys(store, data_dir) + + # remove metadata files + sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] + array_meta_file = meta_dir + '.array' + sfx + if array_meta_file in store: + store.erase(array_meta_file) # type: ignore + group_meta_file = meta_dir + '.group' + sfx + if group_meta_file in store: + store.erase(group_meta_file) # type: ignore + + def _listdir_from_keys(store: BaseStore, path: Optional[str] = None) -> List[str]: # assume path already normalized prefix = _path_to_prefix(path) diff --git a/zarr/storage.py b/zarr/storage.py index 4e4cfd2c51..2800d61b16 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -62,6 +62,7 @@ _listdir_from_keys, _rename_from_keys, _rmdir_from_keys, + _rmdir_from_keys_v3, _path_to_prefix, _prefix_to_array_key, _prefix_to_group_key, @@ -199,33 +200,16 @@ def rmdir(store: StoreLike, path: Path = None): this will be called, otherwise will fall back to implementation via the `Store` interface.""" path = normalize_storage_path(path) - if getattr(store, '_store_version', 2) == 2: - if hasattr(store, "rmdir") and store.is_erasable(): # type: ignore - # pass through - store.rmdir(path) # type: ignore - else: - # slow version, delete one key at a time - _rmdir_from_keys(store, path) + store_version = getattr(store, '_store_version', 2) == 2 + if hasattr(store, "rmdir") and store.is_erasable(): # type: ignore + # pass through + store.rmdir(path) # type: ignore else: - # TODO: check behavior for v3 and fix in the Store class, deferring to - # those by default - - # remove metadata folder - meta_dir = 'meta/root/' + path - _rmdir_from_keys(store, meta_dir) - - # remove data folder - data_dir = 'data/root/' + path - _rmdir_from_keys(store, data_dir) - - # remove metadata files - sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] - array_meta_file = meta_dir + '.array' + sfx - if array_meta_file in store: - store.erase(array_meta_file) # type: ignore - group_meta_file = meta_dir + '.group' + sfx - if group_meta_file in store: - store.erase(group_meta_file) # type: ignore + # slow version, delete one key at a time + if store_version == 2: + _rmdir_from_keys(store, path) + else: + _rmdir_from_keys_v3(store, path) def rename(store: Store, src_path: Path, dst_path: Path): @@ -2959,6 +2943,10 @@ def __eq__(self, other): self._mutable_mapping == other._mutable_mapping ) + def rmdir(self, path: str = "") -> None: + path = normalize_storage_path(path) + _rmdir_from_keys_v3(self, path) + KVStoreV3.__doc__ = KVStore.__doc__ diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 9015cc99c7..06e2156bed 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1515,6 +1515,8 @@ def test_setdel(self): class TestZipStore(StoreTests): + ZipStoreClass = ZipStore + def 
create_store(self, **kwargs): path = tempfile.mktemp(suffix='.zip') atexit.register(os.remove, path) @@ -1522,22 +1524,22 @@ def create_store(self, **kwargs): return store def test_mode(self): - with ZipStore('data/store.zip', mode='w') as store: - store['foo'] = b'bar' - store = ZipStore('data/store.zip', mode='r') + with self.ZipStoreClass('data/store.zip', mode='w') as store: + store[self.root + 'foo'] = b'bar' + store = self.ZipStoreClass('data/store.zip', mode='r') with pytest.raises(PermissionError): - store['foo'] = b'bar' + store[self.root + 'foo'] = b'bar' with pytest.raises(PermissionError): store.clear() def test_flush(self): - store = ZipStore('data/store.zip', mode='w') - store['foo'] = b'bar' + store = self.ZipStoreClass('data/store.zip', mode='w') + store[self.root + 'foo'] = b'bar' store.flush() - assert store['foo'] == b'bar' + assert store[self.root + 'foo'] == b'bar' store.close() - store = ZipStore('data/store.zip', mode='r') + store = self.ZipStoreClass('data/store.zip', mode='r') store.flush() # no-op def test_context_manager(self): @@ -1549,32 +1551,41 @@ def test_context_manager(self): def test_pop(self): # override because not implemented store = self.create_store() - store['foo'] = b'bar' + store[self.root + 'foo'] = b'bar' with pytest.raises(NotImplementedError): - store.pop('foo') + store.pop(self.root + 'foo') def test_popitem(self): # override because not implemented store = self.create_store() - store['foo'] = b'bar' + store[self.root + 'foo'] = b'bar' with pytest.raises(NotImplementedError): store.popitem() def test_permissions(self): - store = ZipStore('data/store.zip', mode='w') - store['foo'] = b'bar' - store['baz/'] = b'' + store = self.ZipStoreClass('data/store.zip', mode='w') + foo_key = 'foo' if self.version == 2 else self.root + 'foo' + # TODO: cannot provide key ending in / for v3 + # how to create an empty folder in that case? 
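The TODO above is a consequence of v3 key validation: since patch 042 in this series the V3 stores validate keys on __setitem__, and a key may not end in '/'. A minimal sketch of that behavior, assuming MemoryStoreV3:

    from zarr.storage import MemoryStoreV3

    store = MemoryStoreV3()
    store['meta/root/baz'] = b''        # fine: an empty value under meta/
    try:
        store['meta/root/baz/'] = b''   # directory-style key
    except ValueError:
        pass  # v3 rejects keys ending in '/', hence no empty-folder entries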
+ baz_key = 'baz/' if self.version == 2 else self.root + 'baz' + store[foo_key] = b'bar' + store[baz_key] = b'' + store.flush() store.close() z = ZipFile('data/store.zip', 'r') - info = z.getinfo('foo') + info = z.getinfo(foo_key) perm = oct(info.external_attr >> 16) assert perm == '0o644' - info = z.getinfo('baz/') + info = z.getinfo(baz_key) perm = oct(info.external_attr >> 16) # only for posix platforms if os.name == 'posix': - assert perm == '0o40775' + if self.version == 2: + assert perm == '0o40775' + else: + # baz/ on v2, but baz on v3, so not a directory + assert perm == '0o644' z.close() diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index 8f608bb564..ff3e0656af 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -741,21 +741,14 @@ def test_init_array(self): class TestZipStoreV3(_TestZipStore, StoreV3Tests): + ZipStoreClass = ZipStoreV3 + def create_store(self, **kwargs): path = tempfile.mktemp(suffix='.zip') atexit.register(os.remove, path) store = ZipStoreV3(path, mode='w', **kwargs) return store - def test_mode(self): - with ZipStoreV3('data/store.zip', mode='w') as store: - store['foo'] = b'bar' - store = ZipStoreV3('data/store.zip', mode='r') - with pytest.raises(PermissionError): - store['foo'] = b'bar' - with pytest.raises(PermissionError): - store.clear() - class TestDBMStoreV3(_TestDBMStore, StoreV3Tests): From d2ee3912a87209e2de5dfa3b94365ee38712be66 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Mon, 20 Dec 2021 12:10:43 -0500 Subject: [PATCH 050/109] add default rmdir implementation to all StoreV3 classes without these can be overridden by the other V2 class in the MRO --- zarr/storage.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/zarr/storage.py b/zarr/storage.py index 2800d61b16..6d3562b93b 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2107,6 +2107,10 @@ def __contains__(self, key): key = key.encode("ascii") return key in self.db + def rmdir(self, path: str = "") -> None: + path = normalize_storage_path(path) + _rmdir_from_keys_v3(self, path) + class LMDBStore(Store): """Storage class using LMDB. 
Requires the `lmdb `_ @@ -3210,6 +3214,10 @@ def getsize(self, path=None): else: return 0 + def rmdir(self, path: str = "") -> None: + path = normalize_storage_path(path) + _rmdir_from_keys_v3(self, path) + ZipStoreV3.__doc__ = ZipStore.__doc__ @@ -3242,6 +3250,10 @@ def __setitem__(self, key, value): self._validate_key(key) super().__setitem__(key, value) + def rmdir(self, path: str = "") -> None: + path = normalize_storage_path(path) + _rmdir_from_keys_v3(self, path) + RedisStoreV3.__doc__ = RedisStore.__doc__ @@ -3255,6 +3267,10 @@ def __setitem__(self, key, value): self._validate_key(key) super().__setitem__(key, value) + def rmdir(self, path: str = "") -> None: + path = normalize_storage_path(path) + _rmdir_from_keys_v3(self, path) + MongoDBStoreV3.__doc__ = MongoDBStore.__doc__ @@ -3268,6 +3284,10 @@ def __setitem__(self, key, value): self._validate_key(key) super().__setitem__(key, value) + def rmdir(self, path: str = "") -> None: + path = normalize_storage_path(path) + _rmdir_from_keys_v3(self, path) + DBMStoreV3.__doc__ = DBMStore.__doc__ @@ -3281,6 +3301,10 @@ def __setitem__(self, key, value): self._validate_key(key) super().__setitem__(key, value) + def rmdir(self, path: str = "") -> None: + path = normalize_storage_path(path) + _rmdir_from_keys_v3(self, path) + LMDBStoreV3.__doc__ = LMDBStore.__doc__ @@ -3349,5 +3373,9 @@ def __setitem__(self, key, value): self._validate_key(key) super().__setitem__(key, value) + def rmdir(self, path: str = "") -> None: + path = normalize_storage_path(path) + _rmdir_from_keys_v3(self, path) + LRUStoreCacheV3.__doc__ = LRUStoreCache.__doc__ From 6d1ac2050ded180c158ea97125895c5a82842739 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Mon, 20 Dec 2021 12:15:05 -0500 Subject: [PATCH 051/109] fix test_sync.py --- zarr/tests/test_sync.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/zarr/tests/test_sync.py b/zarr/tests/test_sync.py index 1a763dc7f7..3cee0d0446 100644 --- a/zarr/tests/test_sync.py +++ b/zarr/tests/test_sync.py @@ -23,8 +23,8 @@ class TestAttributesWithThreadSynchronizer(TestAttributes): - def init_attributes(self, store, read_only=False, cache=True): - key = 'attrs' + def init_attributes(self, store, read_only=False, cache=True, zarr_version=zarr_version): + key = '.zattrs' if zarr_version == 2 else 'meta/root/attrs' synchronizer = ThreadSynchronizer() return Attributes(store, synchronizer=synchronizer, key=key, read_only=read_only, cache=cache) @@ -32,8 +32,8 @@ def init_attributes(self, store, read_only=False, cache=True): class TestAttributesProcessSynchronizer(TestAttributes): - def init_attributes(self, store, read_only=False, cache=True): - key = 'attrs' + def init_attributes(self, store, read_only=False, cache=True, zarr_version=zarr_version): + key = '.zattrs' if zarr_version == 2 else 'meta/root/attrs' sync_path = mkdtemp() atexit.register(shutil.rmtree, sync_path) synchronizer = ProcessSynchronizer(sync_path) From e12adac8ddaa84edd7df860fc7a02cd6220af728 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Mon, 20 Dec 2021 14:11:52 -0500 Subject: [PATCH 052/109] all rmdir methods for StoreV3 classes need to remove associated metadata --- zarr/storage.py | 55 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 6d3562b93b..2e9e17eeef 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -200,7 +200,7 @@ def rmdir(store: StoreLike, path: Path = None): this will be called, otherwise will fall back to 
implementation via the `Store` interface.""" path = normalize_storage_path(path) - store_version = getattr(store, '_store_version', 2) == 2 + store_version = getattr(store, '_store_version', 2) if hasattr(store, "rmdir") and store.is_erasable(): # type: ignore # pass through store.rmdir(path) # type: ignore @@ -2109,7 +2109,7 @@ def __contains__(self, key): def rmdir(self, path: str = "") -> None: path = normalize_storage_path(path) - _rmdir_from_keys_v3(self, path) + _rmdir_from_keys(self, path) class LMDBStore(Store): @@ -3016,6 +3016,16 @@ def rmdir(self, path=None): store_path = self.dir_path(base + path) if self.fs.isdir(store_path): self.fs.rm(store_path, recursive=True) + + # remove any associated metadata files + sfx = _get_hierarchy_metadata(self)['metadata_key_suffix'] + meta_dir = ('meta/root/' + path).rstrip('/') + array_meta_file = meta_dir + '.array' + sfx + if array_meta_file in self: + self.fs.rm(array_meta_file) # type: ignore + group_meta_file = meta_dir + '.group' + sfx + if group_meta_file in self: + self.fs.rm(group_meta_file) # type: ignore else: store_path = self.dir_path(path) if self.fs.isdir(store_path): @@ -3087,10 +3097,20 @@ def rmdir(self, path: Path = None): parent, key = self._get_parent(base + path) value = parent[key] except KeyError: - return + continue else: if isinstance(value, self.cls): del parent[key] + + # remove any associated metadata files + sfx = _get_hierarchy_metadata(self)['metadata_key_suffix'] + meta_dir = ('meta/root/' + path).rstrip('/') + array_meta_file = meta_dir + '.array' + sfx + if array_meta_file in self: + self.erase(array_meta_file) # type: ignore + group_meta_file = meta_dir + '.group' + sfx + if group_meta_file in self: + self.erase(group_meta_file) # type: ignore else: # clear out root self.root = self.cls() @@ -3165,7 +3185,19 @@ def rmdir(self, path=None): dir_path = os.path.join(dir_path, base + store_path) if os.path.isdir(dir_path): shutil.rmtree(dir_path) - # TODO: also remove any residual .array.json or .group.json files? + + # remove any associated metadata files + sfx = _get_hierarchy_metadata(self)['metadata_key_suffix'] + meta_dir = ('meta/root/' + path).rstrip('/') + array_meta_file = meta_dir + '.array' + sfx + if array_meta_file in self: + file_path = os.path.join(dir_path, array_meta_file) + os.remove(file_path) + group_meta_file = meta_dir + '.group' + sfx + if group_meta_file in self: + file_path = os.path.join(dir_path, group_meta_file) + os.remove(file_path) + elif os.path.isdir(dir_path): shutil.rmtree(dir_path) @@ -3346,6 +3378,21 @@ def rmdir(self, path=None): self.cursor.execute( 'DELETE FROM zarr WHERE k LIKE (? || "/%")', (base + path,) ) + # remove any associated metadata files + sfx = _get_hierarchy_metadata(self)['metadata_key_suffix'] + meta_dir = ('meta/root/' + path).rstrip('/') + array_meta_file = meta_dir + '.array' + sfx + if array_meta_file in self: + with self.lock: + self.cursor.execute( + 'DELETE FROM zarr WHERE k LIKE (? || "/%")', (array_meta_file,) + ) + group_meta_file = meta_dir + '.group' + sfx + if group_meta_file in self: + with self.lock: + self.cursor.execute( + 'DELETE FROM zarr WHERE k LIKE (? 
|| "/%")', (group_meta_file,) + ) else: self.clear() From d142da1702be9233f4dcec696149603fd78cd6b5 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Mon, 20 Dec 2021 14:14:31 -0500 Subject: [PATCH 053/109] avoid warning from test_entropy.py --- zarr/tests/test_indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/tests/test_indexing.py b/zarr/tests/test_indexing.py index a58a309534..b791179a23 100644 --- a/zarr/tests/test_indexing.py +++ b/zarr/tests/test_indexing.py @@ -1441,4 +1441,4 @@ def test_slice_selection_uints(): arr = np.arange(24).reshape((4, 6)) idx = np.uint64(3) slice_sel = make_slice_selection((idx,)) - assert arr[slice_sel].shape == (1, 6) + assert arr[tuple(slice_sel)].shape == (1, 6) From 2c8098e7f1fb785feda13702fdb426a8ad3a29db Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Mon, 20 Dec 2021 16:51:30 -0500 Subject: [PATCH 054/109] pep8 fixes --- zarr/_storage/store.py | 3 ++- zarr/storage.py | 2 +- zarr/tests/test_attrs.py | 4 +++- zarr/tests/test_storage_v3.py | 1 - 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 2d07027866..3e43014a35 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -132,6 +132,7 @@ def rmdir(self, path: str = "") -> None: path = normalize_storage_path(path) _rmdir_from_keys(self, path) + _valid_key_characters = set(ascii_letters + digits + "/.-_") @@ -359,7 +360,7 @@ def _rmdir_from_keys(store: StoreLike, path: Optional[str] = None) -> None: del store[key] -def _rmdir_from_keys_v3(store: BaseStore, path: Optional[str]="") -> None: +def _rmdir_from_keys_v3(store: BaseStore, path: Optional[str] = "") -> None: meta_dir = 'meta/root/' + path meta_dir = meta_dir.rstrip('/') diff --git a/zarr/storage.py b/zarr/storage.py index 2e9e17eeef..03890868dc 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -206,7 +206,7 @@ def rmdir(store: StoreLike, path: Path = None): store.rmdir(path) # type: ignore else: # slow version, delete one key at a time - if store_version == 2: + if store_version == 2: _rmdir_from_keys(store, path) else: _rmdir_from_keys_v3(store, path) diff --git a/zarr/tests/test_attrs.py b/zarr/tests/test_attrs.py index 03625c9bae..1435b64dcc 100644 --- a/zarr/tests/test_attrs.py +++ b/zarr/tests/test_attrs.py @@ -105,7 +105,9 @@ def test_read_only(self, zarr_version): if zarr_version == 2: store['.zattrs'] = json.dumps(dict(foo='bar', baz=42)).encode('ascii') else: - store['meta/root/attrs'] = json.dumps(dict(attributes=dict(foo='bar', baz=42))).encode('ascii') + store['meta/root/attrs'] = json.dumps( + dict(attributes=dict(foo='bar', baz=42)) + ).encode('ascii') assert a['foo'] == 'bar' assert a['baz'] == 42 with pytest.raises(PermissionError): diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index ff3e0656af..3b425442d5 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -610,7 +610,6 @@ def test_list_prefix(self): store.list_prefix(prefix='/meta/root') - class TestMappingStoreV3(StoreV3Tests): def create_store(self, **kwargs): From ae2145a488dc8109f096901db6197dca925d5477 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Mon, 20 Dec 2021 17:51:15 -0500 Subject: [PATCH 055/109] greatly reduce code duplication in test_storage_v3.py instead add v3 code path to existing test methods in test_storage.py --- zarr/tests/test_storage.py | 492 ++++++++++++++++++++++------------ zarr/tests/test_storage_v3.py | 442 +----------------------------- 2 files changed, 326 insertions(+), 608 
deletions(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 06e2156bed..bc0bf4a066 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -20,7 +20,7 @@ import zarr from zarr._storage.store import _get_hierarchy_metadata from zarr.codecs import BZ2, AsType, Blosc, Zlib -from zarr.errors import MetadataError +from zarr.errors import ContainsArrayError, ContainsGroupError, MetadataError from zarr.hierarchy import group from zarr.meta import ZARR_FORMAT, decode_array_metadata from zarr.n5 import N5Store, N5FSStore @@ -314,38 +314,47 @@ def test_getsize(self): def test_hierarchy(self): # setup store = self.create_store() - store['a'] = b'aaa' - store['b'] = b'bbb' - store['c/d'] = b'ddd' - store['c/e/f'] = b'fff' - store['c/e/g'] = b'ggg' + store[self.root + 'a'] = b'aaa' + store[self.root + 'b'] = b'bbb' + store[self.root + 'c/d'] = b'ddd' + store[self.root + 'c/e/f'] = b'fff' + store[self.root + 'c/e/g'] = b'ggg' # check keys - assert 'a' in store - assert 'b' in store - assert 'c/d' in store - assert 'c/e/f' in store - assert 'c/e/g' in store - assert 'c' not in store - assert 'c/' not in store - assert 'c/e' not in store - assert 'c/e/' not in store - assert 'c/d/x' not in store + assert self.root + 'a' in store + assert self.root + 'b' in store + assert self.root + 'c/d' in store + assert self.root + 'c/e/f' in store + assert self.root + 'c/e/g' in store + assert self.root + 'c' not in store + assert self.root + 'c/' not in store + assert self.root + 'c/e' not in store + assert self.root + 'c/e/' not in store + assert self.root + 'c/d/x' not in store # check __getitem__ with pytest.raises(KeyError): - store['c'] + store[self.root + 'c'] with pytest.raises(KeyError): - store['c/e'] + store[self.root + 'c/e'] with pytest.raises(KeyError): - store['c/d/x'] + store[self.root + 'c/d/x'] # test getsize (optional) if hasattr(store, 'getsize'): - assert 6 == store.getsize() + # TODO: proper behavior of getsize? + # v3 returns size of all nested arrays, not just the + # size of the arrays in the current folder. 
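
To unpack the constants asserted below: the five values written above are 3
bytes each, under keys "a", "b", "c/d", "c/e/f" and "c/e/g" (each prefixed
with self.root for the v3 stores). The v2 stores count only the direct leaf
children of a prefix, giving 6 at the root ("a" + "b") and 3 under "c"
("c/d" only), while the v3 stores in this series sum every key below the
prefix, giving 5 * 3 = 15 at the root and 9 under "c". A rough sketch of the
v3-style accounting, assuming a plain mapping of keys to byte strings:

    def getsize_v3_style(store, prefix=""):
        # every key at or below the prefix contributes, not just direct children
        return sum(len(v) for k, v in store.items() if k.startswith(prefix))
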
+ if self.version == 2: + assert 6 == store.getsize() + else: + assert 15 == store.getsize() assert 3 == store.getsize('a') assert 3 == store.getsize('b') - assert 3 == store.getsize('c') + if self.version == 2: + assert 3 == store.getsize('c') + else: + assert 9 == store.getsize('c') assert 3 == store.getsize('c/d') assert 6 == store.getsize('c/e') assert 3 == store.getsize('c/e/f') @@ -360,77 +369,77 @@ def test_hierarchy(self): # test listdir (optional) if hasattr(store, 'listdir'): - assert {'a', 'b', 'c'} == set(store.listdir()) - assert {'d', 'e'} == set(store.listdir('c')) - assert {'f', 'g'} == set(store.listdir('c/e')) + assert {'a', 'b', 'c'} == set(store.listdir(self.root)) + assert {'d', 'e'} == set(store.listdir(self.root + 'c')) + assert {'f', 'g'} == set(store.listdir(self.root + 'c/e')) # no exception raised if path does not exist or is leaf - assert [] == store.listdir('x') - assert [] == store.listdir('a/x') - assert [] == store.listdir('c/x') - assert [] == store.listdir('c/x/y') - assert [] == store.listdir('c/d/y') - assert [] == store.listdir('c/d/y/z') - assert [] == store.listdir('c/e/f') + assert [] == store.listdir(self.root + 'x') + assert [] == store.listdir(self.root + 'a/x') + assert [] == store.listdir(self.root + 'c/x') + assert [] == store.listdir(self.root + 'c/x/y') + assert [] == store.listdir(self.root + 'c/d/y') + assert [] == store.listdir(self.root + 'c/d/y/z') + assert [] == store.listdir(self.root + 'c/e/f') # test rename (optional) if store.is_erasable(): store.rename("c/e", "c/e2") - assert "c/d" in store - assert "c/e" not in store - assert "c/e/f" not in store - assert "c/e/g" not in store - assert "c/e2" not in store - assert "c/e2/f" in store - assert "c/e2/g" in store + assert self.root + "c/d" in store + assert self.root + "c/e" not in store + assert self.root + "c/e/f" not in store + assert self.root + "c/e/g" not in store + assert self.root + "c/e2" not in store + assert self.root + "c/e2/f" in store + assert self.root + "c/e2/g" in store store.rename("c/e2", "c/e") - assert "c/d" in store - assert "c/e2" not in store - assert "c/e2/f" not in store - assert "c/e2/g" not in store - assert "c/e" not in store - assert "c/e/f" in store - assert "c/e/g" in store + assert self.root + "c/d" in store + assert self.root + "c/e2" not in store + assert self.root + "c/e2/f" not in store + assert self.root + "c/e2/g" not in store + assert self.root + "c/e" not in store + assert self.root + "c/e/f" in store + assert self.root + "c/e/g" in store store.rename("c", "c1/c2/c3") - assert "a" in store - assert "c" not in store - assert "c/d" not in store - assert "c/e" not in store - assert "c/e/f" not in store - assert "c/e/g" not in store - assert "c1" not in store - assert "c1/c2" not in store - assert "c1/c2/c3" not in store - assert "c1/c2/c3/d" in store - assert "c1/c2/c3/e" not in store - assert "c1/c2/c3/e/f" in store - assert "c1/c2/c3/e/g" in store + assert self.root + "a" in store + assert self.root + "c" not in store + assert self.root + "c/d" not in store + assert self.root + "c/e" not in store + assert self.root + "c/e/f" not in store + assert self.root + "c/e/g" not in store + assert self.root + "c1" not in store + assert self.root + "c1/c2" not in store + assert self.root + "c1/c2/c3" not in store + assert self.root + "c1/c2/c3/d" in store + assert self.root + "c1/c2/c3/e" not in store + assert self.root + "c1/c2/c3/e/f" in store + assert self.root + "c1/c2/c3/e/g" in store store.rename("c1/c2/c3", "c") - assert "c" not in store - assert 
"c/d" in store - assert "c/e" not in store - assert "c/e/f" in store - assert "c/e/g" in store - assert "c1" not in store - assert "c1/c2" not in store - assert "c1/c2/c3" not in store - assert "c1/c2/c3/d" not in store - assert "c1/c2/c3/e" not in store - assert "c1/c2/c3/e/f" not in store - assert "c1/c2/c3/e/g" not in store + assert self.root + "c" not in store + assert self.root + "c/d" in store + assert self.root + "c/e" not in store + assert self.root + "c/e/f" in store + assert self.root + "c/e/g" in store + assert self.root + "c1" not in store + assert self.root + "c1/c2" not in store + assert self.root + "c1/c2/c3" not in store + assert self.root + "c1/c2/c3/d" not in store + assert self.root + "c1/c2/c3/e" not in store + assert self.root + "c1/c2/c3/e/f" not in store + assert self.root + "c1/c2/c3/e/g" not in store # test rmdir (optional) store.rmdir("c/e") - assert "c/d" in store - assert "c/e/f" not in store - assert "c/e/g" not in store + assert self.root + "c/d" in store + assert self.root + "c/e/f" not in store + assert self.root + "c/e/g" not in store store.rmdir("c") - assert "c/d" not in store + assert self.root + "c/d" not in store store.rmdir() - assert 'a' not in store - assert 'b' not in store - store['a'] = b'aaa' - store['c/d'] = b'ddd' - store['c/e/f'] = b'fff' + assert self.root + 'a' not in store + assert self.root + 'b' not in store + store[self.root + 'a'] = b'aaa' + store[self.root + 'c/d'] = b'ddd' + store[self.root + 'c/e/f'] = b'fff' # no exceptions raised if path does not exist or is leaf store.rmdir('x') store.rmdir('a/x') @@ -439,9 +448,9 @@ def test_hierarchy(self): store.rmdir('c/d/y') store.rmdir('c/d/y/z') store.rmdir('c/e/f') - assert 'a' in store - assert 'c/d' in store - assert 'c/e/f' in store + assert self.root + 'a' in store + assert self.root + 'c/d' in store + assert self.root + 'c/e/f' in store store.close() @@ -487,33 +496,51 @@ def test_init_group_overwrite_chunk_store(self): def _test_init_array_overwrite(self, order): # setup store = self.create_store() - store[array_meta_key] = store._metadata_class.encode_array_metadata( - dict(shape=(2000,), - chunks=(200,), - dtype=np.dtype('u1'), - compressor=Zlib(1).get_config(), - fill_value=0, - order=order, - filters=None) - ) + if self.version == 2: + path = None + mkey = array_meta_key + meta = dict(shape=(2000,), + chunks=(200,), + dtype=np.dtype('u1'), + compressor=Zlib(1).get_config(), + fill_value=0, + order=order, + filters=None) + else: + path = 'arr1' # no default, have to specify for v3 + mkey = 'meta/root/' + path + '.array.json' + meta = dict(shape=(2000,), + chunk_grid=dict(type='regular', + chunk_shape=(200,), + separator=('/')), + data_type=np.dtype('u1'), + compressor=Zlib(1), + fill_value=0, + chunk_memory_layout=order, + filters=None) + store[mkey] = store._metadata_class.encode_array_metadata(meta) # don't overwrite (default) - with pytest.raises(ValueError): - init_array(store, shape=1000, chunks=100) + with pytest.raises(ContainsArrayError): + init_array(store, shape=1000, chunks=100, path=path) # do overwrite try: init_array(store, shape=1000, chunks=100, dtype='i4', - overwrite=True) + overwrite=True, path=path) except NotImplementedError: pass else: - assert array_meta_key in store - meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) - assert ZARR_FORMAT == meta['zarr_format'] + assert mkey in store + meta = store._metadata_class.decode_array_metadata(store[mkey]) + if self.version == 2: + assert ZARR_FORMAT == meta['zarr_format'] + assert (100,) == 
meta['chunks'] + assert np.dtype('i4') == meta['dtype'] + else: + assert (100,) == meta['chunk_grid']['chunk_shape'] + assert np.dtype('i4') == meta['data_type'] assert (1000,) == meta['shape'] - assert (100,) == meta['chunks'] - assert np.dtype('i4') == meta['dtype'] store.close() @@ -523,14 +550,22 @@ def test_init_array_path(self): init_array(store, shape=1000, chunks=100, path=path) # check metadata - key = path + '/' + array_meta_key - assert key in store - meta = store._metadata_class.decode_array_metadata(store[key]) - assert ZARR_FORMAT == meta['zarr_format'] + if self.version == 2: + mkey = path + '/' + array_meta_key + else: + mkey = 'meta/root/' + path + '.array.json' + assert mkey in store + meta = store._metadata_class.decode_array_metadata(store[mkey]) + if self.version == 2: + assert ZARR_FORMAT == meta['zarr_format'] + assert (100,) == meta['chunks'] + assert np.dtype(None) == meta['dtype'] + assert default_compressor.get_config() == meta['compressor'] + else: + assert (100,) == meta['chunk_grid']['chunk_shape'] + assert np.dtype(None) == meta['data_type'] + assert default_compressor == meta['compressor'] assert (1000,) == meta['shape'] - assert (100,) == meta['chunks'] - assert np.dtype(None) == meta['dtype'] - assert default_compressor.get_config() == meta['compressor'] assert meta['fill_value'] is None store.close() @@ -539,18 +574,30 @@ def _test_init_array_overwrite_path(self, order): # setup path = 'foo/bar' store = self.create_store() - meta = dict(shape=(2000,), - chunks=(200,), - dtype=np.dtype('u1'), - compressor=Zlib(1).get_config(), - fill_value=0, - order=order, - filters=None) - store[array_meta_key] = store._metadata_class.encode_array_metadata(meta) - store[path + '/' + array_meta_key] = store._metadata_class.encode_array_metadata(meta) + if self.version == 2: + mkey = path + '/' + array_meta_key + meta = dict(shape=(2000,), + chunks=(200,), + dtype=np.dtype('u1'), + compressor=Zlib(1).get_config(), + fill_value=0, + order=order, + filters=None) + else: + mkey = 'meta/root/' + path + '.array.json' + meta = dict(shape=(2000,), + chunk_grid=dict(type='regular', + chunk_shape=(200,), + separator=('/')), + data_type=np.dtype('u1'), + compressor=Zlib(1), + fill_value=0, + chunk_memory_layout=order, + filters=None) + store[mkey] = store._metadata_class.encode_array_metadata(meta) # don't overwrite - with pytest.raises(ValueError): + with pytest.raises(ContainsArrayError): init_array(store, shape=1000, chunks=100, path=path) # do overwrite @@ -560,15 +607,20 @@ def _test_init_array_overwrite_path(self, order): except NotImplementedError: pass else: - assert group_meta_key in store - assert array_meta_key not in store - assert (path + '/' + array_meta_key) in store + if self.version == 2: + assert group_meta_key in store + assert array_meta_key not in store + assert mkey in store # should have been overwritten - meta = store._metadata_class.decode_array_metadata(store[path + '/' + array_meta_key]) - assert ZARR_FORMAT == meta['zarr_format'] + meta = store._metadata_class.decode_array_metadata(store[mkey]) + if self.version == 2: + assert ZARR_FORMAT == meta['zarr_format'] + assert (100,) == meta['chunks'] + assert np.dtype('i4') == meta['dtype'] + else: + assert (100,) == meta['chunk_grid']['chunk_shape'] + assert np.dtype('i4') == meta['data_type'] assert (1000,) == meta['shape'] - assert (100,) == meta['chunks'] - assert np.dtype('i4') == meta['dtype'] store.close() @@ -576,10 +628,16 @@ def test_init_array_overwrite_group(self): # setup path = 'foo/bar' store = 
self.create_store() - store[path + '/' + group_meta_key] = store._metadata_class.encode_group_metadata() + if self.version == 2: + array_key = path + '/' + array_meta_key + group_key = path + '/' + group_meta_key + else: + array_key = 'meta/root/' + path + '.array.json' + group_key = 'meta/root/' + path + '.group.json' + store[group_key] = store._metadata_class.encode_group_metadata() # don't overwrite - with pytest.raises(ValueError): + with pytest.raises(ContainsGroupError): init_array(store, shape=1000, chunks=100, path=path) # do overwrite @@ -589,13 +647,17 @@ def test_init_array_overwrite_group(self): except NotImplementedError: pass else: - assert (path + '/' + group_meta_key) not in store - assert (path + '/' + array_meta_key) in store - meta = store._metadata_class.decode_array_metadata(store[path + '/' + array_meta_key]) - assert ZARR_FORMAT == meta['zarr_format'] + assert group_key not in store + assert array_key in store + meta = store._metadata_class.decode_array_metadata(store[array_key]) + if self.version == 2: + assert ZARR_FORMAT == meta['zarr_format'] + assert (100,) == meta['chunks'] + assert np.dtype('i4') == meta['dtype'] + else: + assert (100,) == meta['chunk_grid']['chunk_shape'] + assert np.dtype('i4') == meta['data_type'] assert (1000,) == meta['shape'] - assert (100,) == meta['chunks'] - assert np.dtype('i4') == meta['dtype'] store.close() @@ -603,61 +665,105 @@ def _test_init_array_overwrite_chunk_store(self, order): # setup store = self.create_store() chunk_store = self.create_store() - store[array_meta_key] = store._metadata_class.encode_array_metadata( - dict(shape=(2000,), - chunks=(200,), - dtype=np.dtype('u1'), - compressor=None, - fill_value=0, - filters=None, - order=order) - ) - chunk_store['0'] = b'aaa' - chunk_store['1'] = b'bbb' + + if self.version == 2: + path = None + data_path = '' + mkey = array_meta_key + meta = dict(shape=(2000,), + chunks=(200,), + dtype=np.dtype('u1'), + compressor=None, + fill_value=0, + filters=None, + order=order) + else: + path = 'arr1' + data_path = 'data/root/arr1/' + mkey = 'meta/root/' + path + '.array.json' + meta = dict(shape=(2000,), + chunk_grid=dict(type='regular', + chunk_shape=(200,), + separator=('/')), + data_type=np.dtype('u1'), + compressor=None, + fill_value=0, + filters=None, + chunk_memory_layout=order) + + store[mkey] = store._metadata_class.encode_array_metadata(meta) + + chunk_store[data_path + '0'] = b'aaa' + chunk_store[data_path + '1'] = b'bbb' # don't overwrite (default) - with pytest.raises(ValueError): - init_array(store, shape=1000, chunks=100, chunk_store=chunk_store) + with pytest.raises(ContainsArrayError): + init_array(store, path=path, shape=1000, chunks=100, chunk_store=chunk_store) # do overwrite try: - init_array(store, shape=1000, chunks=100, dtype='i4', + init_array(store, path=path, shape=1000, chunks=100, dtype='i4', overwrite=True, chunk_store=chunk_store) except NotImplementedError: pass else: - assert array_meta_key in store - meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) - assert ZARR_FORMAT == meta['zarr_format'] + assert mkey in store + meta = store._metadata_class.decode_array_metadata(store[mkey]) + if self.version == 2: + assert ZARR_FORMAT == meta['zarr_format'] + assert (100,) == meta['chunks'] + assert np.dtype('i4') == meta['dtype'] + else: + assert (100,) == meta['chunk_grid']['chunk_shape'] + assert np.dtype('i4') == meta['data_type'] assert (1000,) == meta['shape'] - assert (100,) == meta['chunks'] - assert np.dtype('i4') == meta['dtype'] - 
assert '0' not in chunk_store - assert '1' not in chunk_store + assert data_path + '0' not in chunk_store + assert data_path + '1' not in chunk_store store.close() chunk_store.close() def test_init_array_compat(self): store = self.create_store() - init_array(store, shape=1000, chunks=100, compressor='none') - meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) - assert meta['compressor'] is None - + if self.version == 2: + path = None + mkey = array_meta_key + else: + path = 'arr1' + mkey = 'meta/root/' + path + '.array.json' + init_array(store, path=path, shape=1000, chunks=100, compressor='none') + meta = store._metadata_class.decode_array_metadata(store[mkey]) + if self.version == 2: + assert meta['compressor'] is None + else: + assert 'compressor' not in meta store.close() def test_init_group(self): store = self.create_store() - init_group(store) + if self.version == 2: + path = None + mkey = group_meta_key + else: + path = 'foo' + mkey = 'meta/root/' + path + '.group.json' + init_group(store, path=path) # check metadata - assert group_meta_key in store - meta = store._metadata_class.decode_group_metadata(store[group_meta_key]) - assert ZARR_FORMAT == meta['zarr_format'] + assert mkey in store + meta = store._metadata_class.decode_group_metadata(store[mkey]) + if self.version == 2: + assert ZARR_FORMAT == meta['zarr_format'] + else: + assert meta == {'attributes': {}} store.close() def _test_init_group_overwrite(self, order): + if self.version == 3: + pytest.skip( + "In v3 array and group names cannot overlap" + ) # setup store = self.create_store() store[array_meta_key] = store._metadata_class.encode_array_metadata( @@ -671,7 +777,7 @@ def _test_init_group_overwrite(self, order): ) # don't overwrite array (default) - with pytest.raises(ValueError): + with pytest.raises(ContainsArrayError): init_group(store) # do overwrite @@ -695,15 +801,31 @@ def _test_init_group_overwrite_path(self, order): # setup path = 'foo/bar' store = self.create_store() - meta = dict(shape=(2000,), - chunks=(200,), - dtype=np.dtype('u1'), - compressor=None, - fill_value=0, - order=order, - filters=None) - store[array_meta_key] = store._metadata_class.encode_array_metadata(meta) - store[path + '/' + array_meta_key] = store._metadata_class.encode_array_metadata(meta) + if self.version == 2: + meta = dict(shape=(2000,), + chunks=(200,), + dtype=np.dtype('u1'), + compressor=None, + fill_value=0, + order=order, + filters=None) + array_key = path + '/' + array_meta_key + group_key = path + '/' + group_meta_key + else: + meta = dict( + shape=(2000,), + chunk_grid=dict(type='regular', + chunk_shape=(200,), + separator=('/')), + data_type=np.dtype('u1'), + compressor=None, + fill_value=0, + filters=None, + chunk_memory_layout=order, + ) + array_key = 'meta/root/' + path + '.array.json' + group_key = 'meta/root/' + path + '.group.json' + store[array_key] = store._metadata_class.encode_array_metadata(meta) # don't overwrite with pytest.raises(ValueError): @@ -715,17 +837,25 @@ def _test_init_group_overwrite_path(self, order): except NotImplementedError: pass else: - assert array_meta_key not in store - assert group_meta_key in store - assert (path + '/' + array_meta_key) not in store - assert (path + '/' + group_meta_key) in store + if self.version == 2: + assert array_meta_key not in store + assert group_meta_key in store + assert array_key not in store + assert group_key in store # should have been overwritten - meta = store._metadata_class.decode_group_metadata(store[path + '/' + group_meta_key]) 
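
The array_key and group_key strings built in these hunks all follow the same
v3 layout: a node's metadata lives under meta/root/ with a suffix naming the
node type, instead of a .zarray or .zgroup file inside the node's own prefix
as in v2. A small helper showing just that mapping (hypothetical; in the
real stores the ".json" suffix is read from the metadata_key_suffix recorded
in zarr.json):

    def v3_meta_keys(path, suffix=".json"):
        base = "meta/root/" + path.strip("/")
        return base + ".array" + suffix, base + ".group" + suffix

    v3_meta_keys("foo/bar")
    # -> ('meta/root/foo/bar.array.json', 'meta/root/foo/bar.group.json')
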
- assert ZARR_FORMAT == meta['zarr_format'] + meta = store._metadata_class.decode_group_metadata(store[group_key]) + if self.version == 2: + assert ZARR_FORMAT == meta['zarr_format'] + else: + assert meta == {'attributes': {}} store.close() def _test_init_group_overwrite_chunk_store(self, order): + if self.version == 3: + pytest.skip( + "In v3 array and group names cannot overlap" + ) # setup store = self.create_store() chunk_store = self.create_store() @@ -1007,14 +1137,20 @@ def test_deep_ndim(self): import zarr store = self.create_store() - foo = zarr.open_group(store=store) + path = None if self.version == 2 else 'group1' + foo = zarr.open_group(store=store, path=path) bar = foo.create_group("bar") baz = bar.create_dataset("baz", shape=(4, 4, 4), chunks=(2, 2, 2), dtype="i8") baz[:] = 1 - assert set(store.listdir()) == {".zgroup", "bar"} + if self.version == 2: + assert set(store.listdir()) == {".zgroup", "bar"} + else: + assert set(store.listdir()) == set(["data", "meta", "zarr.json"]) + assert set(store.listdir("meta/root/" + path)) == set(["bar", "bar.group.json"]) + assert set(store.listdir("data/root/" + path)) == set(["bar"]) assert foo["bar"]["baz"][(0, 0, 0)] == 1 def test_not_fsspec(self): diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index 3b425442d5..039961ec1b 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -7,11 +7,8 @@ import pytest from zarr._storage.store import _valid_key_characters -from zarr.codecs import Zlib -from zarr.errors import ContainsArrayError, ContainsGroupError -from zarr.meta import ZARR_FORMAT -from zarr.storage import (array_meta_key, atexit_rmglob, atexit_rmtree, - default_compressor, getsize, init_array, init_group) +from zarr.storage import (atexit_rmglob, atexit_rmtree, default_compressor, + getsize, init_array) from zarr.storage import (KVStoreV3, MemoryStoreV3, ZipStoreV3, FSStoreV3, DirectoryStoreV3, NestedDirectoryStoreV3, RedisStoreV3, MongoDBStoreV3, DBMStoreV3, @@ -145,11 +142,10 @@ class StoreV3Tests(_StoreTests): def test_getsize(self): # TODO: determine proper getsize() behavior for v3 - - # Currently returns the combined size of entries under - # meta/root/path and data/root/path. - # Any path not under meta/root/ or data/root/ (including zarr.json) - # returns size 0. + # Currently returns the combined size of entries under + # meta/root/path and data/root/path. + # Any path not under meta/root/ or data/root/ (including zarr.json) + # returns size 0. store = self.create_store() if isinstance(store, dict) or hasattr(store, 'getsize'): @@ -173,149 +169,6 @@ def test_getsize(self): store['data/root/spong'] = np.frombuffer(b'zzzzz', dtype='u1') assert 19 == getsize(store) assert 5 == getsize(store, 'spong') - - store.close() - - # Note: Except for a couple of getsize results this method could be moved - # back to the v2 class. Determine what is the correct behavior here. 
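
The long run of deletions that follows is the point of this commit: instead
of keeping a v3 copy of each test body, the shared tests are parametrized on
two class attributes so one body serves both layouts. Roughly the pattern is
(a sketch, using the attribute names and the StoreV3Tests class from this
series; the base-class name is illustrative):

    class SharedStoreTests:
        version = 2
        root = ""            # v2 keys are used bare

        def test_keys(self):
            store = self.create_store()
            store[self.root + "a"] = b"aaa"   # same body works for either

    class StoreV3Tests(SharedStoreTests):
        version = 3
        root = "meta/root/"  # v3 keys live under the metadata root
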
- - # noinspection PyStatementEffect - def test_hierarchy(self): - # setup - - store = self.create_store() - store[self.root + 'a'] = b'aaa' - store[self.root + 'b'] = b'bbb' - store[self.root + 'c/d'] = b'ddd' - store[self.root + 'c/e/f'] = b'fff' - store[self.root + 'c/e/g'] = b'ggg' - - # check keys - assert self.root + 'a' in store - assert self.root + 'b' in store - assert self.root + 'c/d' in store - assert self.root + 'c/e/f' in store - assert self.root + 'c/e/g' in store - assert self.root + 'c' not in store - assert self.root + 'c/' not in store - assert self.root + 'c/e' not in store - assert self.root + 'c/e/' not in store - assert self.root + 'c/d/x' not in store - - # check __getitem__ - with pytest.raises(KeyError): - store[self.root + 'c'] - with pytest.raises(KeyError): - store[self.root + 'c/e'] - with pytest.raises(KeyError): - store[self.root + 'c/d/x'] - - # test getsize (optional) - if hasattr(store, 'getsize'): - # TODO: proper behavior of getsize? - # v3 returns size of all nested arrays, not just the - # size of the arrays in the current folder. - assert 15 == store.getsize() # == 6 for v2 - assert 3 == store.getsize('a') - assert 3 == store.getsize('b') - assert 9 == store.getsize('c') # == 3 for v2 - assert 3 == store.getsize('c/d') - assert 6 == store.getsize('c/e') - assert 3 == store.getsize('c/e/f') - assert 3 == store.getsize('c/e/g') - # non-existent paths - assert 0 == store.getsize('x') - assert 0 == store.getsize('a/x') - assert 0 == store.getsize('c/x') - assert 0 == store.getsize('c/x/y') - assert 0 == store.getsize('c/d/y') - assert 0 == store.getsize('c/d/y/z') - - # test listdir (optional) - if hasattr(store, 'listdir'): - assert {'a', 'b', 'c'} == set(store.listdir(self.root)) - assert {'d', 'e'} == set(store.listdir(self.root + 'c')) - assert {'f', 'g'} == set(store.listdir(self.root + 'c/e')) - # no exception raised if path does not exist or is leaf - assert [] == store.listdir(self.root + 'x') - assert [] == store.listdir(self.root + 'a/x') - assert [] == store.listdir(self.root + 'c/x') - assert [] == store.listdir(self.root + 'c/x/y') - assert [] == store.listdir(self.root + 'c/d/y') - assert [] == store.listdir(self.root + 'c/d/y/z') - assert [] == store.listdir(self.root + 'c/e/f') - - # test rename (optional) - if store.is_erasable(): - store.rename("c/e", "c/e2") - assert self.root + "c/d" in store - assert self.root + "c/e" not in store - assert self.root + "c/e/f" not in store - assert self.root + "c/e/g" not in store - assert self.root + "c/e2" not in store - assert self.root + "c/e2/f" in store - assert self.root + "c/e2/g" in store - store.rename("c/e2", "c/e") - assert self.root + "c/d" in store - assert self.root + "c/e2" not in store - assert self.root + "c/e2/f" not in store - assert self.root + "c/e2/g" not in store - assert self.root + "c/e" not in store - assert self.root + "c/e/f" in store - assert self.root + "c/e/g" in store - store.rename("c", "c1/c2/c3") - assert self.root + "a" in store - assert self.root + "c" not in store - assert self.root + "c/d" not in store - assert self.root + "c/e" not in store - assert self.root + "c/e/f" not in store - assert self.root + "c/e/g" not in store - assert self.root + "c1" not in store - assert self.root + "c1/c2" not in store - assert self.root + "c1/c2/c3" not in store - assert self.root + "c1/c2/c3/d" in store - assert self.root + "c1/c2/c3/e" not in store - assert self.root + "c1/c2/c3/e/f" in store - assert self.root + "c1/c2/c3/e/g" in store - store.rename("c1/c2/c3", "c") - 
assert self.root + "c" not in store - assert self.root + "c/d" in store - assert self.root + "c/e" not in store - assert self.root + "c/e/f" in store - assert self.root + "c/e/g" in store - assert self.root + "c1" not in store - assert self.root + "c1/c2" not in store - assert self.root + "c1/c2/c3" not in store - assert self.root + "c1/c2/c3/d" not in store - assert self.root + "c1/c2/c3/e" not in store - assert self.root + "c1/c2/c3/e/f" not in store - assert self.root + "c1/c2/c3/e/g" not in store - - # test rmdir (optional) - store.rmdir("c/e") - assert self.root + "c/d" in store - assert self.root + "c/e/f" not in store - assert self.root + "c/e/g" not in store - store.rmdir("c") - assert self.root + "c/d" not in store - store.rmdir() - assert self.root + 'a' not in store - assert self.root + 'b' not in store - store[self.root + 'a'] = b'aaa' - store[self.root + 'c/d'] = b'ddd' - store[self.root + 'c/e/f'] = b'fff' - # no exceptions raised if path does not exist or is leaf - store.rmdir('x') - store.rmdir('a/x') - store.rmdir('c/x') - store.rmdir('c/x/y') - store.rmdir('c/d/y') - store.rmdir('c/d/y/z') - store.rmdir('c/e/f') - assert self.root + 'a' in store - assert self.root + 'c/d' in store - assert self.root + 'c/e/f' in store - store.close() def test_init_array(self, dimension_separator_fixture_v3): @@ -331,8 +184,6 @@ def test_init_array(self, dimension_separator_fixture_v3): mkey = 'meta/root/' + path + '.array.json' assert mkey in store meta = store._metadata_class.decode_array_metadata(store[mkey]) - # TODO: zarr_format already stored at the heirarchy level should we - # also keep it in the .array.json? assert (1000,) == meta['shape'] assert (100,) == meta['chunk_grid']['chunk_shape'] assert np.dtype(None) == meta['data_type'] @@ -342,257 +193,6 @@ def test_init_array(self, dimension_separator_fixture_v3): assert meta['chunk_grid']['separator'] is want_dim_sep store.close() - def _test_init_array_overwrite(self, order): - # setup - store = self.create_store() - - if store._store_version < 3: - path = None - mkey = array_meta_key - else: - path = 'arr1' # no default, have to specify for v3 - mkey = 'meta/root/' + path + '.array.json' - store[mkey] = store._metadata_class.encode_array_metadata( - dict(shape=(2000,), - chunk_grid=dict(type='regular', - chunk_shape=(200,), - separator=('/')), - data_type=np.dtype('u1'), - compressor=Zlib(1), - fill_value=0, - chunk_memory_layout=order, - filters=None) - ) - - # don't overwrite (default) - with pytest.raises(ContainsArrayError): - init_array(store, path=path, shape=1000, chunks=100) - - # do overwrite - try: - init_array(store, path=path, shape=1000, chunks=100, - dtype='i4', overwrite=True) - except NotImplementedError: - pass - else: - assert mkey in store - meta = store._metadata_class.decode_array_metadata( - store[mkey] - ) - assert (1000,) == meta['shape'] - if store._store_version == 2: - assert ZARR_FORMAT == meta['zarr_format'] - assert (100,) == meta['chunks'] - assert np.dtype('i4') == meta['dtype'] - elif store._store_version == 3: - assert (100,) == meta['chunk_grid']['chunk_shape'] - assert np.dtype('i4') == meta['data_type'] - else: - raise ValueError( - "unexpected store version: {store._store_version}" - ) - store.close() - - def test_init_array_path(self): - path = 'foo/bar' - store = self.create_store() - init_array(store, shape=1000, chunks=100, path=path) - - # check metadata - mkey = 'meta/root/' + path + '.array.json' - assert mkey in store - meta = store._metadata_class.decode_array_metadata(store[mkey]) - 
assert (1000,) == meta['shape'] - assert (100,) == meta['chunk_grid']['chunk_shape'] - assert np.dtype(None) == meta['data_type'] - assert default_compressor == meta['compressor'] - assert meta['fill_value'] is None - - store.close() - - def _test_init_array_overwrite_path(self, order): - # setup - path = 'foo/bar' - store = self.create_store() - meta = dict(shape=(2000,), - chunk_grid=dict(type='regular', - chunk_shape=(200,), - separator=('/')), - data_type=np.dtype('u1'), - compressor=Zlib(1), - fill_value=0, - chunk_memory_layout=order, - filters=None) - mkey = 'meta/root/' + path + '.array.json' - store[mkey] = store._metadata_class.encode_array_metadata(meta) - - # don't overwrite - with pytest.raises(ContainsArrayError): - init_array(store, shape=1000, chunks=100, path=path) - - # do overwrite - try: - init_array(store, shape=1000, chunks=100, dtype='i4', path=path, - overwrite=True) - except NotImplementedError: - pass - else: - assert mkey in store - # should have been overwritten - meta = store._metadata_class.decode_array_metadata(store[mkey]) - assert (1000,) == meta['shape'] - assert (100,) == meta['chunk_grid']['chunk_shape'] - assert np.dtype('i4') == meta['data_type'] - - store.close() - - def test_init_array_overwrite_group(self): - # setup - path = 'foo/bar' - store = self.create_store() - array_key = 'meta/root/' + path + '.array.json' - group_key = 'meta/root/' + path + '.group.json' - store[group_key] = store._metadata_class.encode_group_metadata() - - with pytest.raises(ContainsGroupError): - init_array(store, shape=1000, chunks=100, path=path) - - # do overwrite - try: - init_array(store, shape=1000, chunks=100, dtype='i4', path=path, - overwrite=True) - except NotImplementedError: - pass - else: - assert group_key not in store - assert array_key in store - meta = store._metadata_class.decode_array_metadata( - store[array_key] - ) - assert (1000,) == meta['shape'] - assert (100,) == meta['chunk_grid']['chunk_shape'] - assert np.dtype('i4') == meta['data_type'] - - store.close() - - def _test_init_array_overwrite_chunk_store(self, order): - # setup - store = self.create_store() - chunk_store = self.create_store() - path = 'arr1' - mkey = 'meta/root/' + path + '.array.json' - store[mkey] = store._metadata_class.encode_array_metadata( - dict(shape=(2000,), - chunk_grid=dict(type='regular', - chunk_shape=(200,), - separator=('/')), - data_type=np.dtype('u1'), - compressor=None, - fill_value=0, - filters=None, - chunk_memory_layout=order) - ) - - chunk_store['data/root/arr1/0'] = b'aaa' - chunk_store['data/root/arr1/1'] = b'bbb' - - assert 'data/root/arr1/0' in chunk_store - assert 'data/root/arr1/1' in chunk_store - - # don't overwrite (default) - with pytest.raises(ValueError): - init_array(store, path=path, shape=1000, chunks=100, chunk_store=chunk_store) - - # do overwrite - try: - init_array(store, path=path, shape=1000, chunks=100, dtype='i4', - overwrite=True, chunk_store=chunk_store) - except NotImplementedError: - pass - else: - assert mkey in store - meta = store._metadata_class.decode_array_metadata(store[mkey]) - assert (1000,) == meta['shape'] - assert (100,) == meta['chunk_grid']['chunk_shape'] - assert np.dtype('i4') == meta['data_type'] - assert 'data/root/arr1/0' not in chunk_store - assert 'data/root/arr1/1' not in chunk_store - - store.close() - chunk_store.close() - - def test_init_array_compat(self): - store = self.create_store() - path = 'arr1' - init_array(store, path=path, shape=1000, chunks=100, compressor='none') - mkey = 'meta/root/' + path + 
'.array.json' - meta = store._metadata_class.decode_array_metadata( - store[mkey] - ) - assert 'compressor' not in meta - - store.close() - - def test_init_group(self): - store = self.create_store() - path = "meta/root/foo" - init_group(store, path=path) - - # check metadata - mkey = 'meta/root/' + path + '.group.json' - assert mkey in store - meta = store._metadata_class.decode_group_metadata(store[mkey]) - assert meta == {'attributes': {}} - - store.close() - - def _test_init_group_overwrite(self, order): - pytest.skip( - "In v3 array and group names cannot overlap" - ) - - def _test_init_group_overwrite_path(self, order): - # setup - path = 'foo/bar' - store = self.create_store() - meta = dict( - shape=(2000,), - chunk_grid=dict(type='regular', - chunk_shape=(200,), - separator=('/')), - data_type=np.dtype('u1'), - compressor=None, - fill_value=0, - filters=None, - chunk_memory_layout=order, - ) - array_key = 'meta/root/' + path + '.array.json' - group_key = 'meta/root/' + path + '.group.json' - store[array_key] = store._metadata_class.encode_array_metadata(meta) - - # don't overwrite - with pytest.raises(ContainsArrayError): - init_group(store, path=path) - - # do overwrite - try: - init_group(store, overwrite=True, path=path) - except NotImplementedError: - pass - else: - assert array_key not in store - assert group_key in store - # should have been overwritten - meta = store._metadata_class.decode_group_metadata(store[group_key]) - assert meta == {'attributes': {}} - - store.close() - - def _test_init_group_overwrite_chunk_store(self, order): - pytest.skip( - "In v3 array and group names cannot overlap" - ) - def test_list_prefix(self): store = self.create_store() @@ -664,32 +264,14 @@ def test_init_array(self): init_array(store, path=path, shape=1000, chunks=100) # check metadata - array_meta_key = 'meta/root/' + path + '.array.json' - assert array_meta_key in store - meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) + mkey = 'meta/root/' + path + '.array.json' + assert mkey in store + meta = store._metadata_class.decode_array_metadata(store[mkey]) assert (1000,) == meta['shape'] assert (100,) == meta['chunk_grid']['chunk_shape'] assert np.dtype(None) == meta['data_type'] assert meta['chunk_grid']['separator'] == "/" - # TODO: remove this skip once v3 support is added to hierarchy.Group - @pytest.mark.skipif(True, reason="need v3 support in zarr.hierarchy.Group") - def test_deep_ndim(self): - import zarr - - store = self.create_store() - foo = zarr.open_group(store=store, path='group1') - bar = foo.create_group("bar") - baz = bar.create_dataset("baz", - shape=(4, 4, 4), - chunks=(2, 2, 2), - dtype="i8") - baz[:] = 1 - assert set(store.listdir()) == set(["data", "meta", "zarr.json"]) - assert set(store.listdir("meta/root/group1")) == set(["bar", "bar.group.json"]) - assert set(store.listdir("data/root/group1")) == set(["bar"]) - assert foo["bar"]["baz"][(0, 0, 0)] == 1 - @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestFSStoreV3WithKeySeparator(StoreV3Tests): @@ -724,9 +306,9 @@ def test_init_array(self): init_array(store, path=path, shape=1000, chunks=100) # check metadata - array_meta_key = self.root + path + '.array.json' - assert array_meta_key in store - meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) + mkey = self.root + path + '.array.json' + assert mkey in store + meta = store._metadata_class.decode_array_metadata(store[mkey]) assert (1000,) == meta['shape'] assert (100,) == 
meta['chunk_grid']['chunk_shape']
        assert np.dtype(None) == meta['data_type']

From 9fa4bdc4727e25a0035a8227724bd5664b83aed3 Mon Sep 17 00:00:00 2001
From: Gregory Lee
Date: Mon, 20 Dec 2021 23:00:24 -0500
Subject: [PATCH 056/109] remove redundant test_hexdigest methods

only need to define expected() for each class

reduce redundant code in test_core_v3.py
---
 zarr/meta.py               |   2 +-
 zarr/tests/test_core.py    | 383 +++++++++----------------------
 zarr/tests/test_core_v3.py | 278 +++------------------------
 3 files changed, 113 insertions(+), 550 deletions(-)

diff --git a/zarr/meta.py b/zarr/meta.py
index 02730f0c01..9d85251e10 100644
--- a/zarr/meta.py
+++ b/zarr/meta.py
@@ -62,7 +62,7 @@ def get_extended_dtype_info(dtype):
             type=dtype.str,
             fallback=None,
         )
-    elif dtype.str.startswith("|U"):
+    elif dtype.str.startswith("U"):
         return dict(
             extension="TODO: unicode array protocol URL",  # noqa
             type=dtype.str,
diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py
index 938a58b494..0833a58b64 100644
--- a/zarr/tests/test_core.py
+++ b/zarr/tests/test_core.py
@@ -43,7 +43,7 @@

 class TestArray(unittest.TestCase):

-    _version = 2
+    version = 2

     def test_array_init(self):

@@ -530,6 +530,8 @@ def test_setitem_data_not_shared(self):
         z.store.close()

     def expected(self):
+        # tests for array without path will not be run for v3 stores
+        assert self.version == 2
         return [
             "063b02ff8d9d3bab6da932ad5828b506ef0a6578",
             "f97b84dc9ffac807415f750100108764e837bb82",
@@ -1113,6 +1115,19 @@ def test_dtypes(self):
             assert_array_equal(a, z[:])
             z.store.close()

+        # unicode and bytestring dtypes
+        for dtype in ['S4', 'S6', 'U5', 'U5']:
+            n = 10
+            z = self.create_array(shape=n, chunks=3, dtype=dtype)
+            assert z.dtype == np.dtype(dtype)
+            if dtype.startswith('S'):
+                a = np.asarray([b'name'] * n, dtype=dtype)
+            else:
+                a = np.asarray(['§Æ¥¿é'] * n, dtype=dtype)
+            z[:] = a
+            np.all(a == z[:])
+            z.store.close()
+
         # check that datetime generic units are not allowed
         with pytest.raises(ValueError):
             self.create_array(shape=100, dtype='M8')
@@ -1475,11 +1490,17 @@ def test_attributes(self):
         a.attrs['foo'] = 'bar'
         assert a.attrs.key in a.store
         attrs = json_loads(a.store[a.attrs.key])
+        if self.version > 2:
+            # in v3, attributes are in a sub-dictionary of the metadata
+            attrs = attrs['attributes']
         assert 'foo' in attrs and attrs['foo'] == 'bar'

         a.attrs['bar'] = 'foo'
         assert a.attrs.key in a.store
         attrs = json_loads(a.store[a.attrs.key])
+        if self.version > 2:
+            # in v3, attributes are in a sub-dictionary of the metadata
+            attrs = attrs['attributes']
         assert 'foo' in attrs and attrs['foo'] == 'bar'
         assert 'bar' in attrs and attrs['bar'] == 'foo'
         a.store.close()
@@ -1509,28 +1530,14 @@ def create_array(read_only=False, **kwargs):

     def test_nchunks_initialized(self):
         pass

-    def test_hexdigest(self):
-        # Check basic 1-D array
-        z = self.create_array(shape=(1050,), chunks=100, dtype='<i4')

[...]

From: Gregory Lee
Date: Mon, 20 Dec 2021 23:45:54 -0500
Subject: [PATCH 057/109] move test_core_v3.py functions back into
 test_core.py

---
 zarr/tests/test_core.py    | 594 ++++++++++++++++++++++++++++++-
 zarr/tests/test_core_v3.py | 697 -------------------------------------
 2 files changed, 587 insertions(+), 704 deletions(-)
 delete mode 100644 zarr/tests/test_core_v3.py

diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py
index 0833a58b64..1140fd0b1a 100644
--- a/zarr/tests/test_core.py
+++ b/zarr/tests/test_core.py
@@ -18,6 +18,7 @@ from pkg_resources import parse_version

 from zarr.core import Array
+from zarr.errors import ArrayNotFoundError, ContainsGroupError
 from
zarr.meta import json_loads from zarr.n5 import N5Store, N5FSStore, n5_keywords from zarr.storage import ( @@ -30,6 +31,16 @@ LRUStoreCache, NestedDirectoryStore, SQLiteStore, + # ABSStoreV3, + DBMStoreV3, + DirectoryStoreV3, + FSStoreV3, + KVStoreV3, + LMDBStoreV3, + LRUStoreCacheV3, + NestedDirectoryStoreV3, + SQLiteStoreV3, + StoreV3, atexit_rmglob, atexit_rmtree, init_array, @@ -2232,7 +2243,6 @@ def create_array(read_only=False, **kwargs): return Array(store, read_only=read_only, cache_attrs=cache_attrs, cache_metadata=cache_metadata, write_empty_chunks=write_empty_chunks) - def expected(self): return [ "b80367c5599d47110d42bd8886240c2f46620dba", @@ -2390,7 +2400,8 @@ def create_array(read_only=False, **kwargs): def test_cache_metadata(self): a1 = self.create_array(shape=100, chunks=10, dtype='i1', cache_metadata=False) - a2 = Array(a1.store, cache_metadata=True) + path = None if self.version == 2 else a1.path + a2 = Array(a1.store, path=path, cache_metadata=True) assert a1.shape == a2.shape assert a1.size == a2.size assert a1.nbytes == a2.nbytes @@ -2430,7 +2441,8 @@ def test_cache_metadata(self): def test_cache_attrs(self): a1 = self.create_array(shape=100, chunks=10, dtype='i1', cache_attrs=False) - a2 = Array(a1.store, cache_attrs=True) + path = None if self.version == 2 else 'arr1' + a2 = Array(a1.store, path=path, cache_attrs=True) assert a1.attrs.asdict() == a2.attrs.asdict() # a1 is not caching so *will* see updates made via other objects @@ -2534,7 +2546,8 @@ def test_read_nitems_less_than_blocksize_from_multiple_chunks(self): ''' z = self.create_array(shape=1000000, chunks=100_000) z[40_000:80_000] = 1 - b = Array(z.store, read_only=True, partial_decompress=True) + path = None if self.version == 2 else z.path + b = Array(z.store, path=path, read_only=True, partial_decompress=True) assert (b[40_000:80_000] == 1).all() def test_read_from_all_blocks(self): @@ -2543,7 +2556,8 @@ def test_read_from_all_blocks(self): ''' z = self.create_array(shape=1000000, chunks=100_000) z[2:99_000] = 1 - b = Array(z.store, read_only=True, partial_decompress=True) + path = None if self.version == 2 else z.path + b = Array(z.store, path=path, read_only=True, partial_decompress=True) assert (b[2:99_000] == 1).all() @@ -2617,7 +2631,8 @@ def test_read_nitems_less_than_blocksize_from_multiple_chunks(self): ''' z = self.create_array(shape=1000000, chunks=100_000) z[40_000:80_000] = 1 - b = Array(z.store, read_only=True, partial_decompress=True) + path = None if self.version == 2 else z.path + b = Array(z.store, path=path, read_only=True, partial_decompress=True) assert (b[40_000:80_000] == 1).all() def test_read_from_all_blocks(self): @@ -2626,5 +2641,570 @@ def test_read_from_all_blocks(self): ''' z = self.create_array(shape=1000000, chunks=100_000) z[2:99_000] = 1 - b = Array(z.store, read_only=True, partial_decompress=True) + path = None if self.version == 2 else z.path + b = Array(z.store, path=path, read_only=True, partial_decompress=True) assert (b[2:99_000] == 1).all() + + +#### +# StoreV3 test classes inheriting from the above below this point +#### + +# Start with TestArrayWithPathV3 not TestArrayV3 since path must be supplied + +class TestArrayWithPathV3(TestArrayWithPath): + + version = 3 + + @staticmethod + def create_array(array_path='arr1', read_only=False, **kwargs): + store = KVStoreV3(dict()) + kwargs.setdefault('compressor', Zlib(level=1)) + cache_metadata = kwargs.pop('cache_metadata', True) + cache_attrs = kwargs.pop('cache_attrs', True) + write_empty_chunks = 
kwargs.pop('write_empty_chunks', True)
+        init_array(store, path=array_path, **kwargs)
+        return Array(store, path=array_path, read_only=read_only,
+                     cache_metadata=cache_metadata, cache_attrs=cache_attrs,
+                     write_empty_chunks=write_empty_chunks)
+
+    def test_array_init(self):
+
+        # should not be able to initialize without a path in V3
+        store = KVStoreV3(dict())
+        with pytest.raises(ValueError):
+            init_array(store, shape=100, chunks=10, dtype="<f8")

[...]

From: Gregory Lee
Date: Tue, 21 Dec 2021 00:23:53 -0500
Subject: [PATCH 058/109] typing fixes for mypy

---
 zarr/_storage/store.py | 4 ++--
 zarr/storage.py        | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py
index 3e43014a35..3badc367ba 100644
--- a/zarr/_storage/store.py
+++ b/zarr/_storage/store.py
@@ -337,7 +337,7 @@ def _rename_from_keys(store: BaseStore, src_path: str, dst_path: str) -> None:
         for root_prefix in ['meta/root/', 'data/root/']:
             _src_prefix = root_prefix + src_prefix
             _dst_prefix = root_prefix + dst_prefix
-            for key in store.list_prefix(_src_prefix):
+            for key in store.list_prefix(_src_prefix):  # type: ignore
                 new_key = _dst_prefix + key[len(_src_prefix):]
                 store[new_key] = store.pop(key)

@@ -360,7 +360,7 @@ def _rmdir_from_keys(store: StoreLike, path: Optional[str] = None) -> None:
             del store[key]


-def _rmdir_from_keys_v3(store: BaseStore, path: Optional[str] = "") -> None:
+def _rmdir_from_keys_v3(store: StoreV3, path: str = "") -> None:

     meta_dir = 'meta/root/' + path
     meta_dir = meta_dir.rstrip('/')
diff --git a/zarr/storage.py b/zarr/storage.py
index 2e9e17eeef..f2815f87a4 100644
--- a/zarr/storage.py
+++ b/zarr/storage.py
@@ -209,7 +209,7 @@ def rmdir(store: StoreLike, path: Path = None):
         if store_version == 2:
             _rmdir_from_keys(store, path)
         else:
-            _rmdir_from_keys_v3(store, path)
+            _rmdir_from_keys_v3(store, path)  # type: ignore


From 7524764e5102ca3f8a5a2e8c8f5505eda19e9594 Mon Sep 17 00:00:00 2001
From: Gregory Lee
Date: Tue, 21 Dec 2021 11:03:54 -0500
Subject: [PATCH 059/109] can assume self.keys() exists since BaseStore
 inherits from MutableMapping

---
 zarr/_storage/store.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py
index 3badc367ba..0696376439 100644
--- a/zarr/_storage/store.py
+++ b/zarr/_storage/store.py
@@ -224,11 +224,7 @@ def list_dir(self, prefix):
         return keys, list(set(prefixes))

     def list(self):
-        if hasattr(self, 'keys'):
-            return list(self.keys())
-        raise NotImplementedError(
-            "The list method has not been implemented for this store type."
- ) + return list(self.keys()) def __contains__(self, key): return key in self.list() From 4824143d6da964331ffd012eb3fcc988b4fd6b19 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 21 Dec 2021 12:24:16 -0500 Subject: [PATCH 060/109] refactor rmdir methods for v3 and improve coverage --- zarr/_storage/store.py | 10 ----- zarr/storage.py | 80 ++++++++++------------------------ zarr/tests/test_convenience.py | 46 ++++++++++++++----- zarr/tests/test_core.py | 35 +++++++++++++++ 4 files changed, 93 insertions(+), 78 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 0696376439..389c40b228 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -232,12 +232,10 @@ def __contains__(self, key): @abc.abstractmethod def __setitem__(self, key, value): """Set a value.""" - return @abc.abstractmethod def __getitem__(self, key): """Get a value.""" - return def clear(self): """Remove all items from store.""" @@ -283,14 +281,6 @@ def _ensure_store(store): f"Zarr.storage.KVStoreV3. Got {store}" ) - def rmdir(self, path: str = "") -> None: - if not self.is_erasable(): - raise NotImplementedError( - f'{type(self)} is not erasable, cannot call "rmdir"' - ) # pragma: no cover - path = normalize_storage_path(path) - _rmdir_from_keys_v3(self, path) - # allow MutableMapping for backwards compatibility StoreLike = Union[BaseStore, MutableMapping] diff --git a/zarr/storage.py b/zarr/storage.py index f2815f87a4..18b1a45ba3 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2932,7 +2932,15 @@ def _get_files_and_dirs_from_path(store, path): return files, dirs -class KVStoreV3(KVStore, StoreV3): +class RmdirV3(): + """Mixin class that can be used to ensure override of v2 rmdir class.""" + + def rmdir(self, path: str = "") -> None: + path = normalize_storage_path(path) + _rmdir_from_keys_v3(self, path) + + +class KVStoreV3(RmdirV3, KVStore, StoreV3): def list(self): return list(self._mutable_mapping.keys()) @@ -2947,10 +2955,6 @@ def __eq__(self, other): self._mutable_mapping == other._mutable_mapping ) - def rmdir(self, path: str = "") -> None: - path = normalize_storage_path(path) - _rmdir_from_keys_v3(self, path) - KVStoreV3.__doc__ = KVStore.__doc__ @@ -3021,11 +3025,9 @@ def rmdir(self, path=None): sfx = _get_hierarchy_metadata(self)['metadata_key_suffix'] meta_dir = ('meta/root/' + path).rstrip('/') array_meta_file = meta_dir + '.array' + sfx - if array_meta_file in self: - self.fs.rm(array_meta_file) # type: ignore + self.pop(array_meta_file, None) group_meta_file = meta_dir + '.group' + sfx - if group_meta_file in self: - self.fs.rm(group_meta_file) # type: ignore + self.pop(group_meta_file, None) else: store_path = self.dir_path(path) if self.fs.isdir(store_path): @@ -3106,11 +3108,9 @@ def rmdir(self, path: Path = None): sfx = _get_hierarchy_metadata(self)['metadata_key_suffix'] meta_dir = ('meta/root/' + path).rstrip('/') array_meta_file = meta_dir + '.array' + sfx - if array_meta_file in self: - self.erase(array_meta_file) # type: ignore + self.pop(array_meta_file, None) group_meta_file = meta_dir + '.group' + sfx - if group_meta_file in self: - self.erase(group_meta_file) # type: ignore + self.pop(group_meta_file, None) else: # clear out root self.root = self.cls() @@ -3190,13 +3190,9 @@ def rmdir(self, path=None): sfx = _get_hierarchy_metadata(self)['metadata_key_suffix'] meta_dir = ('meta/root/' + path).rstrip('/') array_meta_file = meta_dir + '.array' + sfx - if array_meta_file in self: - file_path = os.path.join(dir_path, array_meta_file) - 
os.remove(file_path) + self.pop(array_meta_file, None) group_meta_file = meta_dir + '.group' + sfx - if group_meta_file in self: - file_path = os.path.join(dir_path, group_meta_file) - os.remove(file_path) + self.pop(group_meta_file, None) elif os.path.isdir(dir_path): shutil.rmtree(dir_path) @@ -3246,10 +3242,6 @@ def getsize(self, path=None): else: return 0 - def rmdir(self, path: str = "") -> None: - path = normalize_storage_path(path) - _rmdir_from_keys_v3(self, path) - ZipStoreV3.__doc__ = ZipStore.__doc__ @@ -3273,7 +3265,7 @@ def __setitem__(self, key, value): NestedDirectoryStoreV3.__doc__ = NestedDirectoryStore.__doc__ -class RedisStoreV3(RedisStore, StoreV3): +class RedisStoreV3(RmdirV3, RedisStore, StoreV3): def list(self): return list(self.keys()) @@ -3282,15 +3274,11 @@ def __setitem__(self, key, value): self._validate_key(key) super().__setitem__(key, value) - def rmdir(self, path: str = "") -> None: - path = normalize_storage_path(path) - _rmdir_from_keys_v3(self, path) - RedisStoreV3.__doc__ = RedisStore.__doc__ -class MongoDBStoreV3(MongoDBStore, StoreV3): +class MongoDBStoreV3(RmdirV3, MongoDBStore, StoreV3): def list(self): return list(self.keys()) @@ -3299,15 +3287,11 @@ def __setitem__(self, key, value): self._validate_key(key) super().__setitem__(key, value) - def rmdir(self, path: str = "") -> None: - path = normalize_storage_path(path) - _rmdir_from_keys_v3(self, path) - MongoDBStoreV3.__doc__ = MongoDBStore.__doc__ -class DBMStoreV3(DBMStore, StoreV3): +class DBMStoreV3(RmdirV3, DBMStore, StoreV3): def list(self): return list(self.keys()) @@ -3316,15 +3300,11 @@ def __setitem__(self, key, value): self._validate_key(key) super().__setitem__(key, value) - def rmdir(self, path: str = "") -> None: - path = normalize_storage_path(path) - _rmdir_from_keys_v3(self, path) - DBMStoreV3.__doc__ = DBMStore.__doc__ -class LMDBStoreV3(LMDBStore, StoreV3): +class LMDBStoreV3(RmdirV3, LMDBStore, StoreV3): def list(self): return list(self.keys()) @@ -3333,10 +3313,6 @@ def __setitem__(self, key, value): self._validate_key(key) super().__setitem__(key, value) - def rmdir(self, path: str = "") -> None: - path = normalize_storage_path(path) - _rmdir_from_keys_v3(self, path) - LMDBStoreV3.__doc__ = LMDBStore.__doc__ @@ -3382,17 +3358,9 @@ def rmdir(self, path=None): sfx = _get_hierarchy_metadata(self)['metadata_key_suffix'] meta_dir = ('meta/root/' + path).rstrip('/') array_meta_file = meta_dir + '.array' + sfx - if array_meta_file in self: - with self.lock: - self.cursor.execute( - 'DELETE FROM zarr WHERE k LIKE (? || "/%")', (array_meta_file,) - ) + self.pop(array_meta_file, None) group_meta_file = meta_dir + '.group' + sfx - if group_meta_file in self: - with self.lock: - self.cursor.execute( - 'DELETE FROM zarr WHERE k LIKE (? 
|| "/%")', (group_meta_file,) - ) + self.pop(group_meta_file, None) else: self.clear() @@ -3400,7 +3368,7 @@ def rmdir(self, path=None): SQLiteStoreV3.__doc__ = SQLiteStore.__doc__ -class LRUStoreCacheV3(LRUStoreCache, StoreV3): +class LRUStoreCacheV3(RmdirV3, LRUStoreCache, StoreV3): def __init__(self, store, max_size: int): self._store = StoreV3._ensure_store(store) @@ -3420,9 +3388,5 @@ def __setitem__(self, key, value): self._validate_key(key) super().__setitem__(key, value) - def rmdir(self, path: str = "") -> None: - path = normalize_storage_path(path) - _rmdir_from_keys_v3(self, path) - LRUStoreCacheV3.__doc__ = LRUStoreCache.__doc__ diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index 265ebc0c5d..fb8d65aadc 100644 --- a/zarr/tests/test_convenience.py +++ b/zarr/tests/test_convenience.py @@ -26,10 +26,13 @@ from zarr.hierarchy import Group, group from zarr.storage import ( ConsolidatedMetadataStore, - MemoryStore, - MemoryStoreV3, + DirectoryStoreV3, + FSStoreV3, KVStore, KVStoreV3, + MemoryStore, + MemoryStoreV3, + SQLiteStoreV3, atexit_rmtree, getsize, ) @@ -858,14 +861,14 @@ def test_logging(self, source, dest, tmpdir): class TestCopyV3(TestCopy): - @pytest.fixture(params=[False, True], ids=['zarr', 'hdf5']) + @pytest.fixture(params=['zarr', 'hdf5']) def source(self, request, tmpdir): def prep_source(source): foo = source.create_group('foo') foo.attrs['experiment'] = 'weird science' baz = foo.create_dataset('bar/baz', data=np.arange(100), chunks=(50,)) baz.attrs['units'] = 'metres' - if request.param: + if request.param == 'hdf5': extra_kws = dict(compression='gzip', compression_opts=3, fillvalue=84, shuffle=True, fletcher32=True) else: @@ -874,23 +877,46 @@ def prep_source(source): chunks=(10, 2), dtype='i2', **extra_kws) return source - if request.param: + if request.param == 'hdf5': h5py = pytest.importorskip('h5py') fn = tmpdir.join('source.h5') with h5py.File(str(fn), mode='w') as h5f: yield prep_source(h5f) - else: + elif request.param == 'zarr': yield prep_source(group(path='group1', zarr_version=3)) - - @pytest.fixture(params=[False, True], ids=['zarr', 'hdf5']) + elif request.param == 'zarr_fsstore': + fn = tmpdir.join('source.zr3') + store = FSStoreV3(str(fn), auto_mkdir=True) + yield prep_source(group(store, path='group1', zarr_version=3)) + + # Test with various destination StoreV3 types as TestCopyV3 covers rmdir + @pytest.fixture( + params=['zarr', 'zarr_fsstore', 'zarr_kvstore', 'zarr_directorystore', + 'zarr_sqlitestore', 'hdf5'] + ) def dest(self, request, tmpdir): - if request.param: + if request.param == 'hdf5': h5py = pytest.importorskip('h5py') fn = tmpdir.join('dest.h5') with h5py.File(str(fn), mode='w') as h5f: yield h5f - else: + elif request.param == 'zarr': yield group(path='group2', zarr_version=3) + elif request.param == 'zarr_kvstore': + store = KVStoreV3(dict()) + yield group(store, path='group2', zarr_version=3) + elif request.param == 'zarr_fsstore': + fn = tmpdir.join('dest.zr3') + store = FSStoreV3(str(fn), auto_mkdir=True) + yield group(store, path='group2', zarr_version=3) + elif request.param == 'zarr_directorystore': + fn = tmpdir.join('dest.zr3') + store = DirectoryStoreV3(str(fn)) + yield group(store, path='group2', zarr_version=3) + elif request.param == 'zarr_sqlitestore': + fn = tmpdir.join('dest.db') + store = SQLiteStoreV3(str(fn)) + yield group(store, path='group2', zarr_version=3) def test_copy_array_create_options(self, source, dest): dest_h5py = dest.__module__.startswith('h5py.') diff --git 
a/zarr/tests/test_core.py b/zarr/tests/test_core.py
index 1140fd0b1a..1fa830f0f9 100644
--- a/zarr/tests/test_core.py
+++ b/zarr/tests/test_core.py
@@ -17,6 +17,11 @@ from numpy.testing import assert_array_almost_equal, assert_array_equal
 from pkg_resources import parse_version

+from zarr._storage.store import (
+    _prefix_to_array_key,
+    _prefix_to_attrs_key,
+    _prefix_to_group_key
+)
 from zarr.core import Array
 from zarr.errors import ArrayNotFoundError, ContainsGroupError
 from zarr.meta import json_loads
@@ -2652,6 +2657,36 @@ def test_read_from_all_blocks(self):

 # Start with TestArrayWithPathV3 not TestArrayV3 since path must be supplied

+
+class TestArrayV3(unittest.TestCase):
+
+    version = 3
+
+    def test_array_init(self):
+
+        # normal initialization
+        store = KVStoreV3(dict())
+        with pytest.raises(ValueError):
+            # cannot init_array for v3 without a path
+            init_array(store, shape=100, chunks=10, dtype="
Date: Tue, 21 Dec 2021 12:38:14 -0500
Subject: [PATCH 061/109] improve coverage of core.py

---
 zarr/core.py            | 3 ---
 zarr/tests/test_core.py | 1 +
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/zarr/core.py b/zarr/core.py
index be9178bdda..38fb2fe8d4 100644
--- a/zarr/core.py
+++ b/zarr/core.py
@@ -253,10 +253,7 @@ def _load_metadata_nosync(self):
             self._order = meta['chunk_memory_layout']
             chunk_separator = meta['chunk_grid']['separator']
             if dimension_separator is None:
-                # TODO: omit attribute in v3?
                 dimension_separator = meta.get('dimension_separator', chunk_separator)
-            else:
-                assert chunk_separator == dimension_separator

             self._dimension_separator = dimension_separator

diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py
index 1fa830f0f9..5e13b41c7f 100644
--- a/zarr/tests/test_core.py
+++ b/zarr/tests/test_core.py
@@ -2715,6 +2715,7 @@ def test_array_init(self):
         path = 'foo/bar'
         init_array(store, shape=100, chunks=10, path=path, dtype='
Date: Tue, 21 Dec 2021 12:38:28 -0500
Subject: [PATCH 062/109] improve coverage of convenience.py

---
 zarr/tests/test_convenience.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py
index fb8d65aadc..f3a97da256 100644
--- a/zarr/tests/test_convenience.py
+++ b/zarr/tests/test_convenience.py
@@ -122,6 +122,19 @@ def test_save_errors(zarr_version):
         save('data/group.zarr', zarr_version=zarr_version)

+def test_zarr_v3_save_multiple_unnamed():
+    x = np.ones(8)
+    y = np.zeros(8)
+    store = KVStoreV3(dict())
+    # no names are provided for the arrays; a group path is still required for v3
+    save_group(store, x, y, path='dataset', zarr_version=3)
+    # names become arr_{i} for unnamed *args
+    assert 'data/root/dataset/arr_0/c0' in store
+    assert 'data/root/dataset/arr_1/c0' in store
+    assert 'meta/root/dataset/arr_0.array.json' in store
+    assert 'meta/root/dataset/arr_1.array.json' in store
+
+
 def test_zarr_v3_save_errors():
     x = np.ones(8)
     with pytest.raises(ValueError):

From f2982d0fb5149dea37cde60520b45fd4122c11c9 Mon Sep 17 00:00:00 2001
From: Gregory Lee
Date: Tue, 21 Dec 2021 12:48:51 -0500
Subject: [PATCH 063/109] expand info tests

needed to also test with a size > 10**4 to improve coverage
---
 zarr/tests/test_info.py | 17 +++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/zarr/tests/test_info.py b/zarr/tests/test_info.py
index 361490c0a8..07a14bceed 100644
--- a/zarr/tests/test_info.py
+++ b/zarr/tests/test_info.py
@@ -1,15 +1,18 @@
 import numcodecs
+import pytest

 import zarr
+from zarr.util import InfoReporter

-def test_info():
+@pytest.mark.parametrize('array_size', [10, 15000])
+def test_info(array_size):

     # setup
     g = zarr.group(store=dict(), chunk_store=dict(),
                    synchronizer=zarr.ThreadSynchronizer())
     g.create_group('foo')
-    z = g.zeros('bar', shape=10, filters=[numcodecs.Adler32()])
+    z = g.zeros('bar', shape=array_size, filters=[numcodecs.Adler32()])

     # test group info
     items = g.info_items()
@@ -20,6 +23,10 @@
     ])
     assert expected_keys == keys

+    # can also get a string representation of info via the info attribute
+    assert isinstance(g.info, InfoReporter)
+    assert "Type" in repr(g.info)
+
     # test array info
     items = z.info_items()
     keys = sorted([k for k, _ in items])
@@ -29,3 +36,9 @@
         'No. bytes stored', 'Storage ratio', 'Chunks initialized', 'Name'
     ])
     assert expected_keys == keys
+
+    # can also get a string representation of info via the info attribute
+    assert isinstance(z.info, InfoReporter)
+    assert "Type" in repr(z.info)
+
+

From 2b79be1a9caf65994736b9da561d07415069f476 Mon Sep 17 00:00:00 2001
From: Gregory Lee
Date: Tue, 21 Dec 2021 12:57:59 -0500
Subject: [PATCH 064/109] Expand tests of Array.view

---
 zarr/tests/test_core.py | 23 +++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py
index 5e13b41c7f..690640d3cb 100644
--- a/zarr/tests/test_core.py
+++ b/zarr/tests/test_core.py
@@ -2780,6 +2780,29 @@ def test_nbytes_stored(self):

         z.store.close()

+    def test_view(self):
+
+        # dict as store
+        z = self.create_array(shape=1005, chunks=100, dtype=float)
+
+        # view with a different dtype
+        x = z.view(dtype=bytes)
+        assert x.is_view
+        assert x.dtype == bytes
+
+        new_shape = (1, z.shape[0])
+        x = z.view(shape=new_shape)
+        assert x.is_view
+        assert x.shape == new_shape
+
+        x = z.view(chunks=10)
+        assert x.is_view
+        assert x.chunks == (10,)
+
+        x = z.view(fill_value=5)
+        assert x.is_view
+        assert x[-1] == 5
+
     def test_nchunks_initialized(self):
         # copied from TestArray so the empty version from TestArrayWithPath is
         # not used

From 02d95350aa59ae5762b4a0ccc087d0445a1fb722 Mon Sep 17 00:00:00 2001
From: Gregory Lee
Date: Tue, 21 Dec 2021 13:08:41 -0500
Subject: [PATCH 065/109] improve coverage of creation.py

---
 zarr/tests/test_creation.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/zarr/tests/test_creation.py b/zarr/tests/test_creation.py
index e92a9caf62..0f247028ea 100644
--- a/zarr/tests/test_creation.py
+++ b/zarr/tests/test_creation.py
@@ -306,6 +306,14 @@ def test_open_array(zarr_version, dimension_separator):
         assert os.path.abspath(chunk_store) == z.chunk_store.path

+def test_open_array_none():
+
+    # open with both store and zarr_version = None
+    z = open_array(mode='w', shape=100, chunks=10)
+    assert isinstance(z, Array)
+    assert z._version == 2
+
+
 @pytest.mark.parametrize('dimension_separator', ['.', '/', None])
 @pytest.mark.parametrize('zarr_version', [2, 3])
 def test_open_array_infer_separator_from_store(zarr_version, dimension_separator):
@@ -579,6 +587,10 @@ def test_create(zarr_version):

     # defaults
     z = create(100, **kwargs)
+    if zarr_version == 3:
+        with pytest.raises(ValueError):
+            # cannot create without specifying a path
+            z = create(100, zarr_version=3)
     assert isinstance(z, Array)
     assert (100,) == z.shape
     assert (100,) == z.chunks  # auto-chunks

From f2dbf7bd4a80308d23be9874ca1bd0324cd7e9f4 Mon Sep 17 00:00:00 2001
From: Gregory Lee
Date: Tue, 21 Dec 2021 13:42:00 -0500
Subject: [PATCH 066/109] improve coverage of hierarchy.py

---
 zarr/hierarchy.py            |  7 ------
 zarr/tests/test_hierarchy.py | 46 +++++++++++++++++++++++++++++++++---
 2 files
changed, 43 insertions(+), 10 deletions(-) diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 53db0e617e..6558342753 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -116,9 +116,6 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, store: BaseStore = _normalize_store_arg(store, zarr_version=zarr_version) if zarr_version is None: zarr_version = getattr(store, '_store_version', 2) - if zarr_version > 2 and path: - if path.startswith(("meta/", "data/")): - raise ValueError("path must note start with 'meta/' or 'data/'") if chunk_store is not None: chunk_store: BaseStore = _normalize_store_arg(chunk_store, zarr_version=zarr_version) if not getattr(chunk_store, '_store_version', 2) == zarr_version: @@ -154,8 +151,6 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, raise GroupNotFoundError(path) else: implicit_prefix = 'meta/root/' + self._key_prefix - if not implicit_prefix.endswith('/'): - implicit_prefix += '/' if self._store.list_prefix(implicit_prefix): # implicit group does not have any metadata self._meta = None @@ -382,8 +377,6 @@ def __contains__(self, item): False """ - if self._version > 2 and item.startswith('meta/'): - raise ValueError("meta/ must not be in item") path = self._item_path(item) return contains_array(self._store, path) or \ contains_group(self._store, path, explicit_only=False) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index b54f3211cc..f48dde939d 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -92,6 +92,11 @@ def test_group_init_errors_1(self): Group(store, chunk_store=chunk_store) store.close() + def test_group_repr(self): + store, chunk_store = self.create_store() + g = self.create_group(store, chunk_store=chunk_store) + assert g.name in repr(g) + def test_group_init_errors_2(self): store, chunk_store = self.create_store() init_array(store, shape=1000, chunks=100, chunk_store=chunk_store) @@ -762,9 +767,6 @@ def test_iterators_recurse(self): d3 = g2.create_dataset('zab', shape=2000, chunks=200) d3[:] = np.arange(2000) - if g1._version == 3: - pytest.skip("TODO: fix for V3") - # test recursive array_keys array_keys = list(g1['foo'].array_keys(recurse=False)) array_keys_recurse = list(g1['foo'].array_keys(recurse=True)) @@ -1173,6 +1175,7 @@ def test_round_trip_nd(self): f = open_group(store, mode='w') f.create_dataset(name, data=data, chunks=(5, 5, 5), compressor=None) + assert name in f h = open_group(store, mode='r') np.testing.assert_array_equal(h[name][:], data) @@ -1461,6 +1464,9 @@ def test_group(zarr_version): assert '/' == g.name else: g = group(path='group1', zarr_version=zarr_version) + with pytest.raises(ValueError): + # must supply path for v3 groups + group(zarr_version=3) assert 'group1' == g.path assert '/group1' == g.name assert isinstance(g, Group) @@ -1741,3 +1747,37 @@ def test_tree(zarr_version): └── quux └── baz (100,) float64""") _check_tree(g3, expect_bytes, expect_text) + + +def test_group_mismatched_store_versions(): + store_v3 = KVStoreV3(dict()) + store_v2 = KVStore(dict()) + + # separate chunk store + chunk_store_v2 = KVStore(dict()) + chunk_store_v3 = KVStoreV3(dict()) + + init_group(store_v2, path='group1', chunk_store=chunk_store_v2) + init_group(store_v3, path='group1', chunk_store=chunk_store_v3) + + g1_v3 = Group(store_v3, path='group1', read_only=True, chunk_store=chunk_store_v3) + assert isinstance(g1_v3._store, KVStoreV3) + g1_v2 = Group(store_v2, path='group1', read_only=True, chunk_store=chunk_store_v2) + 
assert isinstance(g1_v2._store, KVStore) + + # store and chunk_store must have the same zarr protocol version + with pytest.raises(ValueError): + Group(store_v3, path='group1', read_only=False, chunk_store=chunk_store_v2) + with pytest.raises(ValueError): + Group(store_v2, path='group1', read_only=False, chunk_store=chunk_store_v3) + with pytest.raises(ValueError): + open_group(store_v2, path='group1', chunk_store=chunk_store_v3) + with pytest.raises(ValueError): + open_group(store_v3, path='group1', chunk_store=chunk_store_v2) + + # raises Value if read_only and path is not a pre-existing group + with pytest.raises(ValueError): + Group(store_v3, path='group2', read_only=True, chunk_store=chunk_store_v3) + with pytest.raises(ValueError): + Group(store_v3, path='group2', read_only=True, chunk_store=chunk_store_v3) + From f3378b8ba4d30fd02edcc7f9cacfcb4ecee80faf Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 21 Dec 2021 14:30:22 -0500 Subject: [PATCH 067/109] improve coverage of meta.py --- zarr/meta.py | 2 +- zarr/tests/test_meta.py | 119 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 119 insertions(+), 2 deletions(-) diff --git a/zarr/meta.py b/zarr/meta.py index 9d85251e10..bb4bae4199 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -43,7 +43,7 @@ ) -def get_extended_dtype_info(dtype): +def get_extended_dtype_info(dtype) -> dict: if dtype.str in _v3_complex_types: return dict( extension="https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/extensions/complex-dtypes/v1.0.html", # noqa diff --git a/zarr/tests/test_meta.py b/zarr/tests/test_meta.py index 5469921110..bec05f2d4a 100644 --- a/zarr/tests/test_meta.py +++ b/zarr/tests/test_meta.py @@ -1,4 +1,5 @@ import base64 +import copy import json import numpy as np @@ -8,7 +9,10 @@ from zarr.errors import MetadataError from zarr.meta import (ZARR_FORMAT, decode_array_metadata, decode_dtype, decode_group_metadata, encode_array_metadata, - encode_dtype, encode_fill_value, decode_fill_value) + encode_dtype, encode_fill_value, decode_fill_value, + get_extended_dtype_info, _v3_complex_types, + _v3_datetime_types, _default_entry_point_metadata_v3, + Metadata3) from zarr.util import normalize_dtype, normalize_fill_value @@ -260,6 +264,56 @@ def test_encode_decode_array_dtype_shape(): assert meta_dec['filters'] is None +def test_encode_decode_array_dtype_shape_v3(): + + meta = dict( + shape=(100,), + chunk_grid=dict(type='regular', + chunk_shape=(10,), + separator=('/')), + data_type=np.dtype('(10, 10)U4', ' Date: Tue, 21 Dec 2021 14:31:38 -0500 Subject: [PATCH 068/109] pep8 --- zarr/tests/test_hierarchy.py | 1 - zarr/tests/test_info.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index f48dde939d..a5a5450ed4 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -1780,4 +1780,3 @@ def test_group_mismatched_store_versions(): Group(store_v3, path='group2', read_only=True, chunk_store=chunk_store_v3) with pytest.raises(ValueError): Group(store_v3, path='group2', read_only=True, chunk_store=chunk_store_v3) - diff --git a/zarr/tests/test_info.py b/zarr/tests/test_info.py index 07a14bceed..434d19d1f7 100644 --- a/zarr/tests/test_info.py +++ b/zarr/tests/test_info.py @@ -40,5 +40,3 @@ def test_info(array_size): # can also get a string representation of info via the info attribute assert isinstance(z.info, InfoReporter) assert "Type" in repr(z.info) - - From 20338c47abbe7905407fbc2d921d697ed782b715 Mon Sep 17 00:00:00 2001 From: 
Gregory Lee Date: Tue, 21 Dec 2021 15:39:52 -0500 Subject: [PATCH 069/109] skip FSStoreV3 test when fsspec not installed --- zarr/tests/test_convenience.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index f3a97da256..5f229537ac 100644 --- a/zarr/tests/test_convenience.py +++ b/zarr/tests/test_convenience.py @@ -36,6 +36,7 @@ atexit_rmtree, getsize, ) +from zarr.tests.util import have_fsspec def _init_creation_kwargs(zarr_version): @@ -897,16 +898,13 @@ def prep_source(source): yield prep_source(h5f) elif request.param == 'zarr': yield prep_source(group(path='group1', zarr_version=3)) - elif request.param == 'zarr_fsstore': - fn = tmpdir.join('source.zr3') - store = FSStoreV3(str(fn), auto_mkdir=True) - yield prep_source(group(store, path='group1', zarr_version=3)) # Test with various destination StoreV3 types as TestCopyV3 covers rmdir - @pytest.fixture( - params=['zarr', 'zarr_fsstore', 'zarr_kvstore', 'zarr_directorystore', - 'zarr_sqlitestore', 'hdf5'] - ) + destinations = ['zarr', 'zarr_fsstore', 'zarr_kvstore', 'zarr_directorystore', + 'zarr_sqlitestore', 'hdf5'] + if have_fsspec: + destinations += ['zarr_fsstore'] + @pytest.fixture(params=destinations) def dest(self, request, tmpdir): if request.param == 'hdf5': h5py = pytest.importorskip('h5py') From 70ea4a58cc1892acfddfb85eb90981f4b294d984 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 21 Dec 2021 15:41:09 -0500 Subject: [PATCH 070/109] test raising of PermissionError for setter on views --- zarr/tests/test_core.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 690640d3cb..2ffed6fb3f 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -2803,6 +2803,9 @@ def test_view(self): assert x.is_view assert x[-1] == 5 + with pytest.raises(PermissionError): + x.fill_value = 8 + def test_nchunks_initialized(self): # copied from TestArray so the empty version from TestArrayWithPath is # not used From f2982d0fb5149dea37cde60520b45fd4122c11c9 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 21 Dec 2021 15:44:44 -0500 Subject: [PATCH 071/109] remove redundant check (_normalize_store_arg will already raise here) --- zarr/hierarchy.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 6558342753..08cc18c9b1 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -118,8 +118,6 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, zarr_version = getattr(store, '_store_version', 2) if chunk_store is not None: chunk_store: BaseStore = _normalize_store_arg(chunk_store, zarr_version=zarr_version) - if not getattr(chunk_store, '_store_version', 2) == zarr_version: - raise ValueError("zarr_version of store and chunk_store must match") self._store = store self._chunk_store = chunk_store self._path = normalize_storage_path(path) From 9bc92c5bcf12849527caca9fe3f9f0a86ff6bed6 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 21 Dec 2021 16:09:34 -0500 Subject: [PATCH 072/109] improve coverage and fix bugs in normalize_store_arg --- zarr/storage.py | 2 +- zarr/tests/test_storage.py | 18 +++++++++++++++++- zarr/tests/test_storage_v3.py | 27 ++++++++++++++++++++++++++- 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 18b1a45ba3..5f5437b80d 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -165,7 +165,7 @@ def normalize_store_arg(store, 
clobber=False, storage_options=None, mode="w", store = FSStoreV3(store, mode=mode, **(storage_options or {})) elif storage_options: store = ValueError("storage_options passed with non-fsspec path") - if store.endswith('.zip'): + elif store.endswith('.zip'): store = ZipStoreV3(store, mode=mode) elif store.endswith('.n5'): raise NotImplementedError("N5Store not yet implemented for V3") diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index bc0bf4a066..982a50f7ed 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -31,7 +31,8 @@ Store, TempStore, ZipStore, array_meta_key, atexit_rmglob, atexit_rmtree, attrs_key, default_compressor, getsize, - group_meta_key, init_array, init_group, migrate_1to2) + group_meta_key, init_array, init_group, migrate_1to2, + normalize_store_arg) from zarr.storage import FSStore, rename, listdir from zarr.tests.util import CountingDict, have_fsspec, skip_test_env_var, abs_container @@ -2395,3 +2396,18 @@ def test_get_hierarchy_metadata_v2(): # v2 stores do not have hierarchy metadata (i.e. zarr.json) with pytest.raises(ValueError): _get_hierarchy_metadata(KVStore(dict)) + + +def test_normalize_store_arg(tmpdir): + with pytest.raises(ValueError): + normalize_store_arg(dict(), zarr_version=4) + + for ext, Class in [('.zip', ZipStore), ('.n5', N5Store)]: + fn = tmpdir.join('store' + ext) + store = normalize_store_arg(str(fn), zarr_version=2, mode='w', clobber=True) + assert isinstance(store, Class) + + if have_fsspec: + path = tempfile.mkdtemp() + store = normalize_store_arg("file://" + path, zarr_version=2, mode='w', clobber=True) + assert isinstance(store, FSStore) diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index 039961ec1b..9e73772c59 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -13,7 +13,7 @@ DirectoryStoreV3, NestedDirectoryStoreV3, RedisStoreV3, MongoDBStoreV3, DBMStoreV3, LMDBStoreV3, SQLiteStoreV3, LRUStoreCacheV3, - StoreV3) + StoreV3, normalize_store_arg, KVStore) from zarr.tests.util import CountingDictV3, have_fsspec, skip_test_env_var from .test_storage import ( @@ -447,3 +447,28 @@ class TestLRUStoreCacheV3(_TestLRUStoreCache, StoreV3Tests): # TODO: implement ABSStoreV3 # @skip_test_env_var("ZARR_TEST_ABS") # class TestABSStoreV3(_TestABSStore, StoreV3Tests): + +def test_normalize_store_arg_v3(tmpdir): + + fn = tmpdir.join('store.zip') + store = normalize_store_arg(str(fn), zarr_version=3, mode='w', clobber=True) + assert isinstance(store, ZipStoreV3) + assert 'zarr.json' in store + + if have_fsspec: + path = tempfile.mkdtemp() + store = normalize_store_arg("file://" + path, zarr_version=3, mode='w', clobber=True) + assert isinstance(store, FSStoreV3) + assert 'zarr.json' in store + + fn = tmpdir.join('store.n5') + with pytest.raises(NotImplementedError): + normalize_store_arg(str(fn), zarr_version=3, mode='w', clobber=True) + + # error on zarr_version=3 with a v2 store + with pytest.raises(ValueError): + normalize_store_arg(KVStore(dict()), zarr_version=3, mode='w', clobber=True) + + # error on zarr_version=2 with a v3 store + with pytest.raises(ValueError): + normalize_store_arg(KVStoreV3(dict()), zarr_version=2, mode='w', clobber=True) From 82be4c90adce573fd79e9a297e786457b435023f Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 21 Dec 2021 16:30:17 -0500 Subject: [PATCH 073/109] improve coverage of storage.py remove redundant getsize methods --- zarr/storage.py | 53 ++--------------------------------- zarr/tests/test_storage.py | 7 
++++- zarr/tests/test_storage_v3.py | 15 +++++++++- 3 files changed, 22 insertions(+), 53 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 5f5437b80d..e15bf517e3 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -234,27 +234,6 @@ def listdir(store: BaseStore, path: Path = None): if hasattr(store, 'listdir'): # pass through return store.listdir(path) # type: ignore - elif getattr(store, "_store_version", None) == 3: - meta_prefix = 'meta/root/' - dir_path = meta_prefix + path - path_start = len(meta_prefix) - meta_keys = [] - include_meta_keys = False - if include_meta_keys: - sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] - group_meta_key = dir_path + '.group' + sfx - if group_meta_key in store: - meta_keys.append(group_meta_key[path_start:]) - array_meta_key = dir_path + '.array' + sfx - if array_meta_key in store: - meta_keys.append(array_meta_key[path_start:]) - if not dir_path.endswith('/'): - dir_path += '/' - keys, prefixes = store.list_dir(dir_path) # type: ignore - keys = [k[path_start:] for k in keys] - prefixes = [p[path_start:] for p in prefixes] - return meta_keys + keys + prefixes - else: # slow version, iterate through all keys warnings.warn( @@ -3060,21 +3039,7 @@ def list(self): return list(self.keys()) def getsize(self, path: Path = None): - size = 0 - path = normalize_storage_path(path) - members = self.list_prefix('data/root/' + path) - members += self.list_prefix('meta/root/' + path) - for k in members: - try: - v = self[k] - except KeyError: - pass - else: - try: - size += buffer_size(v) - except TypeError: - return -1 - return size + return _getsize(self, path) def rename(self, src_path: Path, dst_path: Path): src_path = normalize_storage_path(src_path) @@ -3135,21 +3100,7 @@ def __setitem__(self, key, value): super().__setitem__(key, value) def getsize(self, path: Path = None): - size = 0 - path = normalize_storage_path(path) - members = self.list_prefix('data/root/' + path) - members += self.list_prefix('meta/root/' + path) - for k in members: - try: - v = self[k] - except KeyError: - pass - else: - try: - size += buffer_size(v) - except TypeError: - return -1 - return size + return _getsize(self, path) def rename(self, src_path, dst_path, metadata_key_suffix='.json'): store_src_path = normalize_storage_path(src_path) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 982a50f7ed..85fc51ab64 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -28,7 +28,7 @@ DictStore, DirectoryStore, KVStore, LMDBStore, LRUStoreCache, MemoryStore, MongoDBStore, NestedDirectoryStore, RedisStore, SQLiteStore, - Store, TempStore, ZipStore, + Store, TempStore, ZipStore, KVStoreV3, array_meta_key, atexit_rmglob, atexit_rmtree, attrs_key, default_compressor, getsize, group_meta_key, init_array, init_group, migrate_1to2, @@ -2347,6 +2347,11 @@ def test_bad_format(self): with pytest.raises(MetadataError): ConsolidatedMetadataStore(store) + def test_bad_store_version(self): + with pytest.raises(MetadataError): + ConsolidatedMetadataStore(KVStoreV3(dict())) + + def test_read_write(self): # setup store with consolidated metadata diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index 9e73772c59..e50b81abbf 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -13,7 +13,7 @@ DirectoryStoreV3, NestedDirectoryStoreV3, RedisStoreV3, MongoDBStoreV3, DBMStoreV3, LMDBStoreV3, SQLiteStoreV3, LRUStoreCacheV3, - StoreV3, normalize_store_arg, KVStore) + StoreV3, 
normalize_store_arg, KVStore, listdir) from zarr.tests.util import CountingDictV3, have_fsspec, skip_test_env_var from .test_storage import ( @@ -209,6 +209,14 @@ def test_list_prefix(self): with pytest.raises(ValueError): store.list_prefix(prefix='/meta/root') + def test_equal(self): + store = self.create_store() + assert store == store + + def test_rename_nonexisting(self): + store = self.create_store() + with pytest.raises(ValueError): + store.rename('meta/root/a', 'meta/root/b') class TestMappingStoreV3(StoreV3Tests): @@ -238,6 +246,11 @@ def create_store(self, normalize_keys=False, **kwargs): store = DirectoryStoreV3(path, normalize_keys=normalize_keys, **kwargs) return store + def test_rename_nonexisting(self): + store = self.create_store() + with pytest.raises(FileNotFoundError): + store.rename('meta/root/a', 'meta/root/b') + @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestFSStoreV3(_TestFSStore, StoreV3Tests): From 8bea4b62608f06a89ba5b68ee33c627157d08c32 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 21 Dec 2021 16:32:05 -0500 Subject: [PATCH 074/109] pep8 --- zarr/tests/test_convenience.py | 1 + zarr/tests/test_storage.py | 1 - zarr/tests/test_storage_v3.py | 3 ++- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index 5f229537ac..1327ea6820 100644 --- a/zarr/tests/test_convenience.py +++ b/zarr/tests/test_convenience.py @@ -904,6 +904,7 @@ def prep_source(source): 'zarr_sqlitestore', 'hdf5'] if have_fsspec: destinations += ['zarr_fsstore'] + @pytest.fixture(params=destinations) def dest(self, request, tmpdir): if request.param == 'hdf5': diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 85fc51ab64..28d2444d33 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -2351,7 +2351,6 @@ def test_bad_store_version(self): with pytest.raises(MetadataError): ConsolidatedMetadataStore(KVStoreV3(dict())) - def test_read_write(self): # setup store with consolidated metadata diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index e50b81abbf..0b02fa0ab4 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -13,7 +13,7 @@ DirectoryStoreV3, NestedDirectoryStoreV3, RedisStoreV3, MongoDBStoreV3, DBMStoreV3, LMDBStoreV3, SQLiteStoreV3, LRUStoreCacheV3, - StoreV3, normalize_store_arg, KVStore, listdir) + StoreV3, normalize_store_arg, KVStore) from zarr.tests.util import CountingDictV3, have_fsspec, skip_test_env_var from .test_storage import ( @@ -218,6 +218,7 @@ def test_rename_nonexisting(self): with pytest.raises(ValueError): store.rename('meta/root/a', 'meta/root/b') + class TestMappingStoreV3(StoreV3Tests): def create_store(self, **kwargs): From ce64fab365fe14560bd6274bcff931503f07d711 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 21 Dec 2021 21:02:23 -0500 Subject: [PATCH 075/109] fix StoreV3 tests --- zarr/_storage/store.py | 35 +++++++++++++++++++++++++---------- zarr/storage.py | 2 ++ zarr/tests/test_storage.py | 2 +- zarr/tests/test_storage_v3.py | 9 ++++++--- 4 files changed, 34 insertions(+), 14 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 389c40b228..82ac40b211 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -309,6 +309,26 @@ def _get_hierarchy_metadata(store=None): return meta + +def _rename_metadata_v3(store: StoreV3, src_path: str, dst_path: str) -> bool: + """Rename source or group metadata file associated with 
src_path.""" + any_renamed = False + sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] + src_path = src_path.rstrip('/') + dst_path = dst_path.rstrip('/') + _src_array_json = 'meta/root/' + src_path + '.array' + sfx + if _src_array_json in store: + new_key = 'meta/root/' + dst_path + '.array' + sfx + store[new_key] = store.pop(_src_array_json) + any_renamed = True + _src_group_json = 'meta/root/' + dst_path + '.group' + sfx + if _src_group_json in store: + new_key = 'meta/root/' + dst_path + '.group' + sfx + store[new_key] = store.pop(_src_group_json) + any_renamed = True + return any_renamed + + def _rename_from_keys(store: BaseStore, src_path: str, dst_path: str) -> None: # assume path already normalized src_prefix = _path_to_prefix(src_path) @@ -320,22 +340,17 @@ def _rename_from_keys(store: BaseStore, src_path: str, dst_path: str) -> None: new_key = dst_prefix + key.lstrip(src_prefix) store[new_key] = store.pop(key) else: + any_renamed = False for root_prefix in ['meta/root/', 'data/root/']: _src_prefix = root_prefix + src_prefix _dst_prefix = root_prefix + dst_prefix for key in store.list_prefix(_src_prefix): # type: ignore new_key = _dst_prefix + key[len(_src_prefix):] store[new_key] = store.pop(key) - - sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] - _src_array_json = 'meta/root/' + src_prefix[:-1] + '.array' + sfx - if _src_array_json in store: - new_key = 'meta/root/' + dst_prefix[:-1] + '.array' + sfx - store[new_key] = store.pop(_src_array_json) - _src_group_json = 'meta/root/' + src_prefix[:-1] + '.group' + sfx - if _src_group_json in store: - new_key = 'meta/root/' + dst_prefix[:-1] + '.group' + sfx - store[new_key] = store.pop(_src_group_json) + any_renamed = True + any_renamed = any_renamed or _rename_metadata_v3(store, src_path, dst_path) + if not any_renamed: + raise ValueError(f"no item {src_path} found to rename") def _rmdir_from_keys(store: StoreLike, path: Optional[str] = None) -> None: diff --git a/zarr/storage.py b/zarr/storage.py index e15bf517e3..f8e9174042 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -61,6 +61,7 @@ from zarr._storage.store import (_get_hierarchy_metadata, _listdir_from_keys, _rename_from_keys, + _rename_metadata_v3, _rmdir_from_keys, _rmdir_from_keys_v3, _path_to_prefix, @@ -3053,6 +3054,7 @@ def rename(self, src_path: Path, dst_path: Path): dst_parent[dst_key] = src_parent.pop(src_key) any_renamed = True + any_renamed = any_renamed or _rename_metadata_v3(self, src_path, dst_path) if not any_renamed: raise ValueError(f"no item {src_path} found to rename") diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 28d2444d33..f6c7e0fdc0 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -2348,7 +2348,7 @@ def test_bad_format(self): ConsolidatedMetadataStore(store) def test_bad_store_version(self): - with pytest.raises(MetadataError): + with pytest.raises(ValueError): ConsolidatedMetadataStore(KVStoreV3(dict())) def test_read_write(self): diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index 0b02fa0ab4..662be06d3d 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -215,9 +215,12 @@ def test_equal(self): def test_rename_nonexisting(self): store = self.create_store() - with pytest.raises(ValueError): - store.rename('meta/root/a', 'meta/root/b') - + if store.is_erasable(): + with pytest.raises(ValueError): + store.rename('a', 'b') + else: + with pytest.raises(NotImplementedError): + store.rename('a', 'b') class 
TestMappingStoreV3(StoreV3Tests): From b5a691b62244e350b31f360beb58fdac54d48521 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 21 Dec 2021 21:19:32 -0500 Subject: [PATCH 076/109] fix duplicate zarr_fsstore entry --- zarr/tests/test_convenience.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index 1327ea6820..821598712f 100644 --- a/zarr/tests/test_convenience.py +++ b/zarr/tests/test_convenience.py @@ -900,8 +900,7 @@ def prep_source(source): yield prep_source(group(path='group1', zarr_version=3)) # Test with various destination StoreV3 types as TestCopyV3 covers rmdir - destinations = ['zarr', 'zarr_fsstore', 'zarr_kvstore', 'zarr_directorystore', - 'zarr_sqlitestore', 'hdf5'] + destinations = ['hdf5', 'zarr', 'zarr_kvstore', 'zarr_directorystore', 'zarr_sqlitestore'] if have_fsspec: destinations += ['zarr_fsstore'] From 47e43549ce157c4f0858f28fca928752864b49f4 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 21 Dec 2021 21:35:09 -0500 Subject: [PATCH 077/109] fix rename --- zarr/_storage/store.py | 5 ++--- zarr/storage.py | 2 +- zarr/tests/test_hierarchy.py | 4 ++++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 82ac40b211..599ab1c279 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -309,7 +309,6 @@ def _get_hierarchy_metadata(store=None): return meta - def _rename_metadata_v3(store: StoreV3, src_path: str, dst_path: str) -> bool: """Rename source or group metadata file associated with src_path.""" any_renamed = False @@ -321,7 +320,7 @@ def _rename_metadata_v3(store: StoreV3, src_path: str, dst_path: str) -> bool: new_key = 'meta/root/' + dst_path + '.array' + sfx store[new_key] = store.pop(_src_array_json) any_renamed = True - _src_group_json = 'meta/root/' + dst_path + '.group' + sfx + _src_group_json = 'meta/root/' + src_path + '.group' + sfx if _src_group_json in store: new_key = 'meta/root/' + dst_path + '.group' + sfx store[new_key] = store.pop(_src_group_json) @@ -348,7 +347,7 @@ def _rename_from_keys(store: BaseStore, src_path: str, dst_path: str) -> None: new_key = _dst_prefix + key[len(_src_prefix):] store[new_key] = store.pop(key) any_renamed = True - any_renamed = any_renamed or _rename_metadata_v3(store, src_path, dst_path) + any_renamed = _rename_metadata_v3(store, src_path, dst_path) or any_renamed if not any_renamed: raise ValueError(f"no item {src_path} found to rename") diff --git a/zarr/storage.py b/zarr/storage.py index f8e9174042..012635aec7 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -3054,7 +3054,7 @@ def rename(self, src_path: Path, dst_path: Path): dst_parent[dst_key] = src_parent.pop(src_key) any_renamed = True - any_renamed = any_renamed or _rename_metadata_v3(self, src_path, dst_path) + any_renamed = _rename_metadata_v3(store, src_path, dst_path) or any_renamed if not any_renamed: raise ValueError(f"no item {src_path} found to rename") diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index a5a5450ed4..1df89db621 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -841,7 +841,11 @@ def test_move(self): data = np.arange(100) g['foo'] = data + if g._store._store_version == 3: + print(f"before move: {g._store.list()}") g.move("foo", "bar") + if g._store._store_version == 3: + print(f"after move: {g._store.list()}") assert "foo" not in g assert "bar" in g assert_array_equal(data, g["bar"]) From 
2415d85feb7dcf5c2e59d8d191a5c57839c6e9db Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 21 Dec 2021 21:35:31 -0500 Subject: [PATCH 078/109] remove debug statements --- zarr/tests/test_hierarchy.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 1df89db621..a5a5450ed4 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -841,11 +841,7 @@ def test_move(self): data = np.arange(100) g['foo'] = data - if g._store._store_version == 3: - print(f"before move: {g._store.list()}") g.move("foo", "bar") - if g._store._store_version == 3: - print(f"after move: {g._store.list()}") assert "foo" not in g assert "bar" in g assert_array_equal(data, g["bar"]) From 98120df602d530598f86cd2a759670c2be345f8e Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 21 Dec 2021 21:54:17 -0500 Subject: [PATCH 079/109] fix typo --- zarr/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index 012635aec7..eb8464afde 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -3054,7 +3054,7 @@ def rename(self, src_path: Path, dst_path: Path): dst_parent[dst_key] = src_parent.pop(src_key) any_renamed = True - any_renamed = _rename_metadata_v3(store, src_path, dst_path) or any_renamed + any_renamed = _rename_metadata_v3(self, src_path, dst_path) or any_renamed if not any_renamed: raise ValueError(f"no item {src_path} found to rename") From a99519c0e3911faa2457a258647ca12a61158b7a Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 21 Dec 2021 22:19:37 -0500 Subject: [PATCH 080/109] skip unavailable NumPy dtypes --- zarr/tests/test_meta.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/zarr/tests/test_meta.py b/zarr/tests/test_meta.py index bec05f2d4a..83029e52d6 100644 --- a/zarr/tests/test_meta.py +++ b/zarr/tests/test_meta.py @@ -563,7 +563,11 @@ def test_get_extended_dtype_info(): extended_types += ['|S4', '|S8', '>U4', ' Date: Tue, 21 Dec 2021 22:30:23 -0500 Subject: [PATCH 081/109] pep8 --- zarr/_storage/store.py | 2 +- zarr/storage.py | 2 +- zarr/tests/test_storage_v3.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 599ab1c279..8cad7f33f7 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -347,7 +347,7 @@ def _rename_from_keys(store: BaseStore, src_path: str, dst_path: str) -> None: new_key = _dst_prefix + key[len(_src_prefix):] store[new_key] = store.pop(key) any_renamed = True - any_renamed = _rename_metadata_v3(store, src_path, dst_path) or any_renamed + any_renamed = _rename_metadata_v3(store, src_path, dst_path) or any_renamed if not any_renamed: raise ValueError(f"no item {src_path} found to rename") diff --git a/zarr/storage.py b/zarr/storage.py index eb8464afde..65c2e31b6a 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -3054,7 +3054,7 @@ def rename(self, src_path: Path, dst_path: Path): dst_parent[dst_key] = src_parent.pop(src_key) any_renamed = True - any_renamed = _rename_metadata_v3(self, src_path, dst_path) or any_renamed + any_renamed = _rename_metadata_v3(self, src_path, dst_path) or any_renamed if not any_renamed: raise ValueError(f"no item {src_path} found to rename") diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index 662be06d3d..157cd641f3 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -222,6 +222,7 @@ def test_rename_nonexisting(self): with 
pytest.raises(NotImplementedError): store.rename('a', 'b') + class TestMappingStoreV3(StoreV3Tests): def create_store(self, **kwargs): From eec4a3e7bda1ce3af93556cacdc8d1677bb859b4 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 21 Dec 2021 22:44:36 -0500 Subject: [PATCH 082/109] mypy fixes --- zarr/_storage/store.py | 4 +++- zarr/storage.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 8cad7f33f7..18e6a611d0 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -347,7 +347,9 @@ def _rename_from_keys(store: BaseStore, src_path: str, dst_path: str) -> None: new_key = _dst_prefix + key[len(_src_prefix):] store[new_key] = store.pop(key) any_renamed = True - any_renamed = _rename_metadata_v3(store, src_path, dst_path) or any_renamed + any_meta_renamed = _rename_metadata_v3(store, src_path, dst_path) # type: ignore + any_renamed = any_meta_renamed or any_renamed + if not any_renamed: raise ValueError(f"no item {src_path} found to rename") diff --git a/zarr/storage.py b/zarr/storage.py index 65c2e31b6a..8c31189e2d 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2913,11 +2913,11 @@ def _get_files_and_dirs_from_path(store, path): class RmdirV3(): - """Mixin class that can be used to ensure override of v2 rmdir class.""" + """Mixin class that can be used to ensure override of any existing v2 rmdir class.""" def rmdir(self, path: str = "") -> None: path = normalize_storage_path(path) - _rmdir_from_keys_v3(self, path) + _rmdir_from_keys_v3(self, path) # type: ignore class KVStoreV3(RmdirV3, KVStore, StoreV3): From 5970c695de165fb1b5a68c72cfdc812c8a3b1c7c Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 21 Dec 2021 23:10:49 -0500 Subject: [PATCH 083/109] remove redundant check (already done above) --- zarr/storage.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 8c31189e2d..13276f4864 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -178,18 +178,8 @@ def normalize_store_arg(store, clobber=False, storage_options=None, mode="w", return store elif zarr_version == 2: store = Store._ensure_store(store) - if getattr(store, '_store_version', 2) != 2: - raise ValueError( - "provided store does not match the specified zarr version.") - # if not isinstance(store, Store) and isinstance(store, MutableMapping): - # store = KVStore(store) elif zarr_version == 3: store = StoreV3._ensure_store(store) - if getattr(store, '_store_version', 2) != 3: - raise ValueError( - "provided store does not match the specified zarr version.") - # if not isinstance(store, StoreV3) and isinstance(store, MutableMapping): - # store = KVStoreV3(store) if 'zarr.json' not in store: # add default zarr.json metadata store['zarr.json'] = store._metadata_class.encode_hierarchy_metadata(None) From 77f6fee3335f22bfff4ad840b15e11de68449ba1 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 21 Dec 2021 23:44:01 -0500 Subject: [PATCH 084/109] remove KeyError check. 
list_prefix only returns keys that exist --- zarr/storage.py | 10 +++------- zarr/tests/test_hierarchy.py | 16 +++++++++++++--- zarr/tests/test_storage.py | 2 ++ zarr/tests/test_storage_v3.py | 5 +++++ 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 13276f4864..7bf989010a 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -165,7 +165,7 @@ def normalize_store_arg(store, clobber=False, storage_options=None, mode="w", if "://" in store or "::" in store: store = FSStoreV3(store, mode=mode, **(storage_options or {})) elif storage_options: - store = ValueError("storage_options passed with non-fsspec path") + raise ValueError("storage_options passed with non-fsspec path") elif store.endswith('.zip'): store = ZipStoreV3(store, mode=mode) elif store.endswith('.n5'): @@ -3169,12 +3169,8 @@ def getsize(self, path=None): if children: size = 0 for name in children: - try: - info = self.zf.getinfo(name) - except KeyError: - pass - else: - size += info.compress_size + info = self.zf.getinfo(name) + size += info.compress_size return size elif path: try: diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index a5a5450ed4..7e9a8ebd41 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -31,7 +31,7 @@ FSStoreV3, NestedDirectoryStoreV3, ZipStoreV3, DBMStoreV3, LMDBStoreV3, SQLiteStoreV3, LRUStoreCacheV3) -from zarr.util import InfoReporter +from zarr.util import InfoReporter, buffer_size from zarr.tests.util import skip_test_env_var, have_fsspec, abs_container @@ -1185,7 +1185,6 @@ class TestGroupV3WithFSStore(TestGroupWithFSStore, TestGroupV3): @staticmethod def create_store(): - pytest.skip("TODO: Fix for V3") path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) store = FSStoreV3(path) @@ -1202,6 +1201,18 @@ def test_round_trip_nd(self): h = open_group(store, path='group', mode='r') np.testing.assert_array_equal(h[name][:], data) + f = open_group(store, path='group2', mode='w') + + data_size = data.nbytes + group_meta_size = buffer_size(store['meta/root/group.group.json']) + group2_meta_size = buffer_size(store['meta/root/group2.group.json']) + array_meta_size = buffer_size(store['meta/root/group/raw.array.json']) + assert store.getsize() == data_size + group_meta_size + group2_meta_size + array_meta_size + # added case with path to complete coverage + assert store.getsize('group') == data_size + group_meta_size + array_meta_size + assert store.getsize('group2') == group2_meta_size + assert store.getsize('group/raw') == data_size + array_meta_size + @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestGroupWithNestedFSStore(TestGroupWithFSStore): @@ -1231,7 +1242,6 @@ class TestGroupV3WithNestedFSStore(TestGroupV3WithFSStore): @staticmethod def create_store(): - pytest.skip("TODO: Fix for V3") path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) store = FSStoreV3(path, key_separator='/', auto_mkdir=True) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 7971710f69..8ab1731910 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -346,6 +346,8 @@ def test_hierarchy(self): # TODO: proper behavior of getsize? # v3 returns size of all nested arrays, not just the # size of the arrays in the current folder. 
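The recursive v3 behavior noted in the TODO above comes from summing everything below
both v3 root prefixes; a standalone sketch of that traversal (the function name here
is illustrative, but the body mirrors the getsize methods refactored into a shared
helper in patch 073):

    from zarr.util import buffer_size

    def getsize_v3(store, path=''):
        # a v3 store keeps metadata and chunk data under separate roots,
        # so both prefixes are walked and their value sizes summed
        members = store.list_prefix('data/root/' + path)
        members += store.list_prefix('meta/root/' + path)
        size = 0
        for key in members:
            try:
                size += buffer_size(store[key])
            except TypeError:
                return -1  # a value without a well-defined buffer size
        return size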
+ if isinstance(store, ZipStoreV3): + 1 / 0 if self.version == 2: assert 6 == store.getsize() else: diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index 157cd641f3..e1e510ee4d 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -473,6 +473,11 @@ def test_normalize_store_arg_v3(tmpdir): assert isinstance(store, ZipStoreV3) assert 'zarr.json' in store + # can't pass storage_options to non-fsspec store + with pytest.raises(ValueError): + normalize_store_arg(str(fn), zarr_version=3, mode='w', clobber=True, + storage_options={"some": "kwargs"}) + if have_fsspec: path = tempfile.mkdtemp() store = normalize_store_arg("file://" + path, zarr_version=3, mode='w', clobber=True) From f2b3ed8e43fc608cb70633704162d505eda2a5e6 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 22 Dec 2021 00:17:01 -0500 Subject: [PATCH 085/109] coverage fixes --- zarr/storage.py | 13 +++++++------ zarr/tests/test_core.py | 6 ++++++ zarr/tests/test_creation.py | 4 ++-- zarr/tests/test_hierarchy.py | 10 ++-------- zarr/tests/test_storage.py | 14 +++----------- zarr/tests/test_storage_v3.py | 16 ++++++++-------- 6 files changed, 28 insertions(+), 35 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 7bf989010a..9c808cbfb5 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2958,6 +2958,9 @@ def getsize(self, path=None): dir_path = os.path.join(self.path, d) if os.path.exists(dir_path): dirs.append(dir_path) + elif path in self: + # access individual element by full path + return buffer_size(self[path]) else: files, dirs = _get_files_and_dirs_from_path(self, path) for file in files: @@ -3166,18 +3169,16 @@ def getsize(self, path=None): with self.mutex: children = self.list_prefix('data/root/' + path) children += self.list_prefix('meta/root/' + path) + print(f"path={path}, children={children}") if children: size = 0 for name in children: info = self.zf.getinfo(name) size += info.compress_size return size - elif path: - try: - info = self.zf.getinfo(path) - return info.compress_size - except KeyError: - return 0 + elif path in self: + info = self.zf.getinfo(path) + return info.compress_size else: return 0 diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 2ffed6fb3f..cdc41b17ba 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -3096,6 +3096,12 @@ def test_nbytes_stored(self): expect_nbytes_stored = sum(buffer_size(v) for k, v in z.store.items() if k != 'zarr.json') assert expect_nbytes_stored == z.nbytes_stored + def test_len(self): + + # dict as store + z = self.create_array(shape=1000, chunks=100) + assert len(z._store) == 2 + class TestArrayNoCacheV3(TestArrayWithPathV3, TestArrayNoCache): diff --git a/zarr/tests/test_creation.py b/zarr/tests/test_creation.py index 0f247028ea..d31aab4e76 100644 --- a/zarr/tests/test_creation.py +++ b/zarr/tests/test_creation.py @@ -364,8 +364,8 @@ def test_open_array_n5(zarr_version): store = 'data/group.n5' group_kwargs = kwargs.copy() - if zarr_version == 3: - group_kwargs['path'] = 'group' + # if zarr_version == 3: + # group_kwargs['path'] = 'group' z = open_group(store, mode='w', **group_kwargs) i = z.create_group('inner') a = i.zeros("array", shape=100, chunks=10) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 7e9a8ebd41..8ded94f5ae 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -107,14 +107,7 @@ def test_group_init_errors_2(self): def _subgroup_path(self, group, path): path = path.rstrip('/') - absolute = 
path.startswith('/') - if absolute: - group_path = path - else: - if path: - group_path = '/'.join([group.path, path]) - else: - group_path = path + group_path = '/'.join([group.path, path]) group_path = group_path.lstrip('/') group_name = '/' + group_path return group_path, group_name @@ -132,6 +125,7 @@ def test_create_group(self): # create level 1 child group g2 = g1.create_group('foo') + # check with relative path path, name = self._subgroup_path(g1, 'foo') assert isinstance(g2, Group) assert path == g2.path diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 8ab1731910..0a4435082c 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -51,15 +51,6 @@ def dimension_separator_fixture(request): return request.param -@pytest.fixture(params=[ - (None, "/"), - (".", "."), - ("/", "/"), -]) -def dimension_separator_fixture_v3(request): - return request.param - - def skip_if_nested_chunks(**kwargs): if kwargs.get("dimension_separator") == "/": pytest.skip("nested chunks are unsupported") @@ -346,8 +337,6 @@ def test_hierarchy(self): # TODO: proper behavior of getsize? # v3 returns size of all nested arrays, not just the # size of the arrays in the current folder. - if isinstance(store, ZipStoreV3): - 1 / 0 if self.version == 2: assert 6 == store.getsize() else: @@ -370,6 +359,9 @@ def test_hierarchy(self): assert 0 == store.getsize('c/d/y') assert 0 == store.getsize('c/d/y/z') + # access item via full path + assert 3 == store.getsize(self.root + 'a') + # test listdir (optional) if hasattr(store, 'listdir'): assert {'a', 'b', 'c'} == set(store.listdir(self.root)) diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index e1e510ee4d..5fa87f880b 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -51,32 +51,32 @@ class DummyStore(): # contains all methods expected of Mutable Mapping def keys(self): - pass + """keys""" def values(self): - pass + """values""" def get(self, value, default=None): - pass + """get""" def __setitem__(self, key, value): - pass + """__setitem__""" def __getitem__(self, key): - pass + """__getitem__""" def __delitem__(self, key): - pass + """__delitem__""" def __contains__(self, key): - pass + """__contains__""" class InvalidDummyStore(): # does not contain expected methods of a MutableMapping def keys(self): - pass + """keys""" def test_ensure_store_v3(): From 347b67dd5fa67ea4cd9d0d7194b029d36ae63b69 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Thu, 6 Jan 2022 17:11:25 -0500 Subject: [PATCH 086/109] implemented ConsolidatedMetadataStoreV3 Parametrize test_consolidate_metadata: removes the need for a separate test_consolidated_with_chunk_store --- zarr/_storage/store.py | 2 + zarr/convenience.py | 60 +++++++++++++--- zarr/storage.py | 60 ++++++++++++++-- zarr/tests/test_convenience.py | 123 ++++++++++++++------------------- 4 files changed, 160 insertions(+), 85 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 18e6a611d0..1d322f3d95 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -179,6 +179,8 @@ def _validate_key(key: str): not key.startswith("data/") and (not key.startswith("meta/")) and (not key == "zarr.json") + # TODO: Possibly allow key == ".zmetadata" too if we write a + # consolidated metadata spec corresponding to this? 
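For context on the TODO above: a bare '.zmetadata' key fails this check because it
carries neither the meta/ nor the data/ prefix, which is why the consolidated key
introduced later in this patch nests under meta/root/consolidated/ instead. A
minimal sketch of the rule, assuming any concrete v3 store such as MemoryStoreV3
(illustrative only, not part of the patch):

    from zarr.storage import MemoryStoreV3

    store = MemoryStoreV3()
    # accepted: ASCII characters plus a meta/ prefix
    store._validate_key('meta/root/consolidated/.zmetadata')
    try:
        # rejected: no meta/, data/ or zarr.json prefix
        store._validate_key('.zmetadata')
    except ValueError:
        pass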
): raise ValueError("keys starts with unexpected value: `{}`".format(key)) diff --git a/zarr/convenience.py b/zarr/convenience.py index 3f66e17456..b4b338674e 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -13,7 +13,9 @@ from zarr.hierarchy import group as _create_group from zarr.hierarchy import open_group from zarr.meta import json_dumps, json_loads -from zarr.storage import contains_array, contains_group, normalize_store_arg, BaseStore +from zarr.storage import (_get_hierarchy_metadata, contains_array, contains_group, + normalize_store_arg, BaseStore, ConsolidatedMetadataStore, + ConsolidatedMetadataStoreV3) from zarr.util import TreeViewer, buffer_size, normalize_storage_path from typing import Union @@ -916,6 +918,8 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, # clear the way if exists: del dest[name] + if name in dest: + 1 / 0 # setup creation keyword arguments kws = create_kws.copy() @@ -1153,7 +1157,7 @@ def copy_all(source, dest, shallow=False, without_attrs=False, log=None, return n_copied, n_skipped, n_bytes_copied -def consolidate_metadata(store: BaseStore, metadata_key=".zmetadata"): +def consolidate_metadata(store: BaseStore, metadata_key=".zmetadata", *, path=''): """ Consolidate all metadata for groups and arrays within the given store into a single resource and put it under the given key. @@ -1176,6 +1180,9 @@ def consolidate_metadata(store: BaseStore, metadata_key=".zmetadata"): Store or path to directory in file system or name of zip file. metadata_key : str Key to put the consolidated metadata under. + path : str or None + Path corresponding to the group that is being consolidated. Not required + for zarr v2 stores. Returns ------- @@ -1187,11 +1194,31 @@ def consolidate_metadata(store: BaseStore, metadata_key=".zmetadata"): open_consolidated """ - store = normalize_store_arg(store, clobber=True) + store = normalize_store_arg(store) - def is_zarr_key(key): - return (key.endswith('.zarray') or key.endswith('.zgroup') or - key.endswith('.zattrs')) + version = store._store_version + if version > 2: + sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] + + if version == 2: + + def is_zarr_key(key): + return (key.endswith('.zarray') or key.endswith('.zgroup') or + key.endswith('.zattrs')) + + else: + + def is_zarr_key(key, sfx=sfx): + return (key.endswith('.array' + sfx) or key.endswith('.group' + sfx) or + key == 'zarr.json') + + # cannot create a group without a path in v3 + # so create /meta/root/consolidated group to store the metadata + if 'consolidated' not in store: + _create_group(store, path='consolidated') + if not metadata_key.startswith('meta/root/'): + metadata_key = 'meta/root/consolidated/' + metadata_key + # path = 'consolidated' out = { 'zarr_consolidated_format': 1, @@ -1201,7 +1228,7 @@ def is_zarr_key(key): } } store[metadata_key] = json_dumps(out) - return open_consolidated(store, metadata_key=metadata_key) + return open_consolidated(store, metadata_key=metadata_key, path=path) def open_consolidated(store: StoreLike, metadata_key=".zmetadata", mode="r+", **kwargs): @@ -1246,17 +1273,28 @@ def open_consolidated(store: StoreLike, metadata_key=".zmetadata", mode="r+", ** """ - from .storage import ConsolidatedMetadataStore - # normalize parameters store = normalize_store_arg(store, storage_options=kwargs.get("storage_options")) if mode not in {'r', 'r+'}: raise ValueError("invalid mode, expected either 'r' or 'r+'; found {!r}" .format(mode)) + path = kwargs.pop('path', None) + if store._store_version 
== 2: + ConsolidatedStoreClass = ConsolidatedMetadataStore + else: + ConsolidatedStoreClass = ConsolidatedMetadataStoreV3 + # default is to store within 'consolidated' group on v3 + if not metadata_key.startswith('meta/root/'): + metadata_key = 'meta/root/consolidated/' + metadata_key + if not path: + raise ValueError( + "path must be provided to open a Zarr 3.x consolidated store" + ) + # setup metadata store - meta_store = ConsolidatedMetadataStore(store, metadata_key=metadata_key) + meta_store = ConsolidatedStoreClass(store, metadata_key=metadata_key) # pass through chunk_store = kwargs.pop('chunk_store', None) or store - return open(store=meta_store, chunk_store=chunk_store, mode=mode, **kwargs) + return open(store=meta_store, chunk_store=chunk_store, mode=mode, path=path, **kwargs) diff --git a/zarr/storage.py b/zarr/storage.py index ce9c28f8b3..50c4cd639c 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2835,10 +2835,6 @@ class ConsolidatedMetadataStore(Store): def __init__(self, store: StoreLike, metadata_key=".zmetadata"): self.store = Store._ensure_store(store) - if getattr(store, '_store_version', 2) != 2: - raise ValueError("Can only consolidate stores corresponding to " - "the Zarr v2 spec.") - # retrieve consolidated metadata meta = json_loads(store[metadata_key]) @@ -3330,3 +3326,59 @@ def __setitem__(self, key, value): LRUStoreCacheV3.__doc__ = LRUStoreCache.__doc__ + + +class ConsolidatedMetadataStoreV3(ConsolidatedMetadataStore, StoreV3): + """A layer over other storage, where the metadata has been consolidated into + a single key. + + The purpose of this class, is to be able to get all of the metadata for + a given array in a single read operation from the underlying storage. + See :func:`zarr.convenience.consolidate_metadata` for how to create this + single metadata key. + + This class loads from the one key, and stores the data in a dict, so that + accessing the keys no longer requires operations on the backend store. + + This class is read-only, and attempts to change the array metadata will + fail, but changing the data is possible. If the backend storage is changed + directly, then the metadata stored here could become obsolete, and + :func:`zarr.convenience.consolidate_metadata` should be called again and the class + re-invoked. The use case is for write once, read many times. + + .. note:: This is an experimental feature. + + Parameters + ---------- + store: Store + Containing the zarr array. + metadata_key: str + The target in the store where all of the metadata are stored. We + assume JSON encoding. 
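To make the JSON-encoding assumption concrete: the document written by
consolidate_metadata in this patch has roughly the following shape, sketched here
as a Python literal with the decoded metadata values elided (key names taken from
the tests below):

    out = {
        'zarr_consolidated_format': 1,
        'metadata': {
            'zarr.json': ...,                           # entry point metadata
            'meta/root/dataset.group.json': ...,        # group metadata
            'meta/root/dataset/g2/arr.array.json': ...  # array metadata
        },
    }
    # persisted as: store[metadata_key] = json_dumps(out)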
+ + See Also + -------- + zarr.convenience.consolidate_metadata, zarr.convenience.open_consolidated + + """ + + def __init__(self, store: StoreLike, metadata_key="meta/root/consolidated/.zmetadata"): + self.store = Store._ensure_store(store) + + # retrieve consolidated metadata + meta = json_loads(store[metadata_key]) + + # check format of consolidated metadata + consolidated_format = meta.get('zarr_consolidated_format', None) + if consolidated_format != 1: + raise MetadataError('unsupported zarr consolidated metadata format: %s' % + consolidated_format) + + # decode metadata + self.meta_store: Store = KVStoreV3(meta["metadata"]) + + def rmdir(self, key): + raise ReadOnlyError() + + # def __setitem__(self, key, value): + # raise ReadOnlyError() diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index 821598712f..48553bfced 100644 --- a/zarr/tests/test_convenience.py +++ b/zarr/tests/test_convenience.py @@ -26,6 +26,7 @@ from zarr.hierarchy import Group, group from zarr.storage import ( ConsolidatedMetadataStore, + ConsolidatedMetadataStoreV3, DirectoryStoreV3, FSStoreV3, KVStore, @@ -201,11 +202,21 @@ def test_tree(zarr_version): # TODO: consolidated metadata currently only supported for v2 -def test_consolidate_metadata(): +@pytest.mark.parametrize('zarr_version', [2, 3]) +@pytest.mark.parametrize('with_chunk_store', [False, True], ids=['default', 'with_chunk_store']) +def test_consolidate_metadata(with_chunk_store, zarr_version): + + if zarr_version == 2: + MemoryStoreClass = MemoryStore + path = '' + else: + MemoryStoreClass = MemoryStoreV3 + path = 'dataset' # setup initial data - store = MemoryStore() - z = group(store) + store = MemoryStoreClass() + chunk_store = MemoryStoreClass() if with_chunk_store else None + z = group(store, chunk_store=chunk_store, path=path) z.create_group('g1') g2 = z.create_group('g2') g2.attrs['hello'] = 'world' @@ -217,19 +228,32 @@ def test_consolidate_metadata(): assert 16 == arr.nchunks_initialized # perform consolidation - out = consolidate_metadata(store) + out = consolidate_metadata(store, path=path) assert isinstance(out, Group) - assert '.zmetadata' in store - for key in ['.zgroup', - 'g1/.zgroup', - 'g2/.zgroup', - 'g2/.zattrs', - 'g2/arr/.zarray', - 'g2/arr/.zattrs']: + assert ['g1', 'g2'] == list(out) + if zarr_version == 2: + assert isinstance(out._store, ConsolidatedMetadataStore) + assert '.zmetadata' in store + meta_keys = ['.zgroup', + 'g1/.zgroup', + 'g2/.zgroup', + 'g2/.zattrs', + 'g2/arr/.zarray', + 'g2/arr/.zattrs'] + else: + assert isinstance(out._store, ConsolidatedMetadataStoreV3) + assert 'meta/root/consolidated/.zmetadata' in store + meta_keys = ['zarr.json', + 'meta/root/dataset.group.json', + 'meta/root/dataset/g1.group.json', + 'meta/root/dataset/g2.group.json', + 'meta/root/dataset/g2/arr.array.json', + 'meta/root/consolidated.group.json'] + for key in meta_keys: del store[key] # open consolidated - z2 = open_consolidated(store) + z2 = open_consolidated(store, chunk_store=chunk_store, path=path) assert ['g1', 'g2'] == list(z2) assert 'world' == z2.g2.attrs['hello'] assert 1 == z2.g2.arr.attrs['data'] @@ -238,11 +262,18 @@ def test_consolidate_metadata(): assert 16 == z2.g2.arr.nchunks_initialized # tests del/write on the store - cmd = ConsolidatedMetadataStore(store) - with pytest.raises(PermissionError): - del cmd['.zgroup'] - with pytest.raises(PermissionError): - cmd['.zgroup'] = None + if zarr_version == 2: + cmd = ConsolidatedMetadataStore(store) + with pytest.raises(PermissionError): + del 
cmd['.zgroup'] + with pytest.raises(PermissionError): + cmd['.zgroup'] = None + else: + cmd = ConsolidatedMetadataStoreV3(store) + with pytest.raises(PermissionError): + del cmd['meta/root/dataset.group.json'] + with pytest.raises(PermissionError): + cmd['meta/root/dataset.group.json'] = None # test getsize on the store assert isinstance(getsize(cmd), Integral) @@ -267,62 +298,14 @@ def test_consolidate_metadata(): # test invalid modes with pytest.raises(ValueError): - open_consolidated(store, mode='a') + open_consolidated(store, chunk_store=chunk_store, mode='a', path=path) with pytest.raises(ValueError): - open_consolidated(store, mode='w') + open_consolidated(store, chunk_store=chunk_store, mode='w', path=path) # make sure keyword arguments are passed through without error - open_consolidated(store, cache_attrs=True, synchronizer=None) - - -def test_consolidated_with_chunk_store(): - # setup initial data - store = MemoryStore() - chunk_store = MemoryStore() - z = group(store, chunk_store=chunk_store) - z.create_group('g1') - g2 = z.create_group('g2') - g2.attrs['hello'] = 'world' - arr = g2.create_dataset('arr', shape=(20, 20), chunks=(5, 5), dtype='f8') - assert 16 == arr.nchunks - assert 0 == arr.nchunks_initialized - arr.attrs['data'] = 1 - arr[:] = 1.0 - assert 16 == arr.nchunks_initialized - - # perform consolidation - out = consolidate_metadata(store) - assert isinstance(out, Group) - assert '.zmetadata' in store - for key in ['.zgroup', - 'g1/.zgroup', - 'g2/.zgroup', - 'g2/.zattrs', - 'g2/arr/.zarray', - 'g2/arr/.zattrs']: - del store[key] - # open consolidated - z2 = open_consolidated(store, chunk_store=chunk_store) - assert ['g1', 'g2'] == list(z2) - assert 'world' == z2.g2.attrs['hello'] - assert 1 == z2.g2.arr.attrs['data'] - assert (z2.g2.arr[:] == 1.0).all() - assert 16 == z2.g2.arr.nchunks - assert 16 == z2.g2.arr.nchunks_initialized - - # test the data are writeable - z2.g2.arr[:] = 2 - assert (z2.g2.arr[:] == 2).all() - - # test invalid modes - with pytest.raises(ValueError): - open_consolidated(store, mode='a', chunk_store=chunk_store) - with pytest.raises(ValueError): - open_consolidated(store, mode='w', chunk_store=chunk_store) - - # make sure keyword arguments are passed through without error - open_consolidated(store, cache_attrs=True, synchronizer=None, - chunk_store=chunk_store) + open_consolidated( + store, chunk_store=chunk_store, path=path, cache_attrs=True, synchronizer=None + ) @pytest.mark.parametrize("options", ( From 0f2d5831a54cf63adf1399d8ce3ed15fceadf02f Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Thu, 6 Jan 2022 17:41:17 -0500 Subject: [PATCH 087/109] expand ConsolidatedMetadataStoreV3 tests update _ensure_store to disallow mismatched Store versions --- zarr/_storage/store.py | 8 ++++ zarr/storage.py | 6 +-- zarr/tests/test_storage.py | 23 ++++++++--- zarr/tests/test_storage_v3.py | 72 ++++++++++++++++++++++------------- 4 files changed, 73 insertions(+), 36 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 1d322f3d95..470822e8fa 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -86,6 +86,10 @@ def _ensure_store(store: Any): if store is None: return None elif isinstance(store, BaseStore): + if not store._store_version == 2: + raise ValueError( + f"cannot initialize a v2 store with a v{store._store_version} store" + ) return store elif isinstance(store, MutableMapping): return KVStore(store) @@ -260,6 +264,10 @@ def _ensure_store(store): return None elif isinstance(store, StoreV3): return store + 
elif isinstance(store, Store): + raise ValueError( + f"cannot initialize a v3 store with a v{store._store_version} store" + ) elif isinstance(store, MutableMapping): return KVStoreV3(store) else: diff --git a/zarr/storage.py b/zarr/storage.py index 50c4cd639c..37be7bae74 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2836,7 +2836,7 @@ def __init__(self, store: StoreLike, metadata_key=".zmetadata"): self.store = Store._ensure_store(store) # retrieve consolidated metadata - meta = json_loads(store[metadata_key]) + meta = json_loads(self.store[metadata_key]) # check format of consolidated metadata consolidated_format = meta.get('zarr_consolidated_format', None) @@ -3363,10 +3363,10 @@ class ConsolidatedMetadataStoreV3(ConsolidatedMetadataStore, StoreV3): """ def __init__(self, store: StoreLike, metadata_key="meta/root/consolidated/.zmetadata"): - self.store = Store._ensure_store(store) + self.store = StoreV3._ensure_store(store) # retrieve consolidated metadata - meta = json_loads(store[metadata_key]) + meta = json_loads(self.store[metadata_key]) # check format of consolidated metadata consolidated_format = meta.get('zarr_consolidated_format', None) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 749e1702e3..6f59477b1b 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -26,7 +26,7 @@ from zarr.meta import ZARR_FORMAT, decode_array_metadata from zarr.n5 import N5Store, N5FSStore from zarr.storage import (ABSStore, ConsolidatedMetadataStore, DBMStore, - DictStore, DirectoryStore, KVStore, LMDBStore, + DictStore, DirectoryStore, KVStore, KVStoreV3, LMDBStore, LRUStoreCache, MemoryStore, MongoDBStore, NestedDirectoryStore, RedisStore, SQLiteStore, Store, TempStore, ZipStore, KVStoreV3, @@ -68,6 +68,10 @@ class InvalidStore: with pytest.raises(ValueError): Store._ensure_store(InvalidStore()) + # cannot initialize with a store from a different Zarr version + with pytest.raises(ValueError): + Store._ensure_store(KVStoreV3(dict())) + assert Store._ensure_store(None) is None @@ -2339,6 +2343,13 @@ def test_pickle(self): class TestConsolidatedMetadataStore: + version = 2 + ConsolidatedMetadataClass = ConsolidatedMetadataStore + + @property + def metadata_key(self): + return '.zmetadata' + def test_bad_format(self): # setup store with consolidated metadata @@ -2347,15 +2358,15 @@ def test_bad_format(self): # bad format version 'zarr_consolidated_format': 0, } - store['.zmetadata'] = json.dumps(consolidated).encode() + store[self.metadata_key] = json.dumps(consolidated).encode() # check appropriate error is raised with pytest.raises(MetadataError): - ConsolidatedMetadataStore(store) + self.ConsolidatedMetadataClass(store) def test_bad_store_version(self): with pytest.raises(ValueError): - ConsolidatedMetadataStore(KVStoreV3(dict())) + self.ConsolidatedMetadataClass(KVStoreV3(dict())) def test_read_write(self): @@ -2368,10 +2379,10 @@ def test_read_write(self): 'baz': 42, } } - store['.zmetadata'] = json.dumps(consolidated).encode() + store[self.metadata_key] = json.dumps(consolidated).encode() # create consolidated store - cs = ConsolidatedMetadataStore(store) + cs = self.ConsolidatedMetadataClass(store) # test __contains__, __getitem__ for key, value in consolidated['metadata'].items(): diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index 5fa87f880b..8db49c664c 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -1,41 +1,41 @@ import array import atexit +import json import os import 
tempfile import numpy as np import pytest - from zarr._storage.store import _valid_key_characters -from zarr.storage import (atexit_rmglob, atexit_rmtree, default_compressor, - getsize, init_array) -from zarr.storage import (KVStoreV3, MemoryStoreV3, ZipStoreV3, FSStoreV3, - DirectoryStoreV3, NestedDirectoryStoreV3, - RedisStoreV3, MongoDBStoreV3, DBMStoreV3, - LMDBStoreV3, SQLiteStoreV3, LRUStoreCacheV3, - StoreV3, normalize_store_arg, KVStore) +from zarr.errors import MetadataError +from zarr.storage import (ConsolidatedMetadataStoreV3, DBMStoreV3, + DirectoryStoreV3, FSStoreV3, KVStore, KVStoreV3, + LMDBStoreV3, LRUStoreCacheV3, MemoryStoreV3, + MongoDBStoreV3, NestedDirectoryStoreV3, RedisStoreV3, + SQLiteStoreV3, StoreV3, ZipStoreV3, atexit_rmglob, + atexit_rmtree, default_compressor, getsize, + init_array, normalize_store_arg) from zarr.tests.util import CountingDictV3, have_fsspec, skip_test_env_var -from .test_storage import ( - StoreTests as _StoreTests, - TestMemoryStore as _TestMemoryStore, - TestDirectoryStore as _TestDirectoryStore, - TestFSStore as _TestFSStore, - TestNestedDirectoryStore as _TestNestedDirectoryStore, - TestZipStore as _TestZipStore, - TestDBMStore as _TestDBMStore, - TestDBMStoreDumb as _TestDBMStoreDumb, - TestDBMStoreGnu as _TestDBMStoreGnu, - TestDBMStoreNDBM as _TestDBMStoreNDBM, - TestDBMStoreBerkeleyDB as _TestDBMStoreBerkeleyDB, - TestLMDBStore as _TestLMDBStore, - TestSQLiteStore as _TestSQLiteStore, - TestSQLiteStoreInMemory as _TestSQLiteStoreInMemory, - TestLRUStoreCache as _TestLRUStoreCache, - skip_if_nested_chunks) - # pytest will fail to run if the following fixtures aren't imported here -from .test_storage import dimension_separator_fixture, s3 # noqa +from .test_storage import StoreTests as _StoreTests +from .test_storage import TestConsolidatedMetadataStore as _TestConsolidatedMetadataStore +from .test_storage import TestDBMStore as _TestDBMStore +from .test_storage import TestDBMStoreBerkeleyDB as _TestDBMStoreBerkeleyDB +from .test_storage import TestDBMStoreDumb as _TestDBMStoreDumb +from .test_storage import TestDBMStoreGnu as _TestDBMStoreGnu +from .test_storage import TestDBMStoreNDBM as _TestDBMStoreNDBM +from .test_storage import TestDirectoryStore as _TestDirectoryStore +from .test_storage import TestFSStore as _TestFSStore +from .test_storage import TestLMDBStore as _TestLMDBStore +from .test_storage import TestLRUStoreCache as _TestLRUStoreCache +from .test_storage import TestMemoryStore as _TestMemoryStore +from .test_storage import TestNestedDirectoryStore as _TestNestedDirectoryStore +from .test_storage import TestSQLiteStore as _TestSQLiteStore +from .test_storage import TestSQLiteStoreInMemory as _TestSQLiteStoreInMemory +from .test_storage import TestZipStore as _TestZipStore +from .test_storage import (dimension_separator_fixture, s3, # noqa + skip_if_nested_chunks) @pytest.fixture(params=[ @@ -86,6 +86,10 @@ class InvalidStore: with pytest.raises(ValueError): StoreV3._ensure_store(InvalidStore()) + # cannot initialize with a store from a different Zarr version + with pytest.raises(ValueError): + StoreV3._ensure_store(KVStore(dict())) + assert StoreV3._ensure_store(None) is None # class with all methods of a MutableMapping will become a KVStoreV3 @@ -495,3 +499,17 @@ def test_normalize_store_arg_v3(tmpdir): # error on zarr_version=2 with a v3 store with pytest.raises(ValueError): normalize_store_arg(KVStoreV3(dict()), zarr_version=2, mode='w', clobber=True) + + +class 
TestConsolidatedMetadataStoreV3(_TestConsolidatedMetadataStore): + + version = 3 + ConsolidatedMetadataClass = ConsolidatedMetadataStoreV3 + + @property + def metadata_key(self): + return 'meta/root/consolidated/.zmetadata' + + def test_bad_store_version(self): + with pytest.raises(ValueError): + self.ConsolidatedMetadataClass(KVStore(dict())) From 0c39bd45ab555b8fe2f12d2c45963d26ffc64e9a Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 7 Jan 2022 12:59:14 -0500 Subject: [PATCH 088/109] remove debug statement --- zarr/convenience.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/zarr/convenience.py b/zarr/convenience.py index b4b338674e..bbf99f5f11 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -918,8 +918,6 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, # clear the way if exists: del dest[name] - if name in dest: - 1 / 0 # setup creation keyword arguments kws = create_kws.copy() From 993493b399ed45db2c958d3198ea9c81c294a24b Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 7 Jan 2022 13:02:14 -0500 Subject: [PATCH 089/109] fix tests: restore clobber=True --- zarr/convenience.py | 2 +- zarr/tests/data/store.zip | Bin 107 -> 343 bytes 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/convenience.py b/zarr/convenience.py index bbf99f5f11..5f232db9d6 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -1192,7 +1192,7 @@ def consolidate_metadata(store: BaseStore, metadata_key=".zmetadata", *, path='' open_consolidated """ - store = normalize_store_arg(store) + store = normalize_store_arg(store, clobber=True) version = store._store_version if version > 2: diff --git a/zarr/tests/data/store.zip b/zarr/tests/data/store.zip index a36fd675b34fb7f6bcea7c7e9d3f65e359106ee5..76ba856c6279354024e61645bb2da812b188599e 100644 GIT binary patch literal 343 zcmWIWW@Zs#00Dyx^^jFP|I3(xY!K!J;@s4dME#=t{1W}N{QRWEq5u@tKm~AssyZpL z3Qeh6<{Sl}Vh{!y4m6GtNPz$ogaQkLm@H6!fHxzP2s3WCL6tEq0ULzkQgp2_r63yQ rP#9Rks0U;sw1SX{+BTW*)W`zpP!Ui6yVLsB*Kha6<9lvXkY}ffO-PFS=m5L NMj$i<(kdVh0|51G4^aRB From a08d4f76a09404a22b964b3dae2d6ec9a0ae4519 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 7 Jan 2022 16:50:49 -0500 Subject: [PATCH 090/109] test error path in consolidate_metadata --- zarr/tests/test_convenience.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index 48553bfced..b68e4a8fcf 100644 --- a/zarr/tests/test_convenience.py +++ b/zarr/tests/test_convenience.py @@ -227,6 +227,14 @@ def test_consolidate_metadata(with_chunk_store, zarr_version): arr[:] = 1.0 assert 16 == arr.nchunks_initialized + if zarr_version == 3: + # error on v3 if path not provided + with pytest.raises(ValueError): + consolidate_metadata(store, path=None) + + with pytest.raises(ValueError): + consolidate_metadata(store, path='') + # perform consolidation out = consolidate_metadata(store, path=path) assert isinstance(out, Group) From 43dfa294549e7e112db3cbacc02274e5374bbce1 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 7 Jan 2022 16:52:14 -0500 Subject: [PATCH 091/109] add pragma: no cover for lines in test_meta.py that will only be visited on some architectures --- zarr/tests/test_meta.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zarr/tests/test_meta.py b/zarr/tests/test_meta.py index 83029e52d6..8acd634a13 100644 --- a/zarr/tests/test_meta.py +++ b/zarr/tests/test_meta.py @@ -565,9 +565,9 @@ def test_get_extended_dtype_info(): for dtype in extended_types: try: 
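Taken together, patches 086-090 give the following v3 round trip; a rough sketch
assuming MemoryStoreV3 and the 'dataset' path used in the tests, not part of any
patch:

    from zarr import consolidate_metadata, group, open_consolidated
    from zarr.storage import MemoryStoreV3

    store = MemoryStoreV3()
    root = group(store, path='dataset')
    root.create_group('g1')

    consolidate_metadata(store, path='dataset')   # path is required for v3
    assert 'meta/root/consolidated/.zmetadata' in store

    z = open_consolidated(store, path='dataset')  # reads via the single key
    assert list(z) == ['g1']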
info = get_extended_dtype_info(np.asarray([], dtype=dtype).dtype) - except TypeError: + except TypeError: # pragma: no cover # skip any numpy dtypes not supported by a particular architecture - pass + pass # pragma: no cover assert 'extension' in info assert 'type' in info assert 'fallback' in info From 41b052f88f0b415795972149ed3401259a916504 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 7 Jan 2022 16:54:27 -0500 Subject: [PATCH 092/109] flake8 fixes --- zarr/tests/test_storage.py | 2 +- zarr/tests/test_storage_v3.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 6f59477b1b..7b563a973b 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -29,7 +29,7 @@ DictStore, DirectoryStore, KVStore, KVStoreV3, LMDBStore, LRUStoreCache, MemoryStore, MongoDBStore, NestedDirectoryStore, RedisStore, SQLiteStore, - Store, TempStore, ZipStore, KVStoreV3, + Store, TempStore, ZipStore, array_meta_key, atexit_rmglob, atexit_rmtree, attrs_key, default_compressor, getsize, group_meta_key, init_array, init_group, migrate_1to2, diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index 8db49c664c..9e7b522817 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -1,13 +1,11 @@ import array import atexit -import json import os import tempfile import numpy as np import pytest from zarr._storage.store import _valid_key_characters -from zarr.errors import MetadataError from zarr.storage import (ConsolidatedMetadataStoreV3, DBMStoreV3, DirectoryStoreV3, FSStoreV3, KVStore, KVStoreV3, LMDBStoreV3, LRUStoreCacheV3, MemoryStoreV3, From 8af6131c6945b5465b53bfb364f661d91e1b5711 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Sun, 9 Jan 2022 14:08:16 -0500 Subject: [PATCH 093/109] flake8 --- zarr/convenience.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/zarr/convenience.py b/zarr/convenience.py index 5f232db9d6..8403fb7b3f 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -1195,8 +1195,6 @@ def consolidate_metadata(store: BaseStore, metadata_key=".zmetadata", *, path='' store = normalize_store_arg(store, clobber=True) version = store._store_version - if version > 2: - sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] if version == 2: @@ -1206,7 +1204,9 @@ def is_zarr_key(key): else: - def is_zarr_key(key, sfx=sfx): + sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] + + def is_zarr_key(key): return (key.endswith('.array' + sfx) or key.endswith('.group' + sfx) or key == 'zarr.json') From 001510b3e4397047c881de923fab3a9cd088a95a Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 12 Jan 2022 22:50:24 -0500 Subject: [PATCH 094/109] ENH: add ABSStoreV3 --- zarr/_storage/absstore.py | 95 ++++++++++++++++++++++++++++++++--- zarr/storage.py | 2 +- zarr/tests/test_core.py | 24 +++++++-- zarr/tests/test_hierarchy.py | 18 ++++++- zarr/tests/test_storage.py | 39 ++++++++------ zarr/tests/test_storage_v3.py | 13 +++-- 6 files changed, 159 insertions(+), 32 deletions(-) diff --git a/zarr/_storage/absstore.py b/zarr/_storage/absstore.py index f23b406e0b..c2b929c160 100644 --- a/zarr/_storage/absstore.py +++ b/zarr/_storage/absstore.py @@ -3,7 +3,7 @@ import warnings from numcodecs.compat import ensure_bytes from zarr.util import normalize_storage_path -from zarr._storage.store import Store +from zarr._storage.store import Store, StoreV3, _get_hierarchy_metadata, _rmdir_from_keys_v3 __doctest_requires__ = { 
('ABSStore', 'ABSStore.*'): ['azure.storage.blob'], @@ -169,11 +169,7 @@ def listdir(self, path=None): return items def rmdir(self, path=None): - dir_path = normalize_storage_path(self._append_path_to_prefix(path)) - if dir_path: - dir_path += '/' - for blob in self.client.list_blobs(name_starts_with=dir_path): - self.client.delete_blob(blob) + rmdir_abs(self, path) def getsize(self, path=None): store_path = normalize_storage_path(path) @@ -199,3 +195,90 @@ def getsize(self, path=None): def clear(self): self.rmdir() + + +def rmdir_abs(store: ABSStore, path=None): + dir_path = normalize_storage_path(store._append_path_to_prefix(path)) + if dir_path: + dir_path += '/' + for blob in store.client.list_blobs(name_starts_with=dir_path): + store.client.delete_blob(blob) + + +class ABSStoreV3(ABSStore, StoreV3): + + def list(self): + return list(self.keys()) + + def __eq__(self, other): + return ( + isinstance(other, ABSStoreV3) and + self.client == other.client and + self.prefix == other.prefix + ) + + def __setitem__(self, key, value): + self._validate_key(key) + super().__setitem__(key, value) + + def rmdir(self, path=None): + if not path: + # Currently allowing clear to delete everything as in v2 + + # If we disallow an empty path then we will need to modify + # TestABSStoreV3 to have the create_store method use a prefix. + rmdir_abs(self, '') + return + meta_dir = 'meta/root/' + path + meta_dir = meta_dir.rstrip('/') + rmdir_abs(self, meta_dir) + + # remove data folder + data_dir = 'data/root/' + path + data_dir = data_dir.rstrip('/') + rmdir_abs(self, data_dir) + + # remove metadata files + sfx = _get_hierarchy_metadata(self)['metadata_key_suffix'] + array_meta_file = meta_dir + '.array' + sfx + if array_meta_file in self: + del self[array_meta_file] + group_meta_file = meta_dir + '.group' + sfx + if group_meta_file in self: + del self[group_meta_file] + + def getsize(self, path=None): + from zarr.storage import _getsize # avoid circular import + return _getsize(self, path) + + # # TODO: adapt the v2 getsize method to work for v3 + # def getsize(self, path=None): + # path = '' if path is None else path + # size = 0 + # size += self._getsize('meta/root/' + path) + # size += self._getsize('data/root/' + path) + # return size + + # def _getsize(self, path=None): + # store_path = normalize_storage_path(path) + # fs_path = self._append_path_to_prefix(store_path) + # if fs_path: + # blob_client = self.client.get_blob_client(fs_path) + # else: + # blob_client = None + # if blob_client and blob_client.exists(): + # return blob_client.get_blob_properties().size + # else: + # size = 0 + # if fs_path == '': + # fs_path = None + # elif not fs_path.endswith('/'): + # fs_path += '/' + # for blob in self.client.walk_blobs(name_starts_with=fs_path, delimiter='/'): + # blob_client = self.client.get_blob_client(blob) + # if blob_client.exists(): + # size += blob_client.get_blob_properties().size + # return size + + +ABSStoreV3.__doc__ = ABSStore.__doc__ diff --git a/zarr/storage.py b/zarr/storage.py index 37be7bae74..81790b1ae0 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -57,7 +57,7 @@ normalize_dtype, normalize_fill_value, normalize_order, normalize_shape, normalize_storage_path, retry_call) -from zarr._storage.absstore import ABSStore # noqa: F401 +from zarr._storage.absstore import ABSStore, ABSStoreV3 # noqa: F401 from zarr._storage.store import (_get_hierarchy_metadata, _listdir_from_keys, _rename_from_keys, diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 
cdc41b17ba..5fe6e0eb35 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -36,7 +36,7 @@ LRUStoreCache, NestedDirectoryStore, SQLiteStore, - # ABSStoreV3, + ABSStoreV3, DBMStoreV3, DirectoryStoreV3, FSStoreV3, @@ -2889,9 +2889,25 @@ def test_nbytes_stored(self): assert expect_nbytes_stored == z.nbytes_stored -# TODO: TestArrayWithABSStoreV3 -# @skip_test_env_var("ZARR_TEST_ABS") -# class TestArrayWithABSStoreV3(TestArrayWithPathV3): +@skip_test_env_var("ZARR_TEST_ABS") +class TestArrayWithABSStoreV3(TestArrayWithABSStore, TestArrayWithPathV3): + + @staticmethod + def absstore(): + client = abs_container() + store = ABSStoreV3(client=client) + store.rmdir() + return store + + def create_array(self, array_path='arr1', read_only=False, **kwargs): + store = self.absstore() + kwargs.setdefault('compressor', Zlib(1)) + cache_metadata = kwargs.pop('cache_metadata', True) + cache_attrs = kwargs.pop('cache_attrs', True) + write_empty_chunks = kwargs.pop('write_empty_chunks', True) + init_array(store, path=array_path, **kwargs) + return Array(store, path=array_path, read_only=read_only, cache_metadata=cache_metadata, + cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) class TestArrayWithNestedDirectoryStoreV3(TestArrayWithNestedDirectoryStore, diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 8ded94f5ae..9896873679 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -27,7 +27,7 @@ NestedDirectoryStore, SQLiteStore, ZipStore, array_meta_key, atexit_rmglob, atexit_rmtree, group_meta_key, init_array, init_group) -from zarr.storage import (KVStoreV3, DirectoryStoreV3, # MemoryStoreV3 +from zarr.storage import (ABSStoreV3, KVStoreV3, DirectoryStoreV3, # MemoryStoreV3 FSStoreV3, NestedDirectoryStoreV3, ZipStoreV3, DBMStoreV3, LMDBStoreV3, SQLiteStoreV3, LRUStoreCacheV3) @@ -1128,7 +1128,21 @@ def test_pickle(self): # internal attribute on ContainerClient isn't serializable for py36 and earlier super().test_pickle() -# TODO TestGroupV3WithABSStore(TestGroup): + +@skip_test_env_var("ZARR_TEST_ABS") +class TestGroupWithABSStoreV3(TestGroupV3): + + @staticmethod + def create_store(): + container_client = abs_container() + store = ABSStoreV3(client=container_client) + store.rmdir() + return store, None + + @pytest.mark.skipif(sys.version_info < (3, 7), reason="attr not serializable in py36") + def test_pickle(self): + # internal attribute on ContainerClient isn't serializable for py36 and earlier + super().test_pickle() class TestGroupWithNestedDirectoryStore(TestGroup): diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 7b563a973b..4f00de1724 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -26,10 +26,10 @@ from zarr.meta import ZARR_FORMAT, decode_array_metadata from zarr.n5 import N5Store, N5FSStore from zarr.storage import (ABSStore, ConsolidatedMetadataStore, DBMStore, - DictStore, DirectoryStore, KVStore, KVStoreV3, LMDBStore, + DictStore, DirectoryStore, KVStore, LMDBStore, LRUStoreCache, MemoryStore, MongoDBStore, NestedDirectoryStore, RedisStore, SQLiteStore, - Store, TempStore, ZipStore, + Store, TempStore, ZipStore, KVStoreV3, array_meta_key, atexit_rmglob, atexit_rmtree, attrs_key, default_compressor, getsize, group_meta_key, init_array, init_group, migrate_1to2, @@ -2285,9 +2285,11 @@ def test_format_compatibility(): @skip_test_env_var("ZARR_TEST_ABS") class TestABSStore(StoreTests): + ABSStoreClass = ABSStore + def create_store(self, prefix=None, **kwargs): 
container_client = abs_container() - store = ABSStore( + store = self.ABSStoreClass( prefix=prefix, client=container_client, **kwargs, @@ -2297,7 +2299,7 @@ def create_store(self, prefix=None, **kwargs): def test_non_client_deprecated(self): with pytest.warns(FutureWarning, match='Providing'): - store = ABSStore("container", account_name="account_name", account_key="account_key") + store = self.ABSStoreClass("container", account_name="account_name", account_key="account_key") for attr in ["container", "account_name", "account_key"]: with pytest.warns(FutureWarning, match=attr): @@ -2305,7 +2307,13 @@ def test_non_client_deprecated(self): assert result == attr def test_iterators_with_prefix(self): - for prefix in ['test_prefix', '/test_prefix', 'test_prefix/', 'test/prefix', '', None]: + prefixes = ['test_prefix', '/test_prefix', 'test_prefix/', 'test/prefix'] + + if self.version < 3: + # empty prefix not allowed in v3 + prefixes += ['', None] + + for prefix in prefixes: store = self.create_store(prefix=prefix) # test iterator methods on empty store @@ -2315,19 +2323,22 @@ def test_iterators_with_prefix(self): assert set() == set(store.values()) assert set() == set(store.items()) + prefix = 'meta/root/' if self.version > 2 else '' # setup some values - store['a'] = b'aaa' - store['b'] = b'bbb' - store['c/d'] = b'ddd' - store['c/e/f'] = b'fff' + store[prefix + 'a'] = b'aaa' + store[prefix + 'b'] = b'bbb' + store[prefix + 'c/d'] = b'ddd' + store[prefix + 'c/e/f'] = b'fff' # test iterators on store with data assert 4 == len(store) - assert {'a', 'b', 'c/d', 'c/e/f'} == set(store) - assert {'a', 'b', 'c/d', 'c/e/f'} == set(store.keys()) - assert {b'aaa', b'bbb', b'ddd', b'fff'} == set(store.values()) - assert ({('a', b'aaa'), ('b', b'bbb'), ('c/d', b'ddd'), ('c/e/f', b'fff')} == - set(store.items())) + keys = [prefix + 'a', prefix + 'b', prefix + 'c/d', prefix + 'c/e/f'] + values = [b'aaa', b'bbb', b'ddd', b'fff'] + items = [(k, v) for k, v in zip(keys, values)] + assert set(keys) == set(store) + assert set(keys) == set(store.keys()) + assert set(values) == set(store.values()) + assert set(items) == set(store.items()) def test_getsize(self): return super().test_getsize() diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index 9e7b522817..e48bac54bd 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -6,17 +6,18 @@ import numpy as np import pytest from zarr._storage.store import _valid_key_characters -from zarr.storage import (ConsolidatedMetadataStoreV3, DBMStoreV3, +from zarr.storage import (ABSStoreV3, ConsolidatedMetadataStoreV3, DBMStoreV3, DirectoryStoreV3, FSStoreV3, KVStore, KVStoreV3, LMDBStoreV3, LRUStoreCacheV3, MemoryStoreV3, MongoDBStoreV3, NestedDirectoryStoreV3, RedisStoreV3, SQLiteStoreV3, StoreV3, ZipStoreV3, atexit_rmglob, atexit_rmtree, default_compressor, getsize, init_array, normalize_store_arg) -from zarr.tests.util import CountingDictV3, have_fsspec, skip_test_env_var +from zarr.tests.util import CountingDictV3, have_fsspec, skip_test_env_var, abs_container # pytest will fail to run if the following fixtures aren't imported here from .test_storage import StoreTests as _StoreTests +from .test_storage import TestABSStore as _TestABSStore from .test_storage import TestConsolidatedMetadataStore as _TestConsolidatedMetadataStore from .test_storage import TestDBMStore as _TestDBMStore from .test_storage import TestDBMStoreBerkeleyDB as _TestDBMStoreBerkeleyDB @@ -464,9 +465,11 @@ class TestLRUStoreCacheV3(_TestLRUStoreCache, 
StoreV3Tests): LRUStoreClass = LRUStoreCacheV3 -# TODO: implement ABSStoreV3 -# @skip_test_env_var("ZARR_TEST_ABS") -# class TestABSStoreV3(_TestABSStore, StoreV3Tests): +@skip_test_env_var("ZARR_TEST_ABS") +class TestABSStoreV3(_TestABSStore, StoreV3Tests): + + ABSStoreClass = ABSStoreV3 + def test_normalize_store_arg_v3(tmpdir): From 92f4b80a1cf1173d1d1001d7ca607c8ffe4b0373 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 12 Jan 2022 22:53:00 -0500 Subject: [PATCH 095/109] flake8 --- zarr/_storage/absstore.py | 33 +++------------------------------ zarr/tests/test_storage_v3.py | 2 +- 2 files changed, 4 insertions(+), 31 deletions(-) diff --git a/zarr/_storage/absstore.py b/zarr/_storage/absstore.py index c2b929c160..1eaccf2ea5 100644 --- a/zarr/_storage/absstore.py +++ b/zarr/_storage/absstore.py @@ -3,7 +3,7 @@ import warnings from numcodecs.compat import ensure_bytes from zarr.util import normalize_storage_path -from zarr._storage.store import Store, StoreV3, _get_hierarchy_metadata, _rmdir_from_keys_v3 +from zarr._storage.store import Store, StoreV3, _get_hierarchy_metadata __doctest_requires__ = { ('ABSStore', 'ABSStore.*'): ['azure.storage.blob'], @@ -247,38 +247,11 @@ def rmdir(self, path=None): if group_meta_file in self: del self[group_meta_file] + # TODO: adapt the v2 getsize method to work for v3 + # For now, calling the generic keys-based _getsize def getsize(self, path=None): from zarr.storage import _getsize # avoid circular import return _getsize(self, path) - # # TODO: adapt the v2 getsize method to work for v3 - # def getsize(self, path=None): - # path = '' if path is None else path - # size = 0 - # size += self._getsize('meta/root/' + path) - # size += self._getsize('data/root/' + path) - # return size - - # def _getsize(self, path=None): - # store_path = normalize_storage_path(path) - # fs_path = self._append_path_to_prefix(store_path) - # if fs_path: - # blob_client = self.client.get_blob_client(fs_path) - # else: - # blob_client = None - # if blob_client and blob_client.exists(): - # return blob_client.get_blob_properties().size - # else: - # size = 0 - # if fs_path == '': - # fs_path = None - # elif not fs_path.endswith('/'): - # fs_path += '/' - # for blob in self.client.walk_blobs(name_starts_with=fs_path, delimiter='/'): - # blob_client = self.client.get_blob_client(blob) - # if blob_client.exists(): - # size += blob_client.get_blob_properties().size - # return size - ABSStoreV3.__doc__ = ABSStore.__doc__ diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index e48bac54bd..d56b65edc5 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -13,7 +13,7 @@ SQLiteStoreV3, StoreV3, ZipStoreV3, atexit_rmglob, atexit_rmtree, default_compressor, getsize, init_array, normalize_store_arg) -from zarr.tests.util import CountingDictV3, have_fsspec, skip_test_env_var, abs_container +from zarr.tests.util import CountingDictV3, have_fsspec, skip_test_env_var # pytest will fail to run if the following fixtures aren't imported here from .test_storage import StoreTests as _StoreTests From c358506ff881e169dd50b55d639f81f02e16cc47 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 1 Mar 2022 11:40:57 -0500 Subject: [PATCH 096/109] fix ABSStore.rmdir test coverage --- zarr/tests/test_hierarchy.py | 29 +++++++++++++++++++++++++++++ zarr/tests/test_storage.py | 4 +++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 9896873679..524462e239 100644 --- 
a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -18,6 +18,7 @@ from numcodecs import Zlib from numpy.testing import assert_array_equal +from zarr._storage.store import _get_hierarchy_metadata from zarr.attrs import Attributes from zarr.core import Array from zarr.creation import open_array @@ -245,6 +246,34 @@ def test_require_group(self): g1.store.close() + def test_rmdir_group_and_array_metadata_files(self): + """Test group.store's rmdir method. + + This test case was added to complete test coverage of `ABSStore.rmdir`. + """ + g1 = self.create_group() + # create a dataset + g1.create_dataset('arr1', shape=(100,), chunks=(10,), dtype=np.uint8) + + # create level 1 child group + g2 = g1.create_group('foo') + g1.create_dataset('arr2', shape=(100,), chunks=(10,), dtype=np.uint8) + + if g1._version > 2 and g1.store.is_erasable(): + arr_path = os.path.join(g1.path, 'arr1') + sfx = _get_hierarchy_metadata(g1.store)['metadata_key_suffix'] + array_meta_file = 'meta/root/' + arr_path + '.array' + sfx + assert array_meta_file in g1.store + group_meta_file = 'meta/root/' + g2.path + '.group' + sfx + assert group_meta_file in g1.store + + # rmdir on the array path should also remove the metadata file + g1.store.rmdir(arr_path) + assert array_meta_file not in g1.store + # rmdir on the group path should also remove its metadata file + g1.store.rmdir(g2.path) + assert group_meta_file not in g1.store + def _dataset_path(self, group, path): path = path.rstrip('/') absolute = path.startswith('/') diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 4f00de1724..b5f873dcb3 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -2299,7 +2299,9 @@ def create_store(self, prefix=None, **kwargs): def test_non_client_deprecated(self): with pytest.warns(FutureWarning, match='Providing'): - store = self.ABSStoreClass("container", account_name="account_name", account_key="account_key") + store = self.ABSStoreClass( + "container", account_name="account_name", account_key="account_key" + ) for attr in ["container", "account_name", "account_key"]: with pytest.warns(FutureWarning, match=attr): From 133ee2697bea40eb81b35e27192b6acf164747a4 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 1 Mar 2022 20:29:53 -0500 Subject: [PATCH 097/109] always use / in path --- zarr/tests/test_hierarchy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 524462e239..078f4bfab7 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -260,7 +260,7 @@ def test_rmdir_group_and_array_metadata_files(self): g1.create_dataset('arr2', shape=(100,), chunks=(10,), dtype=np.uint8) if g1._version > 2 and g1.store.is_erasable(): - arr_path = os.path.join(g1.path, 'arr1') + arr_path = g1.path + '/arr1' sfx = _get_hierarchy_metadata(g1.store)['metadata_key_suffix'] array_meta_file = 'meta/root/' + arr_path + '.array' + sfx assert array_meta_file in g1.store From 34f6747e17e0fa3c1984274f31a30cd30ffaa26f Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 4 Mar 2022 13:55:44 -0500 Subject: [PATCH 098/109] remove remaining use of clobber argument in new tests --- zarr/tests/test_convenience.py | 1 - zarr/tests/test_storage.py | 4 ++-- zarr/tests/test_storage_v3.py | 5 ++--- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index fd4fc24b98..7aa4d2cefe 100644 --- a/zarr/tests/test_convenience.py +++ 
b/zarr/tests/test_convenience.py @@ -368,7 +368,6 @@ def test_consolidated_with_chunk_store(): # make sure keyword arguments are passed through without error open_consolidated(store, cache_attrs=True, synchronizer=None, chunk_store=chunk_store) ->>>>>>> upstream/master @pytest.mark.parametrize("options", ( diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 04ac7d2481..3f4a3d204d 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -2474,10 +2474,10 @@ def test_normalize_store_arg(tmpdir): for ext, Class in [('.zip', ZipStore), ('.n5', N5Store)]: fn = tmpdir.join('store' + ext) - store = normalize_store_arg(str(fn), zarr_version=2, mode='w', clobber=True) + store = normalize_store_arg(str(fn), zarr_version=2, mode='w') assert isinstance(store, Class) if have_fsspec: path = tempfile.mkdtemp() - store = normalize_store_arg("file://" + path, zarr_version=2, mode='w', clobber=True) + store = normalize_store_arg("file://" + path, zarr_version=2, mode='w') assert isinstance(store, FSStore) diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index d56b65edc5..8f88d99218 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -474,14 +474,13 @@ class TestABSStoreV3(_TestABSStore, StoreV3Tests): def test_normalize_store_arg_v3(tmpdir): fn = tmpdir.join('store.zip') - store = normalize_store_arg(str(fn), zarr_version=3, mode='w', clobber=True) + store = normalize_store_arg(str(fn), zarr_version=3, mode='w') assert isinstance(store, ZipStoreV3) assert 'zarr.json' in store # can't pass storage_options to non-fsspec store with pytest.raises(ValueError): - normalize_store_arg(str(fn), zarr_version=3, mode='w', clobber=True, - storage_options={"some": "kwargs"}) + normalize_store_arg(str(fn), zarr_version=3, mode='w', storage_options={"some": "kwargs"}) if have_fsspec: path = tempfile.mkdtemp() From 9f56e822e131941d52f3f45710def1bfe119c46d Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 4 Mar 2022 14:04:10 -0500 Subject: [PATCH 099/109] remove NestedDirectoryStoreV3 No need for this class as DirectoryStoreV3 with / chunk separator can be used instead --- zarr/storage.py | 19 ---------------- zarr/tests/test_core.py | 27 ---------------------- zarr/tests/test_hierarchy.py | 13 +---------- zarr/tests/test_storage_v3.py | 43 +++++++---------------------------- 4 files changed, 9 insertions(+), 93 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 91ae0bb7ec..478f057ac3 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -3181,25 +3181,6 @@ def getsize(self, path=None): ZipStoreV3.__doc__ = ZipStore.__doc__ -class NestedDirectoryStoreV3(NestedDirectoryStore, DirectoryStoreV3): - - def list(self): - return list(self.keys()) - - def __eq__(self, other): - return ( - isinstance(other, NestedDirectoryStoreV3) and - self.path == other.path - ) - - def __setitem__(self, key, value): - self._validate_key(key) - super().__setitem__(key, value) - - -NestedDirectoryStoreV3.__doc__ = NestedDirectoryStore.__doc__ - - class RedisStoreV3(RmdirV3, RedisStore, StoreV3): def list(self): diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 5fe6e0eb35..1d5ee044d9 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -43,7 +43,6 @@ KVStoreV3, LMDBStoreV3, LRUStoreCacheV3, - NestedDirectoryStoreV3, SQLiteStoreV3, StoreV3, atexit_rmglob, @@ -2910,32 +2909,6 @@ def create_array(self, array_path='arr1', read_only=False, **kwargs): cache_attrs=cache_attrs, 
write_empty_chunks=write_empty_chunks) -class TestArrayWithNestedDirectoryStoreV3(TestArrayWithNestedDirectoryStore, - TestArrayWithDirectoryStoreV3): - - @staticmethod - def create_array(array_path='arr1', read_only=False, **kwargs): - path = mkdtemp() - atexit.register(shutil.rmtree, path) - store = NestedDirectoryStoreV3(path) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault('compressor', Zlib(1)) - init_array(store, path=array_path, **kwargs) - return Array(store, path=array_path, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) - - def expected(self): - return [ - "73ab8ace56719a5c9308c3754f5e2d57bc73dc20", - "5fb3d02b8f01244721582929b3cad578aec5cea5", - "26b098bedb640846e18dc2fbc1c27684bb02b532", - "799a458c287d431d747bec0728987ca4fe764549", - "c780221df84eb91cb62f633f12d3f1eaa9cee6bd", - ] - - # TODO: TestArrayWithN5StoreV3 # class TestArrayWithN5StoreV3(TestArrayWithDirectoryStoreV3): diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 078f4bfab7..dd9c1936d1 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -29,8 +29,7 @@ array_meta_key, atexit_rmglob, atexit_rmtree, group_meta_key, init_array, init_group) from zarr.storage import (ABSStoreV3, KVStoreV3, DirectoryStoreV3, # MemoryStoreV3 - FSStoreV3, NestedDirectoryStoreV3, ZipStoreV3, - DBMStoreV3, LMDBStoreV3, SQLiteStoreV3, + FSStoreV3, ZipStoreV3, DBMStoreV3, LMDBStoreV3, SQLiteStoreV3, LRUStoreCacheV3) from zarr.util import InfoReporter, buffer_size from zarr.tests.util import skip_test_env_var, have_fsspec, abs_container @@ -1184,16 +1183,6 @@ def create_store(): return store, None -class TestGroupV3WithNestedDirectoryStore(TestGroupWithNestedDirectoryStore, TestGroupV3): - - @staticmethod - def create_store(): - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - store = NestedDirectoryStoreV3(path) - return store, None - - @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestGroupWithFSStore(TestGroup): diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index 8f88d99218..3911d18b25 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -9,10 +9,9 @@ from zarr.storage import (ABSStoreV3, ConsolidatedMetadataStoreV3, DBMStoreV3, DirectoryStoreV3, FSStoreV3, KVStore, KVStoreV3, LMDBStoreV3, LRUStoreCacheV3, MemoryStoreV3, - MongoDBStoreV3, NestedDirectoryStoreV3, RedisStoreV3, - SQLiteStoreV3, StoreV3, ZipStoreV3, atexit_rmglob, - atexit_rmtree, default_compressor, getsize, - init_array, normalize_store_arg) + MongoDBStoreV3, RedisStoreV3, SQLiteStoreV3, StoreV3, + ZipStoreV3, atexit_rmglob, atexit_rmtree, + default_compressor, getsize, init_array, normalize_store_arg) from zarr.tests.util import CountingDictV3, have_fsspec, skip_test_env_var # pytest will fail to run if the following fixtures aren't imported here @@ -310,35 +309,9 @@ def create_store(self, normalize_keys=False, key_separator=".", **kwargs): key_separator=key_separator) -# TODO: remove NestedDirectoryStoreV3? 
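The equivalence claimed in the commit message, sketched roughly below (assuming
init_array still accepts dimension_separator as in v2; this mirrors the deleted
test that follows rather than adding new behavior):

    from zarr.storage import DirectoryStoreV3, init_array

    store = DirectoryStoreV3('data/example.zarr')
    init_array(store, path='arr1', shape=1000, chunks=100,
               dimension_separator='/')
    meta = store._metadata_class.decode_array_metadata(
        store['meta/root/arr1.array.json'])
    assert meta['chunk_grid']['separator'] == '/'   # nested chunk keys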
-class TestNestedDirectoryStoreV3(_TestNestedDirectoryStore, - TestDirectoryStoreV3): - - def create_store(self, normalize_keys=False, **kwargs): - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - store = NestedDirectoryStoreV3(path, normalize_keys=normalize_keys, **kwargs) - return store - - def test_init_array(self): - store = self.create_store() - # assert store._dimension_separator == "/" - path = 'arr1' - init_array(store, path=path, shape=1000, chunks=100) - - # check metadata - mkey = self.root + path + '.array.json' - assert mkey in store - meta = store._metadata_class.decode_array_metadata(store[mkey]) - assert (1000,) == meta['shape'] - assert (100,) == meta['chunk_grid']['chunk_shape'] - assert np.dtype(None) == meta['data_type'] - # assert meta['dimension_separator'] == "/" - assert meta['chunk_grid']['separator'] == "/" - # TODO: enable once N5StoreV3 has been implemented # @pytest.mark.skipif(True, reason="N5StoreV3 not yet fully implemented") -# class TestN5StoreV3(_TestN5Store, TestNestedDirectoryStoreV3, StoreV3Tests): +# class TestN5StoreV3(_TestN5Store, TestDirectoryStoreV3, StoreV3Tests): class TestZipStoreV3(_TestZipStore, StoreV3Tests): @@ -484,21 +457,21 @@ def test_normalize_store_arg_v3(tmpdir): if have_fsspec: path = tempfile.mkdtemp() - store = normalize_store_arg("file://" + path, zarr_version=3, mode='w', clobber=True) + store = normalize_store_arg("file://" + path, zarr_version=3, mode='w') assert isinstance(store, FSStoreV3) assert 'zarr.json' in store fn = tmpdir.join('store.n5') with pytest.raises(NotImplementedError): - normalize_store_arg(str(fn), zarr_version=3, mode='w', clobber=True) + normalize_store_arg(str(fn), zarr_version=3, mode='w') # error on zarr_version=3 with a v2 store with pytest.raises(ValueError): - normalize_store_arg(KVStore(dict()), zarr_version=3, mode='w', clobber=True) + normalize_store_arg(KVStore(dict()), zarr_version=3, mode='w') # error on zarr_version=2 with a v3 store with pytest.raises(ValueError): - normalize_store_arg(KVStoreV3(dict()), zarr_version=2, mode='w', clobber=True) + normalize_store_arg(KVStoreV3(dict()), zarr_version=2, mode='w') class TestConsolidatedMetadataStoreV3(_TestConsolidatedMetadataStore): From 88405a8f6cb0a7534bf1a42ca29f74ef2faf1911 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 4 Mar 2022 15:56:16 -0500 Subject: [PATCH 100/109] flake8 --- zarr/creation.py | 1 - zarr/tests/test_storage_v3.py | 1 - 2 files changed, 2 deletions(-) diff --git a/zarr/creation.py b/zarr/creation.py index 3fe7917997..3950e79478 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -518,7 +518,6 @@ def open_array( zarr_version = getattr(chunk_store, '_store_version', 2) # handle polymorphic store arg - clobber = (mode == 'w') store = normalize_store_arg(store, storage_options=storage_options, mode=mode, zarr_version=zarr_version) zarr_version = getattr(store, '_store_version', 2) diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index 3911d18b25..bca0f79328 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -28,7 +28,6 @@ from .test_storage import TestLMDBStore as _TestLMDBStore from .test_storage import TestLRUStoreCache as _TestLRUStoreCache from .test_storage import TestMemoryStore as _TestMemoryStore -from .test_storage import TestNestedDirectoryStore as _TestNestedDirectoryStore from .test_storage import TestSQLiteStore as _TestSQLiteStore from .test_storage import TestSQLiteStoreInMemory as _TestSQLiteStoreInMemory from .test_storage 
import TestZipStore as _TestZipStore From 69e5a12751e9bd09c901d0b0bda94f4aff4472ce Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 8 Mar 2022 21:03:35 -0500 Subject: [PATCH 101/109] remove rmdir_abs: rmdir method of ABSStore parent class in ABSStoreV3 --- zarr/_storage/absstore.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/zarr/_storage/absstore.py b/zarr/_storage/absstore.py index 045079f244..056545cf18 100644 --- a/zarr/_storage/absstore.py +++ b/zarr/_storage/absstore.py @@ -179,7 +179,11 @@ def listdir(self, path=None): return items def rmdir(self, path=None): - rmdir_abs(self, path) + dir_path = normalize_storage_path(self._append_path_to_prefix(path)) + if dir_path: + dir_path += '/' + for blob in self.client.list_blobs(name_starts_with=dir_path): + self.client.delete_blob(blob) def getsize(self, path=None): store_path = normalize_storage_path(path) @@ -207,14 +211,6 @@ def clear(self): self.rmdir() -def rmdir_abs(store: ABSStore, path=None): - dir_path = normalize_storage_path(store._append_path_to_prefix(path)) - if dir_path: - dir_path += '/' - for blob in store.client.list_blobs(name_starts_with=dir_path): - store.client.delete_blob(blob) - - class ABSStoreV3(ABSStore, StoreV3): def list(self): @@ -232,21 +228,23 @@ def __setitem__(self, key, value): super().__setitem__(key, value) def rmdir(self, path=None): + if not path: # Currently allowing clear to delete everything as in v2 # If we disallow an empty path then we will need to modify # TestABSStoreV3 to have the create_store method use a prefix. - rmdir_abs(self, '') + ABSStore.rmdir(self, '') return + meta_dir = 'meta/root/' + path meta_dir = meta_dir.rstrip('/') - rmdir_abs(self, meta_dir) + ABSStore.rmdir(self, meta_dir) # remove data folder data_dir = 'data/root/' + path data_dir = data_dir.rstrip('/') - rmdir_abs(self, data_dir) + ABSStore.rmdir(self, data_dir) # remove metadata files sfx = _get_hierarchy_metadata(self)['metadata_key_suffix'] From 58c8c20ec3ffd03e6b91c9ec0bfea7385b10da86 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 8 Mar 2022 21:59:31 -0500 Subject: [PATCH 102/109] define meta_root and data_root variables These define the root path for metadata and data, respectively --- zarr/_storage/absstore.py | 6 ++--- zarr/_storage/store.py | 24 ++++++++++------- zarr/convenience.py | 16 ++++++++---- zarr/hierarchy.py | 17 ++++++------ zarr/storage.py | 44 ++++++++++++++++--------------- zarr/tests/test_attrs.py | 5 ++-- zarr/tests/test_convenience.py | 26 +++++++++--------- zarr/tests/test_core.py | 10 ++++--- zarr/tests/test_hierarchy.py | 20 +++++++------- zarr/tests/test_storage.py | 28 ++++++++++---------- zarr/tests/test_storage_v3.py | 48 +++++++++++++++++----------------- zarr/tests/test_sync.py | 6 ++--- 12 files changed, 134 insertions(+), 116 deletions(-) diff --git a/zarr/_storage/absstore.py b/zarr/_storage/absstore.py index 056545cf18..8de288f47d 100644 --- a/zarr/_storage/absstore.py +++ b/zarr/_storage/absstore.py @@ -3,7 +3,7 @@ import warnings from numcodecs.compat import ensure_bytes from zarr.util import normalize_storage_path -from zarr._storage.store import Store, StoreV3, _get_hierarchy_metadata +from zarr._storage.store import _get_hierarchy_metadata, data_root, meta_root, Store, StoreV3 __doctest_requires__ = { ('ABSStore', 'ABSStore.*'): ['azure.storage.blob'], @@ -237,12 +237,12 @@ def rmdir(self, path=None): ABSStore.rmdir(self, '') return - meta_dir = 'meta/root/' + path + meta_dir = meta_root + path meta_dir = 
meta_dir.rstrip('/') ABSStore.rmdir(self, meta_dir) # remove data folder - data_dir = 'data/root/' + path + data_dir = data_root + path data_dir = data_dir.rstrip('/') ABSStore.rmdir(self, data_dir) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 470822e8fa..53c624c8c5 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -11,6 +11,10 @@ group_meta_key = '.zgroup' attrs_key = '.zattrs' +# v3 paths +meta_root = 'meta/root/' +data_root = 'data/root/' + class BaseStore(MutableMapping): """Abstract base class for store implementations. @@ -325,14 +329,14 @@ def _rename_metadata_v3(store: StoreV3, src_path: str, dst_path: str) -> bool: sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] src_path = src_path.rstrip('/') dst_path = dst_path.rstrip('/') - _src_array_json = 'meta/root/' + src_path + '.array' + sfx + _src_array_json = meta_root + src_path + '.array' + sfx if _src_array_json in store: - new_key = 'meta/root/' + dst_path + '.array' + sfx + new_key = meta_root + dst_path + '.array' + sfx store[new_key] = store.pop(_src_array_json) any_renamed = True - _src_group_json = 'meta/root/' + src_path + '.group' + sfx + _src_group_json = meta_root + src_path + '.group' + sfx if _src_group_json in store: - new_key = 'meta/root/' + dst_path + '.group' + sfx + new_key = meta_root + dst_path + '.group' + sfx store[new_key] = store.pop(_src_group_json) any_renamed = True return any_renamed @@ -350,7 +354,7 @@ def _rename_from_keys(store: BaseStore, src_path: str, dst_path: str) -> None: store[new_key] = store.pop(key) else: any_renamed = False - for root_prefix in ['meta/root/', 'data/root/']: + for root_prefix in [meta_root, data_root]: _src_prefix = root_prefix + src_prefix _dst_prefix = root_prefix + dst_prefix for key in store.list_prefix(_src_prefix): # type: ignore @@ -374,12 +378,12 @@ def _rmdir_from_keys(store: StoreLike, path: Optional[str] = None) -> None: def _rmdir_from_keys_v3(store: StoreV3, path: str = "") -> None: - meta_dir = 'meta/root/' + path + meta_dir = meta_root + path meta_dir = meta_dir.rstrip('/') _rmdir_from_keys(store, meta_dir) # remove data folder - data_dir = 'data/root/' + path + data_dir = data_root + path data_dir = data_dir.rstrip('/') _rmdir_from_keys(store, data_dir) @@ -409,7 +413,7 @@ def _prefix_to_array_key(store: StoreLike, prefix: str) -> str: if getattr(store, "_store_version", 2) == 3: if prefix: sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] - key = "meta/root/" + prefix.rstrip("/") + ".array" + sfx + key = meta_root + prefix.rstrip("/") + ".array" + sfx else: raise ValueError("prefix must be supplied to get a v3 array key") else: @@ -421,7 +425,7 @@ def _prefix_to_group_key(store: StoreLike, prefix: str) -> str: if getattr(store, "_store_version", 2) == 3: if prefix: sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] - key = "meta/root/" + prefix.rstrip('/') + ".group" + sfx + key = meta_root + prefix.rstrip('/') + ".group" + sfx else: raise ValueError("prefix must be supplied to get a v3 group key") else: @@ -434,7 +438,7 @@ def _prefix_to_attrs_key(store: StoreLike, prefix: str) -> str: # for v3, attributes are stored in the array metadata sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] if prefix: - key = "meta/root/" + prefix.rstrip('/') + ".array" + sfx + key = meta_root + prefix.rstrip('/') + ".array" + sfx else: raise ValueError("prefix must be supplied to get a v3 array key") else: diff --git a/zarr/convenience.py b/zarr/convenience.py index a2a4368476..c50380a53b 100644 
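
For orientation, this is the key layout the two new constants encode; the
example keys below are hand-assembled but match the key shapes constructed
throughout this patch:

    meta_root = 'meta/root/'
    data_root = 'data/root/'

    path = 'group1/arr1'                                # illustrative path
    array_meta_key = meta_root + path + '.array.json'   # meta/root/group1/arr1.array.json
    group_meta_key = meta_root + 'group1.group.json'    # meta/root/group1.group.json
    first_chunk_key = data_root + path + '/c0'          # data/root/group1/arr1/c0
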
--- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -5,6 +5,7 @@ import re from collections.abc import Mapping, MutableMapping +from zarr._storage.store import data_root, meta_root from zarr.core import Array from zarr.creation import array as _create_array from zarr.creation import open_array @@ -650,6 +651,11 @@ def copy_store(source, dest, source_path='', dest_path='', excludes=None, if source_store_version != dest_store_version: raise ValueError("zarr stores must share the same protocol version") + if source_store_version > 2: + nchar_root = len(meta_root) + # code below assumes len(meta_root) === len(data_root) + assert len(data_root) == nchar_root + # setup logging with _LogWriter(log) as log: @@ -661,8 +667,8 @@ def copy_store(source, dest, source_path='', dest_path='', excludes=None, if not source_key.startswith(source_path): continue elif source_store_version == 3: - # 'meta/root/' or 'data/root/' have length 10 - if not source_key[10:].startswith(source_path): + # skip 'meta/root/' or 'data/root/' at start of source_key + if not source_key[nchar_root:].startswith(source_path): continue # process excludes and includes @@ -684,9 +690,9 @@ def copy_store(source, dest, source_path='', dest_path='', excludes=None, key_suffix = source_key[len(source_path):] dest_key = dest_path + key_suffix elif source_store_version == 3: - # 10 is length of 'meta/root/' or 'data/root/' - key_suffix = source_key[10 + len(source_path):] - dest_key = source_key[:10] + dest_path + key_suffix + # nchar_root is length of 'meta/root/' or 'data/root/' + key_suffix = source_key[nchar_root + len(source_path):] + dest_key = source_key[:nchar_root] + dest_path + key_suffix # create a descriptive label for this operation descr = source_key diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 6fb668c034..2aadbfc168 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -3,6 +3,7 @@ import numpy as np +from zarr._storage.store import data_root, meta_root from zarr.attrs import Attributes from zarr.core import Array from zarr.creation import (array, create, empty, empty_like, full, full_like, @@ -130,8 +131,8 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, self._version = zarr_version if self._version == 3: - self._data_key_prefix = 'data/root/' + self._key_prefix - self._data_path = 'data/root/' + self._path + self._data_key_prefix = data_root + self._key_prefix + self._data_path = data_root + self._path self._hierarchy_metadata = _get_hierarchy_metadata(store=None) self._metadata_key_suffix = self._hierarchy_metadata['metadata_key_suffix'] @@ -148,7 +149,7 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, if self._version == 2: raise GroupNotFoundError(path) else: - implicit_prefix = 'meta/root/' + self._key_prefix + implicit_prefix = meta_root + self._key_prefix if self._store.list_prefix(implicit_prefix): # implicit group does not have any metadata self._meta = None @@ -264,7 +265,7 @@ def __iter__(self): # TODO: Should this iterate over data folders and/or metadata # folders and/or metadata files - dir_path = 'meta/root/' + self._key_prefix + dir_path = meta_root + self._key_prefix name_start = len(dir_path) keys, prefixes = self._store.list_dir(dir_path) @@ -411,7 +412,7 @@ def __getitem__(self, item): chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, synchronizer=self._synchronizer, zarr_version=self._version) elif self._version == 3: - implicit_group = 'meta/root/' + path + '/' + implicit_group = meta_root + path + '/' # non-empty folder 
in the metadata path implies an implicit group if self._store.list_prefix(implicit_group): return Group(self._store, read_only=self._read_only, path=path, @@ -474,7 +475,7 @@ def group_keys(self): if contains_group(self._store, path): yield key else: - dir_name = 'meta/root/' + self._path + dir_name = meta_root + self._path sfx = _get_hierarchy_metadata(self._store)['metadata_key_suffix'] group_sfx = '.group' + sfx for key in sorted(listdir(self._store, dir_name)): @@ -518,7 +519,7 @@ def groups(self): zarr_version=self._version) else: - dir_name = 'meta/root/' + self._path + dir_name = meta_root + self._path sfx = _get_hierarchy_metadata(self._store)['metadata_key_suffix'] group_sfx = '.group' + sfx for key in sorted(listdir(self._store, dir_name)): @@ -605,7 +606,7 @@ def _array_iter(self, keys_only, method, recurse): for i in getattr(group, method)(recurse=recurse): yield i else: - dir_name = 'meta/root/' + self._path + dir_name = meta_root + self._path sfx = _get_hierarchy_metadata(self._store)['metadata_key_suffix'] array_sfx = '.array' + sfx for key in sorted(listdir(self._store, dir_name)): diff --git a/zarr/storage.py b/zarr/storage.py index 478f057ac3..36c34c5949 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -68,8 +68,10 @@ _prefix_to_array_key, _prefix_to_group_key, array_meta_key, - group_meta_key, attrs_key, + data_root, + group_meta_key, + meta_root, BaseStore, Store, StoreV3) @@ -244,8 +246,8 @@ def _getsize(store: BaseStore, path: Path = None) -> int: size = 0 store_version = getattr(store, '_store_version', 2) if store_version == 3: - members = store.list_prefix('data/root/' + path) # type: ignore - members += store.list_prefix('meta/root/' + path) # type: ignore + members = store.list_prefix(data_root + path) # type: ignore + members += store.list_prefix(meta_root + path) # type: ignore # members += ['zarr.json'] else: members = listdir(store, path) @@ -469,7 +471,7 @@ def _init_array_metadata( else: group_meta_key = _prefix_to_group_key(store, _path_to_prefix(path)) array_meta_key = _prefix_to_array_key(store, _path_to_prefix(path)) - data_prefix = 'data/root/' + _path_to_prefix(path) + data_prefix = data_root + _path_to_prefix(path) # attempt to delete any pre-existing array in store if array_meta_key in store: @@ -484,7 +486,7 @@ def _init_array_metadata( # path is a subfolder of an existing array, remove that array parent_path = '/'.join(path.split('/')[:-1]) sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] - array_key = 'meta/root/' + parent_path + '.array' + sfx + array_key = meta_root + parent_path + '.array' + sfx if array_key in store: store.erase(array_key) # type: ignore @@ -668,8 +670,8 @@ def _init_group_metadata( else: group_meta_key = _prefix_to_group_key(store, _path_to_prefix(path)) array_meta_key = _prefix_to_array_key(store, _path_to_prefix(path)) - data_prefix = 'data/root/' + _path_to_prefix(path) - meta_prefix = 'meta/root/' + _path_to_prefix(path) + data_prefix = data_root + _path_to_prefix(path) + meta_prefix = meta_root + _path_to_prefix(path) # attempt to delete any pre-existing array in store if array_meta_key in store: @@ -2890,7 +2892,7 @@ def _get_files_and_dirs_from_path(store, path): dirs = [] # add array and group folders if present - for d in ['data/root/' + path, 'meta/root/' + path]: + for d in [data_root + path, meta_root + path]: dir_path = os.path.join(store.path, d) if os.path.exists(dir_path): dirs.append(dir_path) @@ -2984,14 +2986,14 @@ def rmdir(self, path=None): if self.mode == 'r': raise ReadOnlyError() if 
path: - for base in ['meta/root/', 'data/root/']: + for base in [meta_root, data_root]: store_path = self.dir_path(base + path) if self.fs.isdir(store_path): self.fs.rm(store_path, recursive=True) # remove any associated metadata files sfx = _get_hierarchy_metadata(self)['metadata_key_suffix'] - meta_dir = ('meta/root/' + path).rstrip('/') + meta_dir = (meta_root + path).rstrip('/') array_meta_file = meta_dir + '.array' + sfx self.pop(array_meta_file, None) group_meta_file = meta_dir + '.group' + sfx @@ -3035,7 +3037,7 @@ def rename(self, src_path: Path, dst_path: Path): dst_path = normalize_storage_path(dst_path) any_renamed = False - for base in ['meta/root/', 'data/root/']: + for base in [meta_root, data_root]: if self.list_prefix(base + src_path): src_parent, src_key = self._get_parent(base + src_path) dst_parent, dst_key = self._require_parent(base + dst_path) @@ -3049,7 +3051,7 @@ def rename(self, src_path: Path, dst_path: Path): def rmdir(self, path: Path = None): path = normalize_storage_path(path) if path: - for base in ['meta/root/', 'data/root/']: + for base in [meta_root, data_root]: try: parent, key = self._get_parent(base + path) value = parent[key] @@ -3061,7 +3063,7 @@ def rmdir(self, path: Path = None): # remove any associated metadata files sfx = _get_hierarchy_metadata(self)['metadata_key_suffix'] - meta_dir = ('meta/root/' + path).rstrip('/') + meta_dir = (meta_root + path).rstrip('/') array_meta_file = meta_dir + '.array' + sfx self.pop(array_meta_file, None) group_meta_file = meta_dir + '.group' + sfx @@ -3122,14 +3124,14 @@ def rmdir(self, path=None): store_path = normalize_storage_path(path) dir_path = self.path if store_path: - for base in ['meta/root/', 'data/root/']: + for base in [meta_root, data_root]: dir_path = os.path.join(dir_path, base + store_path) if os.path.isdir(dir_path): shutil.rmtree(dir_path) # remove any associated metadata files sfx = _get_hierarchy_metadata(self)['metadata_key_suffix'] - meta_dir = ('meta/root/' + path).rstrip('/') + meta_dir = (meta_root + path).rstrip('/') array_meta_file = meta_dir + '.array' + sfx self.pop(array_meta_file, None) group_meta_file = meta_dir + '.group' + sfx @@ -3162,8 +3164,8 @@ def __setitem__(self, key, value): def getsize(self, path=None): path = normalize_storage_path(path) with self.mutex: - children = self.list_prefix('data/root/' + path) - children += self.list_prefix('meta/root/' + path) + children = self.list_prefix(data_root + path) + children += self.list_prefix(meta_root + path) print(f"path={path}, children={children}") if children: size = 0 @@ -3242,7 +3244,7 @@ def getsize(self, path=None): # TODO: why does the query below not work in this case? # For now fall back to the default _getsize implementation # size = 0 - # for _path in ['data/root/' + path, 'meta/root/' + path]: + # for _path in [data_root + path, meta_root + path]: # c = self.cursor.execute( # ''' # SELECT COALESCE(SUM(LENGTH(v)), 0) FROM zarr @@ -3265,14 +3267,14 @@ def __setitem__(self, key, value): def rmdir(self, path=None): path = normalize_storage_path(path) if path: - for base in ['meta/root/', 'data/root/']: + for base in [meta_root, data_root]: with self.lock: self.cursor.execute( 'DELETE FROM zarr WHERE k LIKE (? 
|| "/%")', (base + path,) ) # remove any associated metadata files sfx = _get_hierarchy_metadata(self)['metadata_key_suffix'] - meta_dir = ('meta/root/' + path).rstrip('/') + meta_dir = (meta_root + path).rstrip('/') array_meta_file = meta_dir + '.array' + sfx self.pop(array_meta_file, None) group_meta_file = meta_dir + '.group' + sfx @@ -3342,7 +3344,7 @@ class ConsolidatedMetadataStoreV3(ConsolidatedMetadataStore, StoreV3): """ - def __init__(self, store: StoreLike, metadata_key="meta/root/consolidated/.zmetadata"): + def __init__(self, store: StoreLike, metadata_key=meta_root + "consolidated/.zmetadata"): self.store = StoreV3._ensure_store(store) # retrieve consolidated metadata diff --git a/zarr/tests/test_attrs.py b/zarr/tests/test_attrs.py index 1435b64dcc..dbbc19328a 100644 --- a/zarr/tests/test_attrs.py +++ b/zarr/tests/test_attrs.py @@ -2,6 +2,7 @@ import pytest +from zarr._storage.store import meta_root from zarr.attrs import Attributes from zarr.storage import KVStore, KVStoreV3 from zarr.tests.util import CountingDict, CountingDictV3 @@ -22,13 +23,13 @@ def _init_store(version): class TestAttributes(): def init_attributes(self, store, read_only=False, cache=True, zarr_version=2): - root = '.z' if zarr_version == 2 else 'meta/root/' + root = '.z' if zarr_version == 2 else meta_root return Attributes(store, key=root + 'attrs', read_only=read_only, cache=cache) def test_storage(self, zarr_version): store = _init_store(zarr_version) - root = '.z' if zarr_version == 2 else 'meta/root/' + root = '.z' if zarr_version == 2 else meta_root attrs_key = root + 'attrs' a = Attributes(store=store, key=attrs_key) assert isinstance(a.store, KVStore) diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index 7aa4d2cefe..74c8d06fac 100644 --- a/zarr/tests/test_convenience.py +++ b/zarr/tests/test_convenience.py @@ -35,6 +35,8 @@ MemoryStoreV3, SQLiteStoreV3, atexit_rmtree, + data_root, + meta_root, getsize, ) from zarr.tests.util import have_fsspec @@ -131,10 +133,10 @@ def test_zarr_v3_save_multiple_unnamed(): # no path provided save_group(store, x, y, path='dataset', zarr_version=3) # names become arr_{i} for unnamed *args - assert 'data/root/dataset/arr_0/c0' in store - assert 'data/root/dataset/arr_1/c0' in store - assert 'meta/root/dataset/arr_0.array.json' in store - assert 'meta/root/dataset/arr_1.array.json' in store + assert data_root + 'dataset/arr_0/c0' in store + assert data_root + 'dataset/arr_1/c0' in store + assert meta_root + 'dataset/arr_0.array.json' in store + assert meta_root + 'dataset/arr_1.array.json' in store def test_zarr_v3_save_errors(): @@ -252,10 +254,10 @@ def test_consolidate_metadata(with_chunk_store, zarr_version): assert isinstance(out._store, ConsolidatedMetadataStoreV3) assert 'meta/root/consolidated/.zmetadata' in store meta_keys = ['zarr.json', - 'meta/root/dataset.group.json', - 'meta/root/dataset/g1.group.json', - 'meta/root/dataset/g2.group.json', - 'meta/root/dataset/g2/arr.array.json', + meta_root + 'dataset.group.json', + meta_root + 'dataset/g1.group.json', + meta_root + 'dataset/g2.group.json', + meta_root + 'dataset/g2/arr.array.json', 'meta/root/consolidated.group.json'] for key in meta_keys: del store[key] @@ -279,9 +281,9 @@ def test_consolidate_metadata(with_chunk_store, zarr_version): else: cmd = ConsolidatedMetadataStoreV3(store) with pytest.raises(PermissionError): - del cmd['meta/root/dataset.group.json'] + del cmd[meta_root + 'dataset.group.json'] with pytest.raises(PermissionError): - 
cmd['meta/root/dataset.group.json'] = None + cmd[meta_root + 'dataset.group.json'] = None # test getsize on the store assert isinstance(getsize(cmd), Integral) @@ -457,7 +459,7 @@ def test_excludes_includes(self): copy_store(source, dest, excludes=excludes) assert len(dest) == 2 - root = '' if self._version == 2 else 'meta/root/' + root = '' if self._version == 2 else meta_root assert root + 'foo' not in dest # multiple excludes @@ -488,7 +490,7 @@ def test_dry_run(self): def test_if_exists(self): source = self.source dest = self._get_dest_store() - root = '' if self._version == 2 else 'meta/root/' + root = '' if self._version == 2 else meta_root dest[root + 'bar/baz'] = b'mmm' # default ('raise') diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 1d5ee044d9..08bda94ba2 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -47,8 +47,10 @@ StoreV3, atexit_rmglob, atexit_rmtree, + data_root, init_array, init_group, + meta_root, ) from zarr.util import buffer_size from zarr.tests.util import abs_container, skip_test_env_var, have_fsspec @@ -2739,13 +2741,13 @@ def test_array_init(self): # can't open at same path as an existing group with pytest.raises(ContainsGroupError): init_array(store, shape=100, chunks=10, path=path, dtype=' 2 and g1.store.is_erasable(): arr_path = g1.path + '/arr1' sfx = _get_hierarchy_metadata(g1.store)['metadata_key_suffix'] - array_meta_file = 'meta/root/' + arr_path + '.array' + sfx + array_meta_file = meta_root + arr_path + '.array' + sfx assert array_meta_file in g1.store - group_meta_file = 'meta/root/' + g2.path + '.group' + sfx + group_meta_file = meta_root + g2.path + '.group' + sfx assert group_meta_file in g1.store # rmdir on the array path should also remove the metadata file @@ -1230,9 +1230,9 @@ def test_round_trip_nd(self): f = open_group(store, path='group2', mode='w') data_size = data.nbytes - group_meta_size = buffer_size(store['meta/root/group.group.json']) - group2_meta_size = buffer_size(store['meta/root/group2.group.json']) - array_meta_size = buffer_size(store['meta/root/group/raw.array.json']) + group_meta_size = buffer_size(store[meta_root + 'group.group.json']) + group2_meta_size = buffer_size(store[meta_root + 'group2.group.json']) + array_meta_size = buffer_size(store[meta_root + 'group/raw.array.json']) assert store.getsize() == data_size + group_meta_size + group2_meta_size + array_meta_size # added case with path to complete coverage assert store.getsize('group') == data_size + group_meta_size + array_meta_size @@ -1462,12 +1462,12 @@ def test_chunk_store(self): assert_array_equal(np.arange(100), a[:]) # check store keys - group_key = 'meta/root/' + path + '.group.json' - array_key = 'meta/root/' + path + '/foo' + '.array.json' + group_key = meta_root + path + '.group.json' + array_key = meta_root + path + '/foo' + '.array.json' expect = sorted([group_key, array_key, 'zarr.json']) actual = sorted(store.keys()) assert expect == actual - expect = ['data/root/' + path + '/foo/c' + str(i) for i in range(10)] + expect = [data_root + path + '/foo/c' + str(i) for i in range(10)] expect += ['zarr.json'] actual = sorted(chunk_store.keys()) assert expect == actual diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 3f4a3d204d..6f5aac8011 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -31,9 +31,9 @@ NestedDirectoryStore, RedisStore, SQLiteStore, Store, TempStore, ZipStore, KVStoreV3, array_meta_key, atexit_rmglob, atexit_rmtree, - attrs_key, default_compressor, 
getsize, + attrs_key, data_root, default_compressor, getsize, group_meta_key, init_array, init_group, migrate_1to2, - normalize_store_arg) + meta_root, normalize_store_arg) from zarr.storage import FSStore, rename, listdir from zarr.tests.util import CountingDict, have_fsspec, skip_test_env_var, abs_container @@ -508,7 +508,7 @@ def _test_init_array_overwrite(self, order): filters=None) else: path = 'arr1' # no default, have to specify for v3 - mkey = 'meta/root/' + path + '.array.json' + mkey = meta_root + path + '.array.json' meta = dict(shape=(2000,), chunk_grid=dict(type='regular', chunk_shape=(200,), @@ -553,7 +553,7 @@ def test_init_array_path(self): if self.version == 2: mkey = path + '/' + array_meta_key else: - mkey = 'meta/root/' + path + '.array.json' + mkey = meta_root + path + '.array.json' assert mkey in store meta = store._metadata_class.decode_array_metadata(store[mkey]) if self.version == 2: @@ -584,7 +584,7 @@ def _test_init_array_overwrite_path(self, order): order=order, filters=None) else: - mkey = 'meta/root/' + path + '.array.json' + mkey = meta_root + path + '.array.json' meta = dict(shape=(2000,), chunk_grid=dict(type='regular', chunk_shape=(200,), @@ -632,8 +632,8 @@ def test_init_array_overwrite_group(self): array_key = path + '/' + array_meta_key group_key = path + '/' + group_meta_key else: - array_key = 'meta/root/' + path + '.array.json' - group_key = 'meta/root/' + path + '.group.json' + array_key = meta_root + path + '.array.json' + group_key = meta_root + path + '.group.json' store[group_key] = store._metadata_class.encode_group_metadata() # don't overwrite @@ -679,8 +679,8 @@ def _test_init_array_overwrite_chunk_store(self, order): order=order) else: path = 'arr1' - data_path = 'data/root/arr1/' - mkey = 'meta/root/' + path + '.array.json' + data_path = data_root + 'arr1/' + mkey = meta_root + path + '.array.json' meta = dict(shape=(2000,), chunk_grid=dict(type='regular', chunk_shape=(200,), @@ -730,7 +730,7 @@ def test_init_array_compat(self): mkey = array_meta_key else: path = 'arr1' - mkey = 'meta/root/' + path + '.array.json' + mkey = meta_root + path + '.array.json' init_array(store, path=path, shape=1000, chunks=100, compressor='none') meta = store._metadata_class.decode_array_metadata(store[mkey]) if self.version == 2: @@ -746,7 +746,7 @@ def test_init_group(self): mkey = group_meta_key else: path = 'foo' - mkey = 'meta/root/' + path + '.group.json' + mkey = meta_root + path + '.group.json' init_group(store, path=path) # check metadata @@ -823,8 +823,8 @@ def _test_init_group_overwrite_path(self, order): filters=None, chunk_memory_layout=order, ) - array_key = 'meta/root/' + path + '.array.json' - group_key = 'meta/root/' + path + '.group.json' + array_key = meta_root + path + '.array.json' + group_key = meta_root + path + '.group.json' store[array_key] = store._metadata_class.encode_array_metadata(meta) # don't overwrite @@ -2361,7 +2361,7 @@ def test_iterators_with_prefix(self): assert set() == set(store.values()) assert set() == set(store.items()) - prefix = 'meta/root/' if self.version > 2 else '' + prefix = meta_root if self.version > 2 else '' # setup some values store[prefix + 'a'] = b'aaa' store[prefix + 'b'] = b'bbb' diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py index bca0f79328..482be11a1a 100644 --- a/zarr/tests/test_storage_v3.py +++ b/zarr/tests/test_storage_v3.py @@ -5,13 +5,13 @@ import numpy as np import pytest -from zarr._storage.store import _valid_key_characters from zarr.storage import (ABSStoreV3, 
ConsolidatedMetadataStoreV3, DBMStoreV3, DirectoryStoreV3, FSStoreV3, KVStore, KVStoreV3, LMDBStoreV3, LRUStoreCacheV3, MemoryStoreV3, MongoDBStoreV3, RedisStoreV3, SQLiteStoreV3, StoreV3, - ZipStoreV3, atexit_rmglob, atexit_rmtree, - default_compressor, getsize, init_array, normalize_store_arg) + ZipStoreV3, atexit_rmglob, atexit_rmtree, data_root, + default_compressor, getsize, init_array, meta_root, + normalize_store_arg) from zarr.tests.util import CountingDictV3, have_fsspec, skip_test_env_var # pytest will fail to run if the following fixtures aren't imported here @@ -104,10 +104,10 @@ def test_valid_key(): assert not store._valid_key(5) assert not store._valid_key(2.8) - for key in _valid_key_characters: + for key in store._valid_key_characters: assert store._valid_key(key) - # other characters not in _valid_key_characters are not allowed + # other characters not in store._valid_key_characters are not allowed assert not store._valid_key('*') assert not store._valid_key('~') assert not store._valid_key('^') @@ -123,9 +123,9 @@ def test_validate_key(): store._validate_key('zar.json') # valid ascii keys - for valid in ['meta/root/arr1.array.json', - 'data/root/arr1.array.json', - 'meta/root/subfolder/item_1-0.group.json']: + for valid in [meta_root + 'arr1.array.json', + data_root + 'arr1.array.json', + meta_root + 'subfolder/item_1-0.group.json']: store._validate_key(valid) # but otherwise valid keys cannot end in / with pytest.raises(ValueError): @@ -139,7 +139,7 @@ def test_validate_key(): class StoreV3Tests(_StoreTests): version = 3 - root = 'meta/root/' + root = meta_root def test_getsize(self): # TODO: determine proper getsize() behavior for v3 @@ -151,23 +151,23 @@ def test_getsize(self): store = self.create_store() if isinstance(store, dict) or hasattr(store, 'getsize'): assert 0 == getsize(store, 'zarr.json') - store['meta/root/foo/a'] = b'x' + store[meta_root + 'foo/a'] = b'x' assert 1 == getsize(store) assert 1 == getsize(store, 'foo') - store['meta/root/foo/b'] = b'x' + store[meta_root + 'foo/b'] = b'x' assert 2 == getsize(store, 'foo') assert 1 == getsize(store, 'foo/b') - store['meta/root/bar/a'] = b'yy' + store[meta_root + 'bar/a'] = b'yy' assert 2 == getsize(store, 'bar') - store['data/root/bar/a'] = b'zzz' + store[data_root + 'bar/a'] = b'zzz' assert 5 == getsize(store, 'bar') - store['data/root/baz/a'] = b'zzz' + store[data_root + 'baz/a'] = b'zzz' assert 3 == getsize(store, 'baz') assert 10 == getsize(store) - store['data/root/quux'] = array.array('B', b'zzzz') + store[data_root + 'quux'] = array.array('B', b'zzzz') assert 14 == getsize(store) assert 4 == getsize(store, 'quux') - store['data/root/spong'] = np.frombuffer(b'zzzzz', dtype='u1') + store[data_root + 'spong'] = np.frombuffer(b'zzzzz', dtype='u1') assert 19 == getsize(store) assert 5 == getsize(store, 'spong') store.close() @@ -182,7 +182,7 @@ def test_init_array(self, dimension_separator_fixture_v3): dimension_separator=pass_dim_sep) # check metadata - mkey = 'meta/root/' + path + '.array.json' + mkey = meta_root + path + '.array.json' assert mkey in store meta = store._metadata_class.decode_array_metadata(store[mkey]) assert (1000,) == meta['shape'] @@ -200,15 +200,15 @@ def test_list_prefix(self): path = 'arr1' init_array(store, path=path, shape=1000, chunks=100) - expected = ['meta/root/arr1.array.json', 'zarr.json'] + expected = [meta_root + 'arr1.array.json', 'zarr.json'] assert sorted(store.list_prefix('')) == expected - expected = ['meta/root/arr1.array.json'] - assert 
sorted(store.list_prefix('meta/root')) == expected + expected = [meta_root + 'arr1.array.json'] + assert sorted(store.list_prefix(meta_root.rstrip('/'))) == expected # cannot start prefix with '/' with pytest.raises(ValueError): - store.list_prefix(prefix='/meta/root') + store.list_prefix(prefix='/' + meta_root.rstrip('/')) def test_equal(self): store = self.create_store() @@ -255,7 +255,7 @@ def create_store(self, normalize_keys=False, **kwargs): def test_rename_nonexisting(self): store = self.create_store() with pytest.raises(FileNotFoundError): - store.rename('meta/root/a', 'meta/root/b') + store.rename(meta_root + 'a', meta_root + 'b') @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") @@ -283,7 +283,7 @@ def test_init_array(self): init_array(store, path=path, shape=1000, chunks=100) # check metadata - mkey = 'meta/root/' + path + '.array.json' + mkey = meta_root + path + '.array.json' assert mkey in store meta = store._metadata_class.decode_array_metadata(store[mkey]) assert (1000,) == meta['shape'] @@ -480,7 +480,7 @@ class TestConsolidatedMetadataStoreV3(_TestConsolidatedMetadataStore): @property def metadata_key(self): - return 'meta/root/consolidated/.zmetadata' + return meta_root + 'consolidated/.zmetadata' def test_bad_store_version(self): with pytest.raises(ValueError): diff --git a/zarr/tests/test_sync.py b/zarr/tests/test_sync.py index 3cee0d0446..b2bd9e35bb 100644 --- a/zarr/tests/test_sync.py +++ b/zarr/tests/test_sync.py @@ -13,7 +13,7 @@ from zarr.core import Array from zarr.hierarchy import Group from zarr.storage import (DirectoryStore, KVStore, atexit_rmtree, init_array, - init_group) + init_group, meta_root) from zarr.sync import ProcessSynchronizer, ThreadSynchronizer # zarr_version fixture must be imported although not used directly here from zarr.tests.test_attrs import TestAttributes, zarr_version # noqa @@ -24,7 +24,7 @@ class TestAttributesWithThreadSynchronizer(TestAttributes): def init_attributes(self, store, read_only=False, cache=True, zarr_version=zarr_version): - key = '.zattrs' if zarr_version == 2 else 'meta/root/attrs' + key = '.zattrs' if zarr_version == 2 else meta_root + 'attrs' synchronizer = ThreadSynchronizer() return Attributes(store, synchronizer=synchronizer, key=key, read_only=read_only, cache=cache) @@ -33,7 +33,7 @@ def init_attributes(self, store, read_only=False, cache=True, zarr_version=zarr_ class TestAttributesProcessSynchronizer(TestAttributes): def init_attributes(self, store, read_only=False, cache=True, zarr_version=zarr_version): - key = '.zattrs' if zarr_version == 2 else 'meta/root/attrs' + key = '.zattrs' if zarr_version == 2 else meta_root + 'attrs' sync_path = mkdtemp() atexit.register(shutil.rmtree, sync_path) synchronizer = ProcessSynchronizer(sync_path) From 5e1ed7fd1037fcdc3c63953aaad7bb73e44a8980 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 8 Mar 2022 22:00:11 -0500 Subject: [PATCH 103/109] move _valid_key_characters to be a StoreV3 class field --- zarr/_storage/store.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 53c624c8c5..ce3540e1ae 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -141,15 +141,12 @@ def rmdir(self, path: str = "") -> None: _rmdir_from_keys(self, path) -_valid_key_characters = set(ascii_letters + digits + "/.-_") - - class StoreV3(BaseStore): _store_version = 3 _metadata_class = Metadata3 + _valid_key_characters = set(ascii_letters + digits + "/.-_") - @staticmethod - 
def _valid_key(key: str) -> bool: + def _valid_key(self, key: str) -> bool: """ Verify that a key conforms to the specification. @@ -159,12 +156,11 @@ def _valid_key(key: str) -> bool: """ if not isinstance(key, str) or not key.isascii(): return False - if set(key) - _valid_key_characters: + if set(key) - self._valid_key_characters: return False return True - @staticmethod - def _validate_key(key: str): + def _validate_key(self, key: str): """ Verify that a key conforms to the v3 specification. @@ -177,10 +173,10 @@ def _validate_key(key: str): to the user, and is a store implementation detail, so this method will raise a ValueError in that case. """ - if not StoreV3._valid_key(key): + if not self._valid_key(key): raise ValueError( f"Keys must be ascii strings and may only contain the " - f"characters {''.join(sorted(_valid_key_characters))}" + f"characters {''.join(sorted(se_valid_key_characters))}" ) if ( From 3593cadd09d36cc9131f8d1df9cb4df0185a72b4 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 8 Mar 2022 22:43:27 -0500 Subject: [PATCH 104/109] make _get_hierarchy_metadata strictly require 'zarr.json' Still use a default set of metadata in __init__ method of Group or Array classes. Add a _get_metadata_suffix helper that defaults to '.json' if metadata is not present. --- zarr/_storage/absstore.py | 4 ++-- zarr/_storage/store.py | 44 +++++++++++++++++++----------------- zarr/convenience.py | 4 ++-- zarr/core.py | 6 ++++- zarr/hierarchy.py | 25 ++++++++++---------- zarr/storage.py | 16 +++++++------ zarr/tests/test_hierarchy.py | 4 ++-- 7 files changed, 56 insertions(+), 47 deletions(-) diff --git a/zarr/_storage/absstore.py b/zarr/_storage/absstore.py index 8de288f47d..cc41018f9e 100644 --- a/zarr/_storage/absstore.py +++ b/zarr/_storage/absstore.py @@ -3,7 +3,7 @@ import warnings from numcodecs.compat import ensure_bytes from zarr.util import normalize_storage_path -from zarr._storage.store import _get_hierarchy_metadata, data_root, meta_root, Store, StoreV3 +from zarr._storage.store import _get_metadata_suffix, data_root, meta_root, Store, StoreV3 __doctest_requires__ = { ('ABSStore', 'ABSStore.*'): ['azure.storage.blob'], @@ -247,7 +247,7 @@ def rmdir(self, path=None): ABSStore.rmdir(self, data_dir) # remove metadata files - sfx = _get_hierarchy_metadata(self)['metadata_key_suffix'] + sfx = _get_metadata_suffix(self) array_meta_file = meta_dir + '.array' + sfx if array_meta_file in self: del self[array_meta_file] diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index ce3540e1ae..dde1deb287 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -1,9 +1,9 @@ import abc from collections.abc import MutableMapping from string import ascii_letters, digits -from typing import Any, List, Optional, Union +from typing import Any, List, Mapping, Optional, Union -from zarr.meta import Metadata2, Metadata3, _default_entry_point_metadata_v3 +from zarr.meta import Metadata2, Metadata3 from zarr.util import normalize_storage_path # v2 store keys @@ -176,7 +176,7 @@ def _validate_key(self, key: str): if not self._valid_key(key): raise ValueError( f"Keys must be ascii strings and may only contain the " - f"characters {''.join(sorted(se_valid_key_characters))}" + f"characters {''.join(sorted(self._valid_key_characters))}" ) if ( @@ -212,7 +212,7 @@ def erase_prefix(self, prefix): def list_dir(self, prefix): """ - Note: carefully test this with trailing/leading slashes + TODO: carefully test this with trailing/leading slashes """ if prefix: # allow prefix = "" ? 
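             # a non-empty prefix must name a "directory", i.e. end with "/";
             # e.g. (illustrative) list_dir("meta/root/") splits matching keys
             # into direct children (["meta/root/a.array.json"]) and child
             # prefixes (["meta/root/g/"])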
assert prefix.endswith("/") @@ -305,24 +305,26 @@ def _path_to_prefix(path: Optional[str]) -> str: return prefix -# TODO: Should this return default metadata or raise an Error if zarr.json -# is absent? -def _get_hierarchy_metadata(store=None): - meta = _default_entry_point_metadata_v3 - if store is not None: - version = getattr(store, '_store_version', 2) - if version < 3: - raise ValueError("zarr.json hierarchy metadata not stored for " - f"zarr v{version} stores") - if 'zarr.json' in store: - meta = store._metadata_class.decode_hierarchy_metadata(store['zarr.json']) - return meta +def _get_hierarchy_metadata(store: StoreV3) -> Mapping[str, Any]: + version = getattr(store, '_store_version', 2) + if version < 3: + raise ValueError("zarr.json hierarchy metadata not stored for " + f"zarr v{version} stores") + if 'zarr.json' not in store: + raise ValueError("zarr.json metadata not found in store") + return store._metadata_class.decode_hierarchy_metadata(store['zarr.json']) + + +def _get_metadata_suffix(store: StoreV3) -> str: + if 'zarr.json' in store: + return _get_hierarchy_metadata(store)['metadata_key_suffix'] + return '.json' def _rename_metadata_v3(store: StoreV3, src_path: str, dst_path: str) -> bool: """Rename source or group metadata file associated with src_path.""" any_renamed = False - sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] + sfx = _get_metadata_suffix(store) src_path = src_path.rstrip('/') dst_path = dst_path.rstrip('/') _src_array_json = meta_root + src_path + '.array' + sfx @@ -384,7 +386,7 @@ def _rmdir_from_keys_v3(store: StoreV3, path: str = "") -> None: _rmdir_from_keys(store, data_dir) # remove metadata files - sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] + sfx = _get_metadata_suffix(store) array_meta_file = meta_dir + '.array' + sfx if array_meta_file in store: store.erase(array_meta_file) # type: ignore @@ -408,7 +410,7 @@ def _listdir_from_keys(store: BaseStore, path: Optional[str] = None) -> List[str def _prefix_to_array_key(store: StoreLike, prefix: str) -> str: if getattr(store, "_store_version", 2) == 3: if prefix: - sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] + sfx = _get_metadata_suffix(store) key = meta_root + prefix.rstrip("/") + ".array" + sfx else: raise ValueError("prefix must be supplied to get a v3 array key") @@ -420,7 +422,7 @@ def _prefix_to_array_key(store: StoreLike, prefix: str) -> str: def _prefix_to_group_key(store: StoreLike, prefix: str) -> str: if getattr(store, "_store_version", 2) == 3: if prefix: - sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] + sfx = _get_metadata_suffix(store) key = meta_root + prefix.rstrip('/') + ".group" + sfx else: raise ValueError("prefix must be supplied to get a v3 group key") @@ -432,7 +434,7 @@ def _prefix_to_group_key(store: StoreLike, prefix: str) -> str: def _prefix_to_attrs_key(store: StoreLike, prefix: str) -> str: if getattr(store, "_store_version", 2) == 3: # for v3, attributes are stored in the array metadata - sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] + sfx = _get_metadata_suffix(store) if prefix: key = meta_root + prefix.rstrip('/') + ".array" + sfx else: diff --git a/zarr/convenience.py b/zarr/convenience.py index c50380a53b..5d417363ab 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -14,7 +14,7 @@ from zarr.hierarchy import group as _create_group from zarr.hierarchy import open_group from zarr.meta import json_dumps, json_loads -from zarr.storage import (_get_hierarchy_metadata, contains_array, 
contains_group, +from zarr.storage import (_get_metadata_suffix, contains_array, contains_group, normalize_store_arg, BaseStore, ConsolidatedMetadataStore, ConsolidatedMetadataStoreV3) from zarr.util import TreeViewer, buffer_size, normalize_storage_path @@ -1209,7 +1209,7 @@ def is_zarr_key(key): else: - sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] + sfx = _get_metadata_suffix(store) def is_zarr_key(key): return (key.endswith('.array' + sfx) or key.endswith('.group' + sfx) or diff --git a/zarr/core.py b/zarr/core.py index 4db2146a05..132f2ad7d4 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -32,6 +32,7 @@ is_scalar, pop_fields, ) +from zarr.meta import _default_entry_point_metadata_v3 from zarr.storage import ( _get_hierarchy_metadata, _prefix_to_array_key, @@ -193,7 +194,10 @@ def __init__( if self._version == 3: self._data_key_prefix = 'data/root/' + self._key_prefix self._data_path = 'data/root/' + self._path - self._hierarchy_metadata = _get_hierarchy_metadata(store=None) + if 'zarr.json' not in self._store: + self._hierarchy_metadata = _default_entry_point_metadata_v3 + else: + self._hierarchy_metadata = _get_hierarchy_metadata(store=self._store) self._metadata_key_suffix = self._hierarchy_metadata['metadata_key_suffix'] # initialize metadata diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 2aadbfc168..47c3933646 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -3,7 +3,7 @@ import numpy as np -from zarr._storage.store import data_root, meta_root +from zarr._storage.store import _get_metadata_suffix, data_root, meta_root from zarr.attrs import Attributes from zarr.core import Array from zarr.creation import (array, create, empty, empty_like, full, full_like, @@ -14,6 +14,7 @@ GroupNotFoundError, ReadOnlyError, ) +from zarr.meta import _default_entry_point_metadata_v3 from zarr.storage import ( _get_hierarchy_metadata, _prefix_to_group_key, @@ -133,8 +134,11 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, if self._version == 3: self._data_key_prefix = data_root + self._key_prefix self._data_path = data_root + self._path - self._hierarchy_metadata = _get_hierarchy_metadata(store=None) - self._metadata_key_suffix = self._hierarchy_metadata['metadata_key_suffix'] + if 'zarr.json' not in self._store: + self._hierarchy_metadata = _default_entry_point_metadata_v3 + else: + self._hierarchy_metadata = _get_hierarchy_metadata(store=self._store) + self._metadata_key_suffix = _get_metadata_suffix(store=self._store) # guard conditions if contains_array(store, path=self._path): @@ -476,13 +480,12 @@ def group_keys(self): yield key else: dir_name = meta_root + self._path - sfx = _get_hierarchy_metadata(self._store)['metadata_key_suffix'] - group_sfx = '.group' + sfx + group_sfx = '.group' + self._metadata_key_suffix for key in sorted(listdir(self._store, dir_name)): if key.endswith(group_sfx): key = key[:-len(group_sfx)] path = self._key_prefix + key - if path.endswith(".array" + sfx): + if path.endswith(".array" + self._metadata_key_suffix): # skip array keys continue if contains_group(self._store, path, explicit_only=False): @@ -520,13 +523,12 @@ def groups(self): else: dir_name = meta_root + self._path - sfx = _get_hierarchy_metadata(self._store)['metadata_key_suffix'] - group_sfx = '.group' + sfx + group_sfx = '.group' + self._metadata_key_suffix for key in sorted(listdir(self._store, dir_name)): if key.endswith(group_sfx): key = key[:-len(group_sfx)] path = self._key_prefix + key - if path.endswith(".array" + sfx): + if 
path.endswith(".array" + self._metadata_key_suffix): # skip array keys continue if contains_group(self._store, path, explicit_only=False): @@ -607,14 +609,13 @@ def _array_iter(self, keys_only, method, recurse): yield i else: dir_name = meta_root + self._path - sfx = _get_hierarchy_metadata(self._store)['metadata_key_suffix'] - array_sfx = '.array' + sfx + array_sfx = '.array' + self._metadata_key_suffix for key in sorted(listdir(self._store, dir_name)): if key.endswith(array_sfx): key = key[:-len(array_sfx)] path = self._key_prefix + key assert not path.startswith("meta") - if key.endswith('.group' + sfx): + if key.endswith('.group' + self._metadata_key_suffix): # skip group metadata keys continue if contains_array(self._store, path): diff --git a/zarr/storage.py b/zarr/storage.py index 36c34c5949..ebe99d1ca3 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -58,7 +58,8 @@ normalize_shape, normalize_storage_path, retry_call) from zarr._storage.absstore import ABSStore, ABSStoreV3 # noqa: F401 -from zarr._storage.store import (_get_hierarchy_metadata, +from zarr._storage.store import (_get_hierarchy_metadata, # noqa: F401 + _get_metadata_suffix, _listdir_from_keys, _rename_from_keys, _rename_metadata_v3, @@ -117,7 +118,8 @@ def contains_group(store: StoreLike, path: Path = None, explicit_only=True) -> b if key in store: return True # for v3, need to also handle implicit groups - sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] + + sfx = _get_metadata_suffix(store) implicit_prefix = key.replace('.group' + sfx, '') if not implicit_prefix.endswith('/'): implicit_prefix += '/' @@ -485,7 +487,7 @@ def _init_array_metadata( if '/' in path: # path is a subfolder of an existing array, remove that array parent_path = '/'.join(path.split('/')[:-1]) - sfx = _get_hierarchy_metadata(store)['metadata_key_suffix'] + sfx = _get_metadata_suffix(store) array_key = meta_root + parent_path + '.array' + sfx if array_key in store: store.erase(array_key) # type: ignore @@ -2992,7 +2994,7 @@ def rmdir(self, path=None): self.fs.rm(store_path, recursive=True) # remove any associated metadata files - sfx = _get_hierarchy_metadata(self)['metadata_key_suffix'] + sfx = _get_metadata_suffix(self) meta_dir = (meta_root + path).rstrip('/') array_meta_file = meta_dir + '.array' + sfx self.pop(array_meta_file, None) @@ -3062,7 +3064,7 @@ def rmdir(self, path: Path = None): del parent[key] # remove any associated metadata files - sfx = _get_hierarchy_metadata(self)['metadata_key_suffix'] + sfx = _get_metadata_suffix(self) meta_dir = (meta_root + path).rstrip('/') array_meta_file = meta_dir + '.array' + sfx self.pop(array_meta_file, None) @@ -3130,7 +3132,7 @@ def rmdir(self, path=None): shutil.rmtree(dir_path) # remove any associated metadata files - sfx = _get_hierarchy_metadata(self)['metadata_key_suffix'] + sfx = _get_metadata_suffix(self) meta_dir = (meta_root + path).rstrip('/') array_meta_file = meta_dir + '.array' + sfx self.pop(array_meta_file, None) @@ -3273,7 +3275,7 @@ def rmdir(self, path=None): 'DELETE FROM zarr WHERE k LIKE (? 
|| "/%")', (base + path,) ) # remove any associated metadata files - sfx = _get_hierarchy_metadata(self)['metadata_key_suffix'] + sfx = _get_metadata_suffix(self) meta_dir = (meta_root + path).rstrip('/') array_meta_file = meta_dir + '.array' + sfx self.pop(array_meta_file, None) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 8cc60ba295..69ab08254e 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -18,7 +18,7 @@ from numcodecs import Zlib from numpy.testing import assert_array_equal -from zarr._storage.store import _get_hierarchy_metadata +from zarr._storage.store import _get_metadata_suffix from zarr.attrs import Attributes from zarr.core import Array from zarr.creation import open_array @@ -260,7 +260,7 @@ def test_rmdir_group_and_array_metadata_files(self): if g1._version > 2 and g1.store.is_erasable(): arr_path = g1.path + '/arr1' - sfx = _get_hierarchy_metadata(g1.store)['metadata_key_suffix'] + sfx = _get_metadata_suffix(g1.store) array_meta_file = meta_root + arr_path + '.array' + sfx assert array_meta_file in g1.store group_meta_file = meta_root + g2.path + '.group' + sfx From f0362155546ed915fde5dcb60f1eb7f39c0b323a Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Thu, 10 Mar 2022 21:54:54 -0500 Subject: [PATCH 105/109] ignore type checks for _get_metadata_suffix --- zarr/_storage/store.py | 6 +++--- zarr/convenience.py | 2 +- zarr/core.py | 3 ++- zarr/storage.py | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index dde1deb287..3907f4d051 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -410,7 +410,7 @@ def _listdir_from_keys(store: BaseStore, path: Optional[str] = None) -> List[str def _prefix_to_array_key(store: StoreLike, prefix: str) -> str: if getattr(store, "_store_version", 2) == 3: if prefix: - sfx = _get_metadata_suffix(store) + sfx = _get_metadata_suffix(store) # type: ignore key = meta_root + prefix.rstrip("/") + ".array" + sfx else: raise ValueError("prefix must be supplied to get a v3 array key") @@ -422,7 +422,7 @@ def _prefix_to_array_key(store: StoreLike, prefix: str) -> str: def _prefix_to_group_key(store: StoreLike, prefix: str) -> str: if getattr(store, "_store_version", 2) == 3: if prefix: - sfx = _get_metadata_suffix(store) + sfx = _get_metadata_suffix(store) # type: ignore key = meta_root + prefix.rstrip('/') + ".group" + sfx else: raise ValueError("prefix must be supplied to get a v3 group key") @@ -434,7 +434,7 @@ def _prefix_to_group_key(store: StoreLike, prefix: str) -> str: def _prefix_to_attrs_key(store: StoreLike, prefix: str) -> str: if getattr(store, "_store_version", 2) == 3: # for v3, attributes are stored in the array metadata - sfx = _get_metadata_suffix(store) + sfx = _get_metadata_suffix(store) # type: ignore if prefix: key = meta_root + prefix.rstrip('/') + ".array" + sfx else: diff --git a/zarr/convenience.py b/zarr/convenience.py index 5d417363ab..2cbc9bdf68 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -1209,7 +1209,7 @@ def is_zarr_key(key): else: - sfx = _get_metadata_suffix(store) + sfx = _get_metadata_suffix(store) # type: ignore def is_zarr_key(key): return (key.endswith('.array' + sfx) or key.endswith('.group' + sfx) or diff --git a/zarr/core.py b/zarr/core.py index 132f2ad7d4..39920b2cbd 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -6,7 +6,7 @@ import re from collections.abc import MutableMapping from functools import reduce -from typing import Any +from typing 
import Any, Mapping import numpy as np from numcodecs.compat import ensure_bytes, ensure_ndarray @@ -194,6 +194,7 @@ def __init__( if self._version == 3: self._data_key_prefix = 'data/root/' + self._key_prefix self._data_path = 'data/root/' + self._path + self._hierarchy_metadata: Mapping[str, Any] if 'zarr.json' not in self._store: self._hierarchy_metadata = _default_entry_point_metadata_v3 else: diff --git a/zarr/storage.py b/zarr/storage.py index ebe99d1ca3..07189e55e5 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -119,7 +119,7 @@ def contains_group(store: StoreLike, path: Path = None, explicit_only=True) -> b return True # for v3, need to also handle implicit groups - sfx = _get_metadata_suffix(store) + sfx = _get_metadata_suffix(store) # type: ignore implicit_prefix = key.replace('.group' + sfx, '') if not implicit_prefix.endswith('/'): implicit_prefix += '/' @@ -487,7 +487,7 @@ def _init_array_metadata( if '/' in path: # path is a subfolder of an existing array, remove that array parent_path = '/'.join(path.split('/')[:-1]) - sfx = _get_metadata_suffix(store) + sfx = _get_metadata_suffix(store) # type: ignore array_key = meta_root + parent_path + '.array' + sfx if array_key in store: store.erase(array_key) # type: ignore From 353058db117cfa3272079b05a794cbd53ae89d6b Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Thu, 10 Mar 2022 22:07:23 -0500 Subject: [PATCH 106/109] remove unneeded if/else in Array and Hierarchy class __init__ default metadata already gets added by Metadata3.encode_hierarchy_metadata when meta=None --- zarr/core.py | 7 +------ zarr/hierarchy.py | 6 +----- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 39920b2cbd..04c7c86edd 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -32,7 +32,6 @@ is_scalar, pop_fields, ) -from zarr.meta import _default_entry_point_metadata_v3 from zarr.storage import ( _get_hierarchy_metadata, _prefix_to_array_key, @@ -194,11 +193,7 @@ def __init__( if self._version == 3: self._data_key_prefix = 'data/root/' + self._key_prefix self._data_path = 'data/root/' + self._path - self._hierarchy_metadata: Mapping[str, Any] - if 'zarr.json' not in self._store: - self._hierarchy_metadata = _default_entry_point_metadata_v3 - else: - self._hierarchy_metadata = _get_hierarchy_metadata(store=self._store) + self._hierarchy_metadata = _get_hierarchy_metadata(store=self._store) self._metadata_key_suffix = self._hierarchy_metadata['metadata_key_suffix'] # initialize metadata diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 47c3933646..f0a9c74973 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -14,7 +14,6 @@ GroupNotFoundError, ReadOnlyError, ) -from zarr.meta import _default_entry_point_metadata_v3 from zarr.storage import ( _get_hierarchy_metadata, _prefix_to_group_key, @@ -134,10 +133,7 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, if self._version == 3: self._data_key_prefix = data_root + self._key_prefix self._data_path = data_root + self._path - if 'zarr.json' not in self._store: - self._hierarchy_metadata = _default_entry_point_metadata_v3 - else: - self._hierarchy_metadata = _get_hierarchy_metadata(store=self._store) + self._hierarchy_metadata = _get_hierarchy_metadata(store=self._store) self._metadata_key_suffix = _get_metadata_suffix(store=self._store) # guard conditions From 2ec2b2d3b5ecf89e797c12c5b1ae94cdaaceee48 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Thu, 10 Mar 2022 22:08:18 -0500 Subject: [PATCH 107/109] remove unused import --- 
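
A hand-written sketch of the net behavior after patches 104-106 (not taken
from the diffs; assumes an in-memory KVStoreV3): hierarchy metadata is now
resolved strictly from the store's 'zarr.json', and only _get_metadata_suffix
falls back to '.json'.

    from zarr._storage.store import _get_hierarchy_metadata, _get_metadata_suffix
    from zarr.meta import _default_entry_point_metadata_v3
    from zarr.storage import KVStoreV3

    store = KVStoreV3(dict())
    assert _get_metadata_suffix(store) == '.json'  # fallback; no 'zarr.json' yet
    # _get_hierarchy_metadata(store)               # would raise ValueError here
    store['zarr.json'] = _default_entry_point_metadata_v3
    meta = _get_hierarchy_metadata(store)
    assert meta['metadata_key_suffix'] == '.json'
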
zarr/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/core.py b/zarr/core.py index 04c7c86edd..5e2b4252aa 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -6,7 +6,7 @@ import re from collections.abc import MutableMapping from functools import reduce -from typing import Any, Mapping +from typing import Any import numpy as np from numcodecs.compat import ensure_bytes, ensure_ndarray From 2b82967df0139ca2fe74a73d714e9dd03c513991 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Thu, 10 Mar 2022 22:15:33 -0500 Subject: [PATCH 108/109] define DEFAULT_ZARR_VERSION so we can later more easily change from 2 to 3 --- zarr/_storage/store.py | 2 ++ zarr/creation.py | 9 +++++---- zarr/hierarchy.py | 12 ++++++------ zarr/storage.py | 3 ++- zarr/tests/test_creation.py | 15 ++++++++------- 5 files changed, 23 insertions(+), 18 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 3907f4d051..d1ad930609 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -15,6 +15,8 @@ meta_root = 'meta/root/' data_root = 'data/root/' +DEFAULT_ZARR_VERSION = 2 + class BaseStore(MutableMapping): """Abstract base class for store implementations. diff --git a/zarr/creation.py b/zarr/creation.py index 3950e79478..b8c40a859b 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -3,6 +3,7 @@ import numpy as np from numcodecs.registry import codec_registry +from zarr._storage.store import DEFAULT_ZARR_VERSION from zarr.core import Array from zarr.errors import ( ArrayNotFoundError, @@ -132,11 +133,11 @@ def create(shape, chunks=True, dtype=None, compressor='default', """ if zarr_version is None and store is None: - zarr_version = getattr(chunk_store, '_store_version', 2) + zarr_version = getattr(chunk_store, '_store_version', DEFAULT_ZARR_VERSION) # handle polymorphic store arg store = normalize_store_arg(store, zarr_version=zarr_version) - zarr_version = getattr(store, '_store_version', 2) + zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION) # API compatibility with h5py compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs) @@ -515,12 +516,12 @@ def open_array( # a : read/write if exists, create otherwise (default) if zarr_version is None and store is None: - zarr_version = getattr(chunk_store, '_store_version', 2) + zarr_version = getattr(chunk_store, '_store_version', DEFAULT_ZARR_VERSION) # handle polymorphic store arg store = normalize_store_arg(store, storage_options=storage_options, mode=mode, zarr_version=zarr_version) - zarr_version = getattr(store, '_store_version', 2) + zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION) if chunk_store is not None: chunk_store = normalize_store_arg(chunk_store, storage_options=storage_options, diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index f0a9c74973..0684be4a57 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -3,7 +3,7 @@ import numpy as np -from zarr._storage.store import _get_metadata_suffix, data_root, meta_root +from zarr._storage.store import _get_metadata_suffix, data_root, meta_root, DEFAULT_ZARR_VERSION from zarr.attrs import Attributes from zarr.core import Array from zarr.creation import (array, create, empty, empty_like, full, full_like, @@ -116,7 +116,7 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, cache_attrs=True, synchronizer=None, zarr_version=None): store: BaseStore = _normalize_store_arg(store, zarr_version=zarr_version) if zarr_version is None: - zarr_version = getattr(store, '_store_version', 
From 2b82967df0139ca2fe74a73d714e9dd03c513991 Mon Sep 17 00:00:00 2001
From: Gregory Lee
Date: Thu, 10 Mar 2022 22:15:33 -0500
Subject: [PATCH 108/109] define DEFAULT_ZARR_VERSION so we can more easily change the default from 2 to 3 later

---
 zarr/_storage/store.py      |  2 ++
 zarr/creation.py            |  9 +++++----
 zarr/hierarchy.py           | 12 ++++++------
 zarr/storage.py             |  3 ++-
 zarr/tests/test_creation.py | 15 ++++++++-------
 5 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py
index 3907f4d051..d1ad930609 100644
--- a/zarr/_storage/store.py
+++ b/zarr/_storage/store.py
@@ -15,6 +15,8 @@
 meta_root = 'meta/root/'
 data_root = 'data/root/'
 
+DEFAULT_ZARR_VERSION = 2
+
 
 class BaseStore(MutableMapping):
     """Abstract base class for store implementations.
diff --git a/zarr/creation.py b/zarr/creation.py
index 3950e79478..b8c40a859b 100644
--- a/zarr/creation.py
+++ b/zarr/creation.py
@@ -3,6 +3,7 @@
 import numpy as np
 from numcodecs.registry import codec_registry
 
+from zarr._storage.store import DEFAULT_ZARR_VERSION
 from zarr.core import Array
 from zarr.errors import (
     ArrayNotFoundError,
@@ -132,11 +133,11 @@ def create(shape, chunks=True, dtype=None, compressor='default',
     """
 
     if zarr_version is None and store is None:
-        zarr_version = getattr(chunk_store, '_store_version', 2)
+        zarr_version = getattr(chunk_store, '_store_version', DEFAULT_ZARR_VERSION)
 
     # handle polymorphic store arg
     store = normalize_store_arg(store, zarr_version=zarr_version)
-    zarr_version = getattr(store, '_store_version', 2)
+    zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION)
 
     # API compatibility with h5py
     compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)
@@ -515,12 +516,12 @@ def open_array(
     # a : read/write if exists, create otherwise (default)
 
     if zarr_version is None and store is None:
-        zarr_version = getattr(chunk_store, '_store_version', 2)
+        zarr_version = getattr(chunk_store, '_store_version', DEFAULT_ZARR_VERSION)
 
     # handle polymorphic store arg
     store = normalize_store_arg(store, storage_options=storage_options,
                                 mode=mode, zarr_version=zarr_version)
-    zarr_version = getattr(store, '_store_version', 2)
+    zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION)
 
     if chunk_store is not None:
         chunk_store = normalize_store_arg(chunk_store,
                                           storage_options=storage_options,
diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py
index f0a9c74973..0684be4a57 100644
--- a/zarr/hierarchy.py
+++ b/zarr/hierarchy.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 
-from zarr._storage.store import _get_metadata_suffix, data_root, meta_root
+from zarr._storage.store import _get_metadata_suffix, data_root, meta_root, DEFAULT_ZARR_VERSION
 from zarr.attrs import Attributes
 from zarr.core import Array
 from zarr.creation import (array, create, empty, empty_like, full, full_like,
@@ -116,7 +116,7 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
                  cache_attrs=True, synchronizer=None, zarr_version=None):
         store: BaseStore = _normalize_store_arg(store, zarr_version=zarr_version)
         if zarr_version is None:
-            zarr_version = getattr(store, '_store_version', 2)
+            zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION)
         if chunk_store is not None:
             chunk_store: BaseStore = _normalize_store_arg(chunk_store, zarr_version=zarr_version)
         self._store = store
@@ -1177,7 +1177,7 @@ def move(self, source, dest):
 def _normalize_store_arg(store, *, storage_options=None, mode="r",
                          zarr_version=None):
     if zarr_version is None:
-        zarr_version = getattr(store, '_store_version', 2)
+        zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION)
     if store is None:
         return MemoryStore() if zarr_version == 2 else MemoryStoreV3()
     return normalize_store_arg(store,
@@ -1233,7 +1233,7 @@ def group(store=None, overwrite=False, chunk_store=None,
     # handle polymorphic store arg
     store = _normalize_store_arg(store, zarr_version=zarr_version)
     if zarr_version is None:
-        zarr_version = getattr(store, '_store_version', 2)
+        zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION)
     if zarr_version == 3 and path is None:
         raise ValueError(f"path must be provided for a v{zarr_version} group")
     path = normalize_storage_path(path)
@@ -1304,12 +1304,12 @@ def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=N
         store, storage_options=storage_options, mode=mode,
         zarr_version=zarr_version)
     if zarr_version is None:
-        zarr_version = getattr(store, '_store_version', 2)
+        zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION)
     if chunk_store is not None:
         chunk_store = _normalize_store_arg(chunk_store,
                                            storage_options=storage_options,
                                            mode=mode)
-        if not getattr(chunk_store, '_store_version', 2) == zarr_version:
+        if not getattr(chunk_store, '_store_version', DEFAULT_ZARR_VERSION) == zarr_version:
             raise ValueError(
                 "zarr_version of store and chunk_store must match"
             )
diff --git a/zarr/storage.py b/zarr/storage.py
index 07189e55e5..709bbba7ee 100644
--- a/zarr/storage.py
+++ b/zarr/storage.py
@@ -43,6 +43,7 @@
 )
 from numcodecs.registry import codec_registry
 
+from zarr._storage.store import DEFAULT_ZARR_VERSION
 from zarr.errors import (
     MetadataError,
     BadCompressorError,
@@ -132,7 +133,7 @@ def normalize_store_arg(store: Any, storage_options=None, mode="r", *,
                         zarr_version=None) -> BaseStore:
     if zarr_version is None:
         # default to v2 store for backward compatibility
-        zarr_version = getattr(store, '_store_version', 2)
+        zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION)
     if zarr_version not in [2, 3]:
         raise ValueError("zarr_version must be 2 or 3")
     if store is None:
diff --git a/zarr/tests/test_creation.py b/zarr/tests/test_creation.py
index d31aab4e76..cfab4f79ec 100644
--- a/zarr/tests/test_creation.py
+++ b/zarr/tests/test_creation.py
@@ -8,6 +8,7 @@
 import pytest
 from numpy.testing import assert_array_equal
 
+from zarr._storage.store import DEFAULT_ZARR_VERSION
 from zarr.codecs import Zlib
 from zarr.core import Array
 from zarr.creation import (array, create, empty, empty_like, full, full_like,
@@ -57,7 +58,7 @@ def _init_creation_kwargs(zarr_version):
 
 @pytest.mark.parametrize('zarr_version', [None, 2, 3])
 def test_array(zarr_version):
-    expected_zarr_version = 2 if zarr_version is None else zarr_version
+    expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version
     kwargs = _init_creation_kwargs(zarr_version)
 
     # with numpy array
@@ -418,7 +419,7 @@ def test_create_in_dict(zarr_version):
 @pytest.mark.parametrize('zarr_version', [None, 2, 3])
 def test_empty_like(zarr_version):
     kwargs = _init_creation_kwargs(zarr_version)
-    expected_zarr_version = 2 if zarr_version is None else zarr_version
+    expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version
 
     # zarr array
     z = empty(100, chunks=10, dtype='f4', compressor=Zlib(5),
@@ -468,7 +469,7 @@ def test_empty_like(zarr_version):
 
 def test_zeros_like(zarr_version):
     kwargs = _init_creation_kwargs(zarr_version)
-    expected_zarr_version = 2 if zarr_version is None else zarr_version
+    expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version
 
     # zarr array
     z = zeros(100, chunks=10, dtype='f4', compressor=Zlib(5),
@@ -495,7 +496,7 @@ def test_zeros_like(zarr_version):
 
 def test_ones_like(zarr_version):
     kwargs = _init_creation_kwargs(zarr_version)
-    expected_zarr_version = 2 if zarr_version is None else zarr_version
+    expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version
 
     # zarr array
     z = ones(100, chunks=10, dtype='f4', compressor=Zlib(5),
@@ -523,7 +524,7 @@ def test_ones_like(zarr_version):
 
 def test_full_like(zarr_version):
     kwargs = _init_creation_kwargs(zarr_version)
-    expected_zarr_version = 2 if zarr_version is None else zarr_version
+    expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version
 
     z = full(100, chunks=10, dtype='f4', compressor=Zlib(5),
              fill_value=42, order='F', **kwargs)
@@ -552,7 +553,7 @@ def test_full_like(zarr_version):
 @pytest.mark.parametrize('zarr_version', [None, 2, 3])
 def test_open_like(zarr_version):
     kwargs = _init_creation_kwargs(zarr_version)
-    expected_zarr_version = 2 if zarr_version is None else zarr_version
+    expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version
 
     # zarr array
     path = tempfile.mktemp()
@@ -583,7 +584,7 @@ def test_open_like(zarr_version):
 @pytest.mark.parametrize('zarr_version', [None, 2, 3])
 def test_create(zarr_version):
     kwargs = _init_creation_kwargs(zarr_version)
-    expected_zarr_version = 2 if zarr_version is None else zarr_version
+    expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version
 
     # defaults
     z = create(100, **kwargs)
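
[Note on the patch above: the change is mechanical, but the rule it standardizes is worth stating once. Below is a minimal sketch of the resolution order these call sites now share; `_resolve_zarr_version` is a hypothetical helper used only for illustration, not something the patch adds (the patch deliberately inlines the `getattr` at each site).]

    from zarr._storage.store import DEFAULT_ZARR_VERSION

    def _resolve_zarr_version(store, zarr_version=None):
        # Hypothetical helper, not part of the patch: an explicit
        # zarr_version always wins; otherwise defer to the store's
        # _store_version class attribute; plain mappings such as dict
        # have no such attribute and fall back to DEFAULT_ZARR_VERSION.
        if zarr_version is not None:
            return zarr_version
        return getattr(store, '_store_version', DEFAULT_ZARR_VERSION)

Under this rule a MemoryStoreV3 resolves to 3 via its _store_version attribute, while a bare dict resolves to 2 today; flipping DEFAULT_ZARR_VERSION to 3 later changes every call site at once, which is the point of the constant.
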
From 7aa5dfffaa378c1b90b3eceaf43dc5da9b6c6bce Mon Sep 17 00:00:00 2001
From: Gregory Lee
Date: Thu, 10 Mar 2022 22:47:32 -0500
Subject: [PATCH 109/109] add test_get_hierarchy_metadata to test the v3 _get_hierarchy_metadata helper

---
 zarr/tests/test_storage_v3.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/zarr/tests/test_storage_v3.py b/zarr/tests/test_storage_v3.py
index 482be11a1a..73fda1b758 100644
--- a/zarr/tests/test_storage_v3.py
+++ b/zarr/tests/test_storage_v3.py
@@ -1,10 +1,13 @@
 import array
 import atexit
+import copy
 import os
 import tempfile
 
 import numpy as np
 import pytest
 
+from zarr._storage.store import _get_hierarchy_metadata
+from zarr.meta import _default_entry_point_metadata_v3
 from zarr.storage import (ABSStoreV3, ConsolidatedMetadataStoreV3, DBMStoreV3,
                           DirectoryStoreV3, FSStoreV3, KVStore, KVStoreV3,
                           LMDBStoreV3, LRUStoreCacheV3, MemoryStoreV3,
@@ -485,3 +488,26 @@ def metadata_key(self):
     def test_bad_store_version(self):
         with pytest.raises(ValueError):
             self.ConsolidatedMetadataClass(KVStore(dict()))
+
+
+def test_get_hierarchy_metadata():
+    store = KVStoreV3({})
+
+    # error raised if 'zarr.json' is not in the store
+    with pytest.raises(ValueError):
+        _get_hierarchy_metadata(store)
+
+    store['zarr.json'] = _default_entry_point_metadata_v3
+    assert _get_hierarchy_metadata(store) == _default_entry_point_metadata_v3
+
+    # ValueError if only a subset of keys are present
+    store['zarr.json'] = {'zarr_format': 'https://purl.org/zarr/spec/protocol/core/3.0'}
+    with pytest.raises(ValueError):
+        _get_hierarchy_metadata(store)
+
+    # ValueError if any unexpected keys are present
+    extra_metadata = copy.copy(_default_entry_point_metadata_v3)
+    extra_metadata['extra_key'] = 'value'
+    store['zarr.json'] = extra_metadata
+    with pytest.raises(ValueError):
+        _get_hierarchy_metadata(store)
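
[Note on the patch above: the four assertions pin down when the v3 entry-point metadata is accepted. The following is a simplified sketch of that contract, assuming, as the test does, a KVStoreV3 backed by a plain dict so values round-trip unencoded; `_get_hierarchy_metadata_sketch` is illustrative only and is not the real helper in zarr/_storage/store.py.]

    from zarr.meta import _default_entry_point_metadata_v3

    def _get_hierarchy_metadata_sketch(store):
        # Mirrors the behaviour asserted by test_get_hierarchy_metadata:
        # the entry-point document 'zarr.json' must exist and must contain
        # exactly the expected keys (zarr_format, metadata_encoding,
        # metadata_key_suffix, extensions), no more and no fewer.
        if 'zarr.json' not in store:
            raise ValueError("zarr.json not found in store")
        meta = store['zarr.json']
        if set(meta) != set(_default_entry_point_metadata_v3):
            raise ValueError("zarr.json has missing or unexpected keys")
        return meta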