diff --git a/.travis.yml b/.travis.yml index d4c1d8495a..503a38c19a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,6 +40,11 @@ before_script: install: - pip install -U pip setuptools wheel tox-travis coveralls mypy + - pip install trio pytest-trio pytest-asyncio + - | + if [[ "$TRAVIS_PYTHON_VERSION" == "3.7" ]] || [[ "$TRAVIS_PYTHON_VERSION" == "3.8" ]]; then + pip install -U pip redio + fi script: - tox diff --git a/docs/index.rst b/docs/index.rst index d75c159fd1..856b327d31 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -68,6 +68,7 @@ Contents spec release contributing + v3 Projects using Zarr ------------------- diff --git a/docs/v3.rst b/docs/v3.rst new file mode 100644 index 0000000000..d7d8f08b50 --- /dev/null +++ b/docs/v3.rst @@ -0,0 +1,28 @@ +Zarr Spec V3 +============ + +See the `zarr v3 specification `__ + +Using the current development branch, you can import the new stores and utilities from ``zarr.v3`` + + +V3 stores +--------- + +- SyncV3RedisStore +- SyncV3MemoryStore +- SyncV3DirectoryStore + +These 3 stores can be used to talk directly to a v3 archive using the v3 API. + +``V2from3Adapter`` can be used to wrap a v3 store instance to expose a v2 API, for libraries that might directly manipulate a v2 store:: + + zarr.open(V2from3Adapter(SyncV3DirectoryStore('v3.zarr'))) + + +``StoreComparer`` can be used to wrap two stores and check that all operations on the resulting object give identical results:: + + mystore = StoreComparer(MemoryStore(), V2from3Adapter(SyncV3MemoryStore())) + mystore['group'] + +The first store is assumed to be the reference store and the second the tested store. diff --git a/pytest.ini b/pytest.ini index 61a0a99ab5..5f419bd8f3 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,6 +1,8 @@ [pytest] doctest_optionflags = NORMALIZE_WHITESPACE ELLIPSIS IGNORE_EXCEPTION_DETAIL addopts = --durations=10 +trio_mode = true filterwarnings = + ignore:DictStore has been renamed to MemoryStore and will be removed in.*:DeprecationWarning error::DeprecationWarning:zarr.* ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index a5cc0e23bd..5e61067074 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -18,6 +18,12 @@ pytest-cov==2.7.1 pytest-doctestplus==0.4.0 pytest-remotedata==0.3.2 h5py==2.10.0 -s3fs==0.5.0; python_version > '3.6' moto>=1.3.14; python_version > '3.6' flask +s3fs==0.5.0; python_version > '3.6' +# all async features in v3 +pytest-trio ; python_version >= '3.6' +trio ; python_version >= '3.6' +redio ; python_version >= '3.7' and sys_platform != 'win32' +xarray ; python_version >= '3.8' +netCDF4 ; python_version >= '3.8' diff --git a/tox.ini b/tox.ini index 91cc3aa777..4a11a36da1 100644 --- a/tox.ini +++ b/tox.ini @@ -28,7 +28,8 @@ commands = # run doctests in the tutorial and spec py38: python -m doctest -o NORMALIZE_WHITESPACE -o ELLIPSIS docs/tutorial.rst docs/spec/v2.rst # pep8 checks - py38: flake8 zarr + # temporarily disabled:
+ # py38: flake8 zarr # print environment for debugging pip freeze deps = diff --git a/zarr/attrs.py b/zarr/attrs.py index 6c02940c4d..7243191d5e 100644 --- a/zarr/attrs.py +++ b/zarr/attrs.py @@ -2,7 +2,7 @@ from collections.abc import MutableMapping from zarr.meta import parse_metadata -from zarr.util import json_dumps +from zarr.util import json_dumps, json_loads class Attributes(MutableMapping): @@ -27,6 +27,14 @@ class Attributes(MutableMapping): def __init__(self, store, key='.zattrs', read_only=False, cache=True, synchronizer=None): + + assert not key.endswith("root/.group") + self._version = getattr(store, "_store_version", 2) + assert key + + if self._version == 3 and ".z" in key: + raise ValueError("invalid v2-style key {!r} for a v3 store".format(key)) + self.store = store self.key = key self.read_only = read_only @@ -40,7 +48,12 @@ def _get_nosync(self): except KeyError: d = dict() else: - d = parse_metadata(data) + if self._version == 3: + assert isinstance(data, bytes) + d = json_loads(data)["attributes"] + else: + d = parse_metadata(data) + assert isinstance(d, dict) return d def asdict(self): @@ -110,6 +123,7 @@ def put(self, d): self._write_op(self._put_nosync, d) def _put_nosync(self, d): + assert self._version != 3, "attributes are stored on groups/arrays in v3." self.store[self.key] = json_dumps(d) if self.cache: self._cached_asdict = d diff --git a/zarr/core.py b/zarr/core.py index 56ef3e547a..e5b89b009c 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -13,12 +13,26 @@ from zarr.attrs import Attributes from zarr.codecs import AsType, get_codec from zarr.errors import ArrayNotFoundError, ReadOnlyError -from zarr.indexing import (BasicIndexer, CoordinateIndexer, MaskIndexer, - OIndex, OrthogonalIndexer, VIndex, check_fields, - check_no_multi_fields, ensure_tuple, - err_too_many_indices, is_contiguous_selection, - is_scalar, pop_fields) -from zarr.meta import decode_array_metadata, encode_array_metadata +from zarr.indexing import ( + BasicIndexer, + CoordinateIndexer, + MaskIndexer, + OIndex, + OrthogonalIndexer, + VIndex, + check_fields, + check_no_multi_fields, + ensure_tuple, + err_too_many_indices, + is_contiguous_selection, + is_scalar, + pop_fields, +) +from zarr.meta import ( + decode_array_metadata, + encode_array_metadata, + decode_array_metadata_v3, +) from zarr.storage import array_meta_key, attrs_key, getsize, listdir from zarr.util import (InfoReporter, check_array_shape, human_readable_size, is_total_slice, nolock, normalize_chunks, @@ -111,6 +125,7 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, self._store = store self._chunk_store = chunk_store self._path = normalize_storage_path(path) + self._version = getattr(store, "_store_version", 2) if self._path: self._key_prefix = self._path + '/' else: @@ -124,7 +139,14 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, self._load_metadata() # initialize attributes - akey = self._key_prefix + attrs_key + if self._version == 2: + akey = self._key_prefix + attrs_key + else: + if self._key_prefix: + mkey = "meta/root/" + self._key_prefix + ".array" + else: + mkey = "meta/root.array" + akey = mkey self._attrs = Attributes(store, key=akey, read_only=read_only, synchronizer=synchronizer, cache=cache_attrs) @@ -146,20 +168,32 @@ def _load_metadata(self): def _load_metadata_nosync(self): try: - mkey = self._key_prefix + array_meta_key + if self._version == 2: + mkey = self._key_prefix + array_meta_key + elif self._version == 3: + mkey = "meta/root/" + self._key_prefix + ".array" meta_bytes = self._store[mkey]
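+ # note: with a v3 store, a key prefix like "g1/arr/" yields the metadata key + # "meta/root/g1/arr/.array", where v2 would read "g1/arr/.zarray".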
except KeyError: raise ArrayNotFoundError(self._path) else: # decode and store metadata as instance members - meta = decode_array_metadata(meta_bytes) - self._meta = meta - self._shape = meta['shape'] - self._chunks = meta['chunks'] - self._dtype = meta['dtype'] - self._fill_value = meta['fill_value'] - self._order = meta['order'] + if self._version == 2: + meta = decode_array_metadata(meta_bytes) + self._meta = meta + self._shape = meta["shape"] + self._dtype = meta["dtype"] + self._chunks = meta["chunks"] + self._fill_value = meta["fill_value"] + self._order = meta["order"] + elif self._version == 3: + meta = decode_array_metadata_v3(meta_bytes) + self._meta = meta + self._shape = meta["shape"] + self._chunks = meta["chunk_grid"] + self._dtype = meta["data_type"] + self._fill_value = meta["fill_value"] + self._order = meta["chunk_memory_layout"] # setup compressor config = meta['compressor'] @@ -169,7 +203,7 @@ def _load_metadata_nosync(self): self._compressor = get_codec(config) # setup filters - filters = meta['filters'] + filters = meta.get("filters", []) if filters: filters = [get_codec(config) for config in filters] self._filters = filters @@ -1583,7 +1617,10 @@ def _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection, try: # obtain compressed data for chunk - cdata = self.chunk_store[ckey] + if self._version == 2: + cdata = self.chunk_store[ckey] + elif self._version == 3: + cdata = self.chunk_store["data/root/" + ckey] except KeyError: # chunk not initialized diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 372778e20f..3026637b4c 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -6,21 +6,45 @@ from zarr.attrs import Attributes from zarr.core import Array -from zarr.creation import (array, create, empty, empty_like, full, full_like, - normalize_store_arg, ones, ones_like, zeros, - zeros_like) +from zarr.creation import ( + array, + create, + empty, + empty_like, + full, + full_like, + normalize_store_arg, + ones, + ones_like, + zeros, + zeros_like, +) +from zarr.util import ( + InfoReporter, + TreeViewer, + is_valid_python_name, + nolock, + normalize_shape, + normalize_storage_path, +) from zarr.errors import ( ContainsArrayError, ContainsGroupError, GroupNotFoundError, ReadOnlyError, ) -from zarr.meta import decode_group_metadata -from zarr.storage import (MemoryStore, attrs_key, contains_array, - contains_group, group_meta_key, init_group, listdir, - rename, rmdir) -from zarr.util import (InfoReporter, TreeViewer, is_valid_python_name, nolock, - normalize_shape, normalize_storage_path) +from zarr.meta import decode_group_metadata, decode_group_metadata_v3 +from zarr.storage import ( + MemoryStore, + attrs_key, + contains_array, + contains_group, + group_meta_key, + init_group, + listdir, + rename, + rmdir, +) class Group(MutableMapping): @@ -97,6 +121,8 @@ class Group(MutableMapping): def __init__(self, store, path=None, read_only=False, chunk_store=None, cache_attrs=True, synchronizer=None): + if path: + assert not path.startswith(("meta/", "data/")) self._store = store self._chunk_store = chunk_store self._path = normalize_storage_path(path) @@ -111,18 +137,35 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, if contains_array(store, path=self._path): raise ContainsArrayError(path) + self._version = getattr(store, "_store_version", 2) + # initialize metadata try: - mkey = self._key_prefix + group_meta_key - meta_bytes = store[mkey] + if self._version == 3: + assert not self._key_prefix.startswith(("meta/", "data/")) + 
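+ # e.g. a group at path "g1" is stored under "meta/root/g1/.group", while + # the root group document lives at "meta/root.group". +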
if self._key_prefix: + mkey = "meta/root/" + self._key_prefix + ".group" + else: + mkey = "meta/root.group" + assert not mkey.endswith("root/.group") + meta_bytes = store.get(mkey) + else: + mkey = self._key_prefix + group_meta_key + meta_bytes = store[mkey] + except KeyError: raise GroupNotFoundError(path) else: - meta = decode_group_metadata(meta_bytes) - self._meta = meta + if self._version == 3: + self._meta = decode_group_metadata_v3(meta_bytes) + elif self._version == 2: + self._meta = decode_group_metadata(meta_bytes) # setup attributes - akey = self._key_prefix + attrs_key + if self._version == 2: + akey = self._key_prefix + attrs_key + else: + akey = mkey self._attrs = Attributes(store, key=akey, read_only=read_only, cache=cache_attrs, synchronizer=synchronizer) @@ -337,7 +380,9 @@ def __getitem__(self, item): """ + assert not item.startswith("meta/") path = self._item_path(item) + assert not path.startswith("meta/") if contains_array(self._store, path): return Array(self._store, read_only=self._read_only, path=path, chunk_store=self._chunk_store, @@ -419,11 +464,27 @@ def groups(self): """ for key in sorted(listdir(self._store, self._path)): path = self._key_prefix + key - if contains_group(self._store, path): - yield key, Group(self._store, path=path, read_only=self._read_only, - chunk_store=self._chunk_store, - cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer) + if getattr(self._store, "_store_version", None) == 3: + if path.endswith("/"): + if contains_group(self._store, path): + yield key, Group( + self._store, + path=path[9:-1], + read_only=self._read_only, + chunk_store=self._chunk_store, + cache_attrs=self.attrs.cache, + synchronizer=self._synchronizer, + ) + else: + if contains_group(self._store, path): + yield key, Group( + self._store, + path=path, + read_only=self._read_only, + chunk_store=self._chunk_store, + cache_attrs=self.attrs.cache, + synchronizer=self._synchronizer, + ) def array_keys(self, recurse=False): """Return an iterator over member names for arrays only. @@ -482,8 +543,9 @@ def arrays(self, recurse=False): def _array_iter(self, keys_only, method, recurse): for key in sorted(listdir(self._store, self._path)): path = self._key_prefix + key + assert not path.startswith("meta/") if contains_array(self._store, path): - yield key if keys_only else (key, self[key]) + yield key.rstrip("/") if keys_only else (key.rstrip("/"), self[key]) elif recurse and contains_group(self._store, path): group = self[key] for i in getattr(group, method)(recurse=recurse): diff --git a/zarr/meta.py b/zarr/meta.py index d7bac502a4..a42673aeda 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -1,15 +1,39 @@ # -*- coding: utf-8 -*- import base64 +import json from collections.abc import Mapping import numpy as np from zarr.errors import MetadataError from zarr.util import json_dumps, json_loads +import zarr.util from typing import Union, Any, List, Mapping as MappingType ZARR_FORMAT = 2 +ZARR_FORMAT_v3 = "3" + +_v3_core_type = { + "bool", + "i1", + "u1", + "<i2", + ">i2", + "<i4", + ">i4", + "<i8", + ">i8", + "<u2", + ">u2", + "<u4", + ">u4", + "<u8", + ">u8", + "<f2", + ">f2", + "<f4", + ">f4", + "<f8", + ">f8", +} def parse_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]: @@ -18,11 +42,9 @@ def parse_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]: # or a string of JSON that we will parse here. We allow for an already-parsed # object to accommodate a consolidated metadata store, where all the metadata for # all groups and arrays will already have been parsed from JSON.
- if isinstance(s, Mapping): # assume metadata has already been parsed into a mapping object meta = s - else: # assume metadata needs to be parsed as JSON meta = json_loads(s) @@ -30,6 +52,24 @@ def parse_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]: return meta +def decode_array_metadata_v3(s): + meta = parse_metadata(s) + + # check metadata format + # extract array metadata fields + dtype = decode_dtype_v3(meta["data_type"]) + fill_value = decode_fill_value(meta["fill_value"], dtype) + meta = dict( + shape=tuple(meta["shape"]), + chunk_grid=tuple(meta["chunk_grid"]["chunk_shape"]), + data_type=dtype, + compressor=meta["compressor"], + fill_value=fill_value, + chunk_memory_layout=meta["chunk_memory_layout"], + ) + return meta + + def decode_array_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]: meta = parse_metadata(s) @@ -53,12 +93,30 @@ def decode_array_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]: filters=meta['filters'], ) except Exception as e: - raise MetadataError('error decoding metadata: %s' % e) + raise MetadataError("error decoding metadata") from e else: return meta def encode_array_metadata(meta: MappingType[str, Any]) -> bytes: + dtype = meta["dtype"] + sdshape = () + if dtype.subdtype is not None: + dtype, sdshape = dtype.subdtype + meta = dict( + zarr_format=ZARR_FORMAT, + shape=meta["shape"] + sdshape, + chunks=meta["chunks"], + dtype=encode_dtype(dtype), + compressor=meta["compressor"], + fill_value=encode_fill_value(meta["fill_value"], dtype), + order=meta["order"], + filters=meta["filters"], + ) + return json_dumps(meta) + + +def encode_array_metadata_v3(meta): dtype = meta['dtype'] sdshape = () if dtype.subdtype is not None: @@ -67,15 +125,22 @@ def encode_array_metadata(meta: MappingType[str, Any]) -> bytes: zarr_format=ZARR_FORMAT, shape=meta['shape'] + sdshape, chunks=meta['chunks'], - dtype=encode_dtype(dtype), + dtype=encode_dtype_v3(dtype), compressor=meta['compressor'], fill_value=encode_fill_value(meta['fill_value'], dtype), order=meta['order'], - filters=meta['filters'], ) return json_dumps(meta) +def encode_dtype_v3(d: np.dtype) -> str: + s = encode_dtype(d) + if s == "|b1": + return "bool" + assert s in _v3_core_type + return s + + def encode_dtype(d: np.dtype) -> str: if d.fields is None: return d.str @@ -96,7 +161,16 @@ def decode_dtype(d) -> np.dtype: return np.dtype(d) -def decode_group_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]: +def decode_dtype_v3(d): + assert d in _v3_core_type + return np.dtype(d) + + +def decode_group_metadata_v3(s: Union[MappingType, str]) -> MappingType[str, Any]: + return json.loads(s) + + +def decode_group_metadata(s): meta = parse_metadata(s) # check metadata format version @@ -117,11 +191,11 @@ def encode_group_metadata(meta=None) -> bytes: return json_dumps(meta) -FLOAT_FILLS = { - 'NaN': np.nan, - 'Infinity': np.PINF, - '-Infinity': np.NINF -} +def encode_group_metadata_v3(meta): + return json_dumps(meta) + + +FLOAT_FILLS = {"NaN": np.nan, "Infinity": np.PINF, "-Infinity": np.NINF} def decode_fill_value(v, dtype): diff --git a/zarr/storage.py b/zarr/storage.py index ddb19a1f7f..d952b29def 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -47,7 +47,13 @@ FSPathExistNotDir, ReadOnlyError, ) -from zarr.meta import encode_array_metadata, encode_group_metadata + +from zarr.meta import ( + encode_array_metadata, + encode_array_metadata_v3, + encode_group_metadata, + encode_group_metadata_v3, +) from zarr.util import (buffer_size, json_loads, nolock, 
normalize_chunks, normalize_dtype, normalize_fill_value, normalize_order, normalize_shape, normalize_storage_path) @@ -86,9 +92,17 @@ def _path_to_prefix(path: Optional[str]) -> str: def contains_array(store: MutableMapping, path: Path = None) -> bool: """Return True if the store contains an array at the given logical path.""" + if path: + assert not path.startswith("meta/") path = normalize_storage_path(path) prefix = _path_to_prefix(path) - key = prefix + array_meta_key + if getattr(store, "_store_version", 2) == 3: + if prefix: + key = "meta/root/" + prefix + ".array" + else: + key = "meta/root.array" + else: + key = prefix + array_meta_key return key in store @@ -97,6 +111,13 @@ def contains_group(store: MutableMapping, path: Path = None) -> bool: path = normalize_storage_path(path) prefix = _path_to_prefix(path) key = prefix + group_meta_key + if getattr(store, "_store_version", 2) == 3: + if prefix: + key = "meta/root/" + prefix + ".group" + else: + key = "meta/root.group" + else: + key = prefix + group_meta_key return key in store @@ -157,11 +178,29 @@ def _listdir_from_keys(store: MutableMapping, path: Optional[str] = None) -> Lis return sorted(children) +def _norm(k): + if k.endswith(".group"): + return k[:-6] + "/" + if k.endswith(".array"): + return k[:-6] + return k + + def listdir(store, path: Path = None): """Obtain a directory listing for the given path. If `store` provides a `listdir` method, this will be called, otherwise will fall back to implementation via the `MutableMapping` interface.""" path = normalize_storage_path(path) + if getattr(store, "_store_version", None) == 3: + if not path.endswith("/"): + path = path + "/" + if not path.startswith("/"): + path = "/" + path + + res = {_norm(k[10:]) for k in store.list_dir("meta/root" + path)} + for r in res: + assert not r.startswith("meta/") + return res + if hasattr(store, 'listdir'): # pass through return store.listdir(path) @@ -432,7 +471,11 @@ def _init_array_metadata( compressor=compressor_config, fill_value=fill_value, order=order, filters=filters_config) key = _path_to_prefix(path) + array_meta_key + + if getattr(store, "_store_version", 2) == 3: + store[key] = encode_array_metadata_v3(meta) + else: + store[key] = encode_array_metadata(meta) # backwards compatibility @@ -466,8 +509,10 @@ def init_group( path = normalize_storage_path(path) # ensure parent group initialized - _require_parent_group(path, store=store, chunk_store=chunk_store, - overwrite=overwrite) + if getattr(store, "_store_version", 2) != 3: + _require_parent_group( + path, store=store, chunk_store=chunk_store, overwrite=overwrite + ) # initialise metadata _init_group_metadata(store=store, overwrite=overwrite, path=path, @@ -496,8 +541,18 @@ def _init_group_metadata( # N.B., currently no metadata properties are needed, however there may # be in future meta = dict() # type: ignore - key = _path_to_prefix(path) + group_meta_key - store[key] = encode_group_metadata(meta) + prefix = _path_to_prefix(path) + if getattr(store, "_store_version", 2) == 3: + if prefix: + key = "meta/root/" + prefix + ".group" + else: + key = "meta/root.group" + else: + key = prefix + group_meta_key + if getattr(store, "_store_version", 2) == 2: + store[key] = encode_group_metadata(meta) + else: + store[key] = encode_group_metadata_v3(meta) def _dict_store_keys(d: Dict, prefix="", cls=dict): diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 70e7282fc4..9f877f1b45 100644 --- a/zarr/tests/test_hierarchy.py +++ 
b/zarr/tests/test_hierarchy.py @@ -30,6 +30,57 @@ init_group) from zarr.util import InfoReporter from zarr.tests.util import skip_test_env_var +from zarr import v3 + +import zarr.v3.storage as v3storage + + +# Async tests need to be top-level. +async def create_store(): + pytest.importorskip("redio") + from zarr.v3 import V2from3Adapter, SyncV3RedisStore + + # create a sync store for now as some Group methods are sync. + rs = SyncV3RedisStore() + await rs.async_initialize() + return rs, None + + +async def create_group( + store=None, path=None, read_only=False, chunk_store=None, synchronizer=None +): + # can be overridden in sub-classes + init_group(store, path=path, chunk_store=chunk_store) + g = Group( + store, + path=path, + read_only=read_only, + chunk_store=chunk_store, + synchronizer=synchronizer, + ) + return g + + +async def test_group_init_1(): + store, chunk_store = await create_store() + g = await create_group(store, chunk_store=chunk_store) + # assert store is g.store + # if chunk_store is None: + # assert store is g.chunk_store + # else: + # assert chunk_store is g.chunk_store + # assert not g.read_only + # assert '' == g.path + # assert '/' == g.name + # assert '' == g.basename + # assert isinstance(g.attrs, Attributes) + # g.attrs['foo'] = 'bar' + # assert g.attrs['foo'] == 'bar' + # assert isinstance(g.info, InfoReporter) + # assert isinstance(repr(g.info), str) + # assert isinstance(g.info._repr_html_(), str) + # if hasattr(store, 'close'): + # store.close() # noinspection PyStatementEffect @@ -931,6 +982,41 @@ def test_context_manager(self): d[:] = np.arange(100) +@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires trio") +class TestGroupWithV3MemoryStore(TestGroup): + @staticmethod + def create_store(): + from zarr.v3 import V2from3Adapter, SyncV3MemoryStore, StoreComparer + + return StoreComparer(MemoryStore(), V2from3Adapter(SyncV3MemoryStore())), None + + +@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires trio") +class TestGroupWithV3DirectoryStore(TestGroup): + @staticmethod + def create_store(): + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, path) + from zarr.v3 import V2from3Adapter, StoreComparer, SyncV3DirectoryStore + + return ( + StoreComparer(MemoryStore(), V2from3Adapter(SyncV3DirectoryStore(path))), + None, + ) + + +@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires trio") +class TestGroupWithV3RedisStore(TestGroup): + @staticmethod + def create_store(): + pytest.importorskip("redio") + from zarr.v3 import V2from3Adapter, SyncV3RedisStore, StoreComparer + + rs = SyncV3RedisStore() + rs.initialize() + return StoreComparer(MemoryStore(), V2from3Adapter(rs)), None + + class TestGroupWithMemoryStore(TestGroup): @staticmethod diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index f3c7fab008..43972705e5 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -3,6 +3,7 @@ import atexit import json import os +import sys import pickle import shutil import tempfile @@ -746,6 +747,24 @@ def setdel_hierarchy_checks(store): assert 'r/s' not in store +@pytest.mark.skipif(sys.version_info < (3, 6), reason="needs trio") +class TestV3Adapter(StoreTests, unittest.TestCase): + def create_store(self): + from zarr.v3 import V2from3Adapter, SyncV3MemoryStore, StoreComparer + + self._store = StoreComparer(MemoryStore(), V2from3Adapter(SyncV3MemoryStore())) + return self._store + + def test_store_contains_bytes(self): + store = self.create_store() + store["foo"] = np.array([97, 98, 99,
100, 101], dtype=np.uint8) + assert store["foo"] == b"abcde" + + def test_clear(self): + super().test_clear() + assert self._store.tested._v3store._backend == {} + + class TestMemoryStore(StoreTests, unittest.TestCase): def create_store(self): diff --git a/zarr/tests/test_xarray.py b/zarr/tests/test_xarray.py new file mode 100644 index 0000000000..89a241451d --- /dev/null +++ b/zarr/tests/test_xarray.py @@ -0,0 +1,41 @@ +import sys + + +if sys.version_info >= (3, 8): + import requests + import zarr + import xarray as xr + from zarr.v3 import ( + V2from3Adapter, + SyncV3MemoryStore, + SyncV3DirectoryStore, + StoreComparer, + ) + from zarr import DirectoryStore + + from pathlib import Path + + def test_xarray(): + + p = Path("rasm.nc") + if not p.exists(): + r = requests.get("https://github.com/pydata/xarray-data/raw/master/rasm.nc") + with open("rasm.nc", "wb") as f: + f.write(r.content) + + ds = xr.open_dataset("rasm.nc") + + compressor = zarr.Blosc(cname="zstd", clevel=3) + encoding = {vname: {"compressor": compressor} for vname in ds.data_vars} + + v33 = SyncV3DirectoryStore("v3.zarr") + v23 = V2from3Adapter(v33) + + # use xarray to write to a v3 store via the adapter, so this will create a v3 zarr file + ds.to_zarr(v23, encoding=encoding) + + # now we open the v3 store directly and check we get the right things + zarr_ds = xr.open_zarr(store=v33) + + assert len(zarr_ds.attrs) == 11 + assert zarr_ds.Tair.shape == (36, 205, 275) diff --git a/zarr/v3/__init__.py b/zarr/v3/__init__.py new file mode 100644 index 0000000000..7b83f0d7f4 --- /dev/null +++ b/zarr/v3/__init__.py @@ -0,0 +1,642 @@ +""" +Zarr spec v3 draft implementation +""" + +__version__ = "0.0.1" + +import json +import os +import sys +from collections.abc import MutableMapping +import pathlib +from string import ascii_letters, digits +from numcodecs.compat import ensure_bytes + +from .utils import syncify, nested_run + +# flake8: noqa +from .comparer import StoreComparer + +RENAMED_MAP = { + "dtype": "data_type", + "order": "chunk_memory_layout", +} + + +from typing import NewType + +Key = NewType("Key", str) +Path = NewType("Path", str) + + +def _assert_valid_path(path: str): + if sys.version_info > (3, 7): + assert path.isascii() + assert path.startswith("/") + assert "\\" not in path + + +class BaseV3Store: + """ + Base utility class to create a v3-compliant store with extra checks and utilities. + + It provides a number of default method implementations that add extra checks in + order to ensure the correctness of the implementation. + """ + + _store_version = 3 + _async = True + + @staticmethod + def _valid_key(key: str) -> bool: + """ + Verify that a key conforms to the specification. + + A key is any string containing only characters in the range a-z, A-Z, + 0-9, or in the set ``/.-_``; this returns True if that's the case, and + False otherwise. + + In addition, in spec v3, keys can only start with the prefix meta/, + data/ or be exactly zarr.json. This should not be exposed to the + user, and is a store implementation detail, so this method will raise + a ValueError in that case. + """ + if sys.version_info > (3, 7): + if not key.isascii(): + return False + if set(key) - set(ascii_letters + digits + "/.-_"): + return False + + if ( + not key.startswith("data/") + and (not key.startswith("meta/")) + and (not key == "zarr.json") + ): + raise ValueError("key starts with unexpected value: `{}`".format(key)) + # todo: likely more logic to add here.
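+ # e.g. "meta/root/g1/.group" and "data/root/g1/0/0" are valid keys; "foo" + # raises ValueError (unexpected prefix), and non-ASCII keys return False.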
+ return True + + async def async_get(self, key: str): + """ + Default implementation of async_get/get that validates the key and + checks that the returned value is bytes. Relies on `async def _get(key)` + being implemented. + + Will ensure that the following are correct: + - returned group metadata objects are JSON and contain a single + `attributes` key. + """ + assert self._valid_key(key), key + result = await self._get(key) + assert isinstance(result, bytes), "Expected bytes, got {}".format(result) + if key == "zarr.json": + v = json.loads(result.decode()) + assert set(v.keys()) == { + "zarr_format", + "metadata_encoding", + "extensions", + }, "v is {}".format(v) + elif key.endswith("/.group"): + v = json.loads(result.decode()) + assert set(v.keys()) == {"attributes"}, "got unexpected keys {}".format( + v.keys() + ) + return result + + async def async_set(self, key: str, value: bytes): + """ + Default implementation of async_set/set that validates the key and + checks that the value is bytes. Relies on `async def _set(key, value)` + being implemented. + + Will ensure that the following are correct: + - stored group metadata objects are JSON and contain a single `attributes` key. + """ + if key == "zarr.json": + v = json.loads(value.decode()) + assert set(v.keys()) == { + "zarr_format", + "metadata_encoding", + "extensions", + }, "v is {}".format(v) + elif key.endswith(".array"): + v = json.loads(value.decode()) + expected = { + "shape", + "data_type", + "chunk_grid", + "chunk_memory_layout", + "compressor", + "fill_value", + "extensions", + "attributes", + } + current = set(v.keys()) + # let's do some conversions. + assert current == expected, "{} extra, {} missing in {}".format( + current - expected, expected - current, v + ) + + assert isinstance(value, bytes) + assert self._valid_key(key) + await self._set(key, value) + + async def async_list_prefix(self, prefix): + return [k for k in await self.async_list() if k.startswith(prefix)] + + async def async_delete(self, key): + deln = await self._backend().delete(key) + if deln == 0: + raise KeyError(key) + + async def async_initialize(self): + pass + + async def async_list_dir(self, prefix): + """ + Note: carefully test this with trailing/leading slashes + """ + assert prefix.endswith("/") + + def part1(key): + if "/" not in key: + return key + else: + return key.split("/", maxsplit=1)[0] + "/" + + all_keys = await self.async_list_prefix(prefix) + len_prefix = len(prefix) + trail = {part1(k[len_prefix:]) for k in all_keys} + return [prefix + k for k in trail] + + async def async_contains(self, key): + assert key.startswith(("meta/", "data/")), "Got {}".format(key) + return key in await self.async_list() + + def __contains__(self, key): + if hasattr(self, "contains"): + return self.contains(key) + else: + with nested_run(): + import trio + + return trio.run(self.async_contains, key) + + +class AsyncV3DirectoryStore(BaseV3Store): + log = [] + + def __init__(self, key): + self.log.append("init") + self.root = pathlib.Path(key) + + async def _get(self, key: Key): + self.log.append("get " + key) + path = self.root / key + try: + return path.read_bytes() + except FileNotFoundError: + raise KeyError(path) + + async def _set(self, key, value): + self.log.append("set {} {}".format(key, value)) + assert not key.endswith("root/.group") + assert value + path = self.root / key + if not path.parent.exists(): + path.parent.mkdir(parents=True) + return path.write_bytes(value) + + async def async_list(self): + ll = [] + for it in os.walk(self.root): + if
os.path.sep != "/": + prefix = "/".join(it[0].split(os.path.sep)) + else: + prefix = it[0] + for file in it[2]: + str_key = "/".join([prefix, file])[len(str(self.root)) + 1 :] + assert "\\" not in str_key, str_key + ll.append(str_key) + return ll + + async def async_delete(self, key): + self.log.append("delete {}".format(key)) + path = self.root / key + os.remove(path) + + +@syncify +class SyncV3DirectoryStore(AsyncV3DirectoryStore): + _async = False + + def __getitem__(self, key): + assert not key.endswith("root/.group") + return self.get(key) + + +class AsyncV3RedisStore(BaseV3Store): + def __init__(self, host=None, port=None): + """initialisation is done in async_initialize, + so that failures happen early. + """ + self.host = host + self.port = port + + def __getstate__(self): + return {} + + def __setstate__(self, state): + self.__init__() + from redio import Redis + + self._backend = Redis("redis://localhost/") + + async def async_initialize(self): + from redio import Redis + + self._backend = Redis("redis://localhost/") + b = self._backend() + for k in await self._backend().keys(): + b.delete(k) + await b + return self + + async def _get(self, key): + res = await self._backend().get(key) + if res is None: + raise KeyError + return res + + async def _set(self, key, value): + return await self._backend().set(key, value) + + async def async_list(self): + return await self._backend().keys() + + +@syncify +class SyncV3RedisStore(AsyncV3RedisStore): + _async = False + + def __setitem__(self, key, value): + assert ".zgroup" not in key + return self.set(key, value) + + +class AsyncV3MemoryStore(BaseV3Store): + def __init__(self): + self._backend = dict() + + async def _get(self, key): + return self._backend[key] + + async def _set(self, key, value): + self._backend[key] = value + + async def async_delete(self, key): + del self._backend[key] + + async def async_list(self): + return list(self._backend.keys()) + + +@syncify +class SyncV3MemoryStore(AsyncV3MemoryStore): + _async = False + + +class AsyncZarrProtocolV3: + def __init__(self, store): + if isinstance(store, type): + self._store = store() + else: + self._store = store + if hasattr(self, "init_hierarchy"): + self.init_hierarchy() + + async def async_init_hierarchy(self): + basic_info = { + "zarr_format": "https://purl.org/zarr/spec/protocol/core/3.0", + "metadata_encoding": "https://tools.ietf.org/html/rfc8259", + "extensions": [], + } + try: + await self._store.async_get("zarr.json") + except KeyError: + await self._store.async_set("zarr.json", json.dumps(basic_info).encode()) + + def _g_meta_key(self, path): + _assert_valid_path(path) + return "meta" + path + ".group" + + async def async_create_group(self, group_path: str): + """ + Create a group at `group_path`; we need to make sure none of the + subpaths of group_path are arrays. + + Say path is g1/g2/g3; we want to check + + /meta/g1.array + /meta/g1/g2.array + + We could also assume that protocol implementations never do that. + """ + _assert_valid_path(group_path) + DEFAULT_GROUP = """{ + "attributes": { + "spam": "ham", + "eggs": 42 + } } + """ + await self._store.async_set( + self._g_meta_key(group_path), DEFAULT_GROUP.encode() + ) + + def _create_array_metadata(self, shape=(10,), dtype="<f8"): + # minimal v3 array metadata document; the key set mirrors what + # BaseV3Store.async_set expects for ".array" documents. + return { + "shape": shape, + "data_type": dtype, + "chunk_grid": {"type": "rectangular", "chunk_shape": (1,), "separator": "/"}, + "chunk_memory_layout": "C", + "compressor": None, + "fill_value": None, + "extensions": [], + "attributes": {}, + } + + +@syncify +class ZarrProtocolV3(AsyncZarrProtocolV3): + _async = False + + +class V2from3Adapter(MutableMapping): + def __init__(self, v3store): + """ + Wrap a v3 store and expose a v2-compatible API; conversions include: + + - the names of the metadata keys, `.zgroup` -> `.group` for example. + - path of storage (prefix with root/ meta// when relevant and vice versa.) + - try to ensure the stored objects are bytes before reaching the underlying store.
+ - try to adapt v2/v3 nested/flat structures. + + There will likely need to be _some_ more conversion logic here. + + """ + self._v3store = v3store + + def __getitem__(self, key): + """ + In v2 both metadata and data are mixed, so we'll need to convert things + that end with .z* to the metadata path. + """ + assert isinstance(key, str), "expecting string got {key}".format(key=repr(key)) + v3key = self._convert_2_to_3_keys(key) + try: + res = self._v3store.get(v3key) + except KeyError: + if not key.endswith(".zattrs"): + raise + v3key = v3key.replace(".array", ".group") + res = self._v3store.get(v3key) + + assert isinstance(res, bytes) + if key.endswith(".zattrs"): + data = json.loads(res.decode())["attributes"] + res = json.dumps(data, indent=4).encode() + elif key.endswith(".zarray"): + data = json.loads(res.decode()) + for target, source in RENAMED_MAP.items(): + tmp = data[source] + del data[source] + data[target] = tmp + data["chunks"] = data["chunk_grid"]["chunk_shape"] + del data["chunk_grid"] + + data["zarr_format"] = 2 + data["filters"] = None + del data["extensions"] + del data["attributes"] + res = json.dumps(data, indent=4).encode() + + if v3key.endswith(".group") or v3key == "zarr.json": + data = json.loads(res.decode()) + data["zarr_format"] = 2 + if data.get("attributes") is not None: + del data["attributes"] + res = json.dumps(data, indent=4).encode() + assert isinstance(res, bytes) + return res + + def __setitem__(self, key, value): + """ + In v2 both metadata and data are mixed, so we'll need to convert things + that end with .z* to the metadata path. + """ + # TODO convert to bytes if needed + + v3key = self._convert_2_to_3_keys(key) + assert not key.endswith("root/.group") + # convert chunk separator from ``.`` to ``/`` + + if key.endswith(".zarray"): + data = json.loads(value.decode()) + for source, target in RENAMED_MAP.items(): + tmp = data[source] + del data[source] + data[target] = tmp + data["chunk_grid"] = {} + data["chunk_grid"]["chunk_shape"] = data["chunks"] + del data["chunks"] + data["chunk_grid"]["type"] = "rectangular" + data["chunk_grid"]["separator"] = "/" + assert data["zarr_format"] == 2 + del data["zarr_format"] + assert data["filters"] in ([], None), "found filters: {}".format( + data["filters"] + ) + del data["filters"] + data["extensions"] = [] + try: + attrs = json.loads(self._v3store.get(v3key).decode())["attributes"] + except KeyError: + attrs = {} + data["attributes"] = attrs + data = json.dumps(data, indent=4).encode() + elif key.endswith(".zattrs"): + try: + # try zarray first... + data = json.loads(self._v3store.get(v3key).decode()) + except KeyError: + try: + v3key = v3key.replace(".array", ".group") + data = json.loads(self._v3store.get(v3key).decode()) + except KeyError: + data = {} + data["attributes"] = json.loads(value.decode()) + self._v3store.set(v3key, json.dumps(data, indent=4).encode()) + return + # todo: we want to keep the .zattrs which is stored in the group/array file; + # so to set, we need to get from the store, assign, and update. + elif v3key == "meta/root.group": + # todo: this is wrong, the top metadata document is zarr.json.
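+ # (async_init_hierarchy writes the actual entry-point document "zarr.json"; + # here we only stamp the v3 format URL onto "meta/root.group".)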
+ data = json.loads(value.decode()) + data["zarr_format"] = "https://purl.org/zarr/spec/protocol/core/3.0" + data = json.dumps(data, indent=4).encode() + elif v3key.endswith("/.group"): + data = json.loads(value.decode()) + del data["zarr_format"] + if "attributes" not in data: + data["attributes"] = {} + data = json.dumps(data).encode() + else: + data = value + assert not isinstance(data, dict) + self._v3store.set(v3key, ensure_bytes(data)) + + def __contains__(self, key): + return self._convert_2_to_3_keys(key) in self._v3store.list() + + def _convert_3_to_2_keys(self, v3key: str) -> str: + """ + todo: + - handle special .attribute which is merged with .zarray/.zgroup + - look at the grid separator + """ + if v3key == "meta/root.group": + return ".zgroup" + if v3key == "meta/root.array": + return ".zarray" + suffix = v3key[10:] + if suffix.endswith(".array"): + return suffix[:-6] + ".zarray" + if suffix.endswith(".group"): + return suffix[:-6] + ".zgroup" + return suffix + + def _convert_2_to_3_keys(self, v2key: str) -> str: + """ + todo: + - handle special .attribute which is merged with .zarray/.zgroup + - look at the grid separator + + """ + # head of the hierarchy is different: + if v2key in (".zgroup", ".zattrs"): + return "meta/root.group" + if v2key == ".zarray": + return "meta/root.array" + assert not v2key.startswith( + "/" + ), "expected keys to not start with a slash, but got {}".format(repr(v2key)) + if v2key.endswith(".zarray") or v2key.endswith(".zattrs"): + return "meta/root/" + v2key[:-7] + ".array" + if v2key.endswith(".zgroup"): + return "meta/root/" + v2key[:-7] + ".group" + return "data/root/" + v2key + + def __len__(self): + return len(self._v3store.list()) + + def clear(self): + keys = self._v3store.list() + for k in keys: + self._v3store.delete(k) + + def __delitem__(self, key): + item3 = self._convert_2_to_3_keys(key) + + items = self._v3store.list_prefix(item3) + if not items: + raise KeyError( + "{} not found in store (converted key to {})".format(key, item3) + ) + for _item in self._v3store.list_prefix(item3): + self._v3store.delete(_item) + + def keys(self): + # TODO: not as straightforward: we need to actually poke internally at + # .group/.array to potentially return '.zattrs' if an attribute is set; it + # also seems that in some cases .zattrs is set on arrays even if the rest + # of the information is not set. + paths = self._v3store.list() + fixed_paths = [] + for p in paths: + if p.endswith((".group", ".array")): + res = self._v3store.get(p) + if json.loads(res.decode()).get("attributes"): + fixed_paths.append(p[10:-6] + ".zattrs") + fixed_paths.append(self._convert_3_to_2_keys(p)) + + return list(set(fixed_paths)) + + def listdir(self, path=""): + """ + This will be wrong, as we also need to list meta/prefix; we need to be + careful and use list_prefix in that case, with the right options + to convert the chunk separators.
+ """ + v3path = self._convert_2_to_3_keys(path) + if not v3path.endswith("/"): + v3path = v3path + "/" + # if not v3path.startswith("/"): + # v3path = '/'+v3path + ps = [p for p in self._v3store.list_dir(v3path)] + fixed_paths = [] + for p in ps: + if p == ".group": + res = self._v3store.get(path + "/.group") + if json.loads(res.decode())["attributes"]: + fixed_paths.append(".zattrs") + fixed_paths.append(self._convert_3_to_2_keys(p)) + + res = [p.split("/")[-2] for p in fixed_paths] + return res + + def __iter__(self): + return iter(self.keys()) diff --git a/zarr/v3/comparer.py b/zarr/v3/comparer.py new file mode 100644 index 0000000000..53cc92529a --- /dev/null +++ b/zarr/v3/comparer.py @@ -0,0 +1,97 @@ +import json +from collections.abc import MutableMapping + + +class StoreComparer(MutableMapping): + """ + Compare two store implementations, and make sure to do the same operation on + both stores. + + The operation from the first store are always considered as reference and + the will make sure the second store will return the same value, or raise + the same exception where relevant. + + This should have minimal impact on API, but can as some generators are + reified and sorted to make sure they are identical. + """ + + def __init__(self, reference, tested): + self.reference = reference + self.tested = tested + + def __getitem__(self, key): + try: + k1 = self.reference[key] + except Exception as e1: + try: + k2 = self.tested[key] + assert False, "should raise, got {} for {}".format(k2, key) + except Exception as e2: + raise + if not isinstance(e2, type(e1)): + raise AssertionError("Expecting {type(e1)} got {type(e2)}") from e2 + raise + k2 = self.tested[key] + if key.endswith((".zgroup", ".zarray")): + j1, j2 = json.loads(k1.decode()), json.loads(k2.decode()) + assert j1 == j2, "{} != {}".format(j1, j2) + else: + assert k2 == k1, "{} != {}\n missing: {},\n extra:{}".format( + k1, k2, k1 - k2, k2 - k1 + ) + return k1 + + def __setitem__(self, key, value): + # todo : not quite happy about casting here, maybe we shoudl stay strict ? 
from numcodecs.compat import ensure_bytes + + value = ensure_bytes(value) + try: + self.reference[key] = value + except Exception as e1: + try: + self.tested[key] = value + assert False, "should raise" + except Exception as e2: + assert isinstance(e2, type(e1)) + raise + self.tested[key] = value + + def keys(self): + try: + k1 = list(sorted(self.reference.keys())) + except Exception as e1: + try: + k2 = self.tested.keys() + assert False, "should raise" + except Exception as e2: + assert isinstance(e2, type(e1)) + raise + k2 = sorted(self.tested.keys()) + assert k2 == k1, "got {};\n expecting {}\n missing: {},\n extra:{}".format( + k2, k1, set(k1) - set(k2), set(k2) - set(k1) + ) + return k1 + + def __delitem__(self, key): + try: + del self.reference[key] + except Exception as e1: + try: + del self.tested[key] + assert False, "should raise" + except Exception as e2: + assert isinstance(e2, type(e1)) + raise + del self.tested[key] + + def __iter__(self): + return iter(self.keys()) + + def __len__(self): + return len(self.keys()) + + def __contains__(self, key): + return key in self.reference diff --git a/zarr/v3/protocol.py b/zarr/v3/protocol.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/zarr/v3/storage.py b/zarr/v3/storage.py new file mode 100644 index 0000000000..2ea764742e --- /dev/null +++ b/zarr/v3/storage.py @@ -0,0 +1,68 @@ +from zarr.util import normalize_storage_path +from zarr.errors import err_contains_array, err_contains_group + + +async def init_group(store, overwrite=False, path=None, chunk_store=None): + """Initialize a group store. Note that this is a low-level function and there should be no + need to call this directly from user code. + + Parameters + ---------- + store : MutableMapping + A mapping that supports string keys and byte sequence values. + overwrite : bool, optional + If True, erase all data in `store` prior to initialisation. + path : string, optional + Path under which array is stored. + chunk_store : MutableMapping, optional + Separate storage for chunks. If not provided, `store` will be used + for storage of both chunks and metadata.
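+ + Note: draft async port of ``zarr.storage.init_group``; parts of the guard + logic in ``_init_group_metadata`` below still raise ``NotImplementedError``.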
+ """ + + # normalize path + path = normalize_storage_path(path) + + # initialise metadata + _init_group_metadata( + store=store, overwrite=overwrite, path=path, chunk_store=chunk_store + ) + + +async def _init_group_metadata(store, overwrite=False, path=None, chunk_store=None): + + # guard conditions + if overwrite: + raise NotImplementedError + # attempt to delete any pre-existing items in store + rmdir(store, path) + if chunk_store is not None: + rmdir(chunk_store, path) + elif await contains_array(store, path): + err_contains_array(path) + elif contains_group(store, path): + err_contains_group(path) + + # initialize metadata + # N.B., currently no metadata properties are needed, however there may + # be in future + meta = dict() + raise NotImplementedError + key = _path_to_prefix(path) + group_meta_key + store[key] = encode_group_metadata(meta) + + +async def contains_array(store, path=None): + """Return True if the store contains an array at the given logical path.""" + path = normalize_storage_path(path) + key = "meta/root" + path + ".array" + return key in await store.list() + + +def contains_group(store, path=None): + """Return True if the store contains a group at the given logical path.""" + raise NotImplementedError + path = normalize_storage_path(path) + prefix = _path_to_prefix(path) + key = prefix + group_meta_key + return key in store diff --git a/zarr/v3/test_protocol.py b/zarr/v3/test_protocol.py new file mode 100644 index 0000000000..f3ece9e7b8 --- /dev/null +++ b/zarr/v3/test_protocol.py @@ -0,0 +1,87 @@ +import pytest + +from zarr.storage import init_group +from zarr.v3 import ( + SyncV3MemoryStore, + SyncV3RedisStore, + V2from3Adapter, + ZarrProtocolV3, + AsyncV3RedisStore, +) +from zarr.storage import MemoryStore + + +@pytest.mark.parametrize( + ("store", "key"), [(SyncV3MemoryStore(), ".group"), (MemoryStore(), ".zgroup")] +) +def test_cover_Attribute_no_key(store, key): + from zarr.hierarchy import Attributes + + Attributes(store, key=key) + + +def test_cover_Attribute_wrong_key(): + from zarr.hierarchy import Attributes + + with pytest.raises(ValueError): + Attributes(SyncV3MemoryStore(), key=".zattr") + + +async def test_scenario(): + pytest.importorskip("trio") + + store = SyncV3MemoryStore() + + await store.async_set("data/a", bytes(1)) + + with pytest.raises(ValueError): + store.get("arbitrary") + with pytest.raises(ValueError): + store.get("data") + with pytest.raises(ValueError): + store.get("meta") + + assert store.get("data/a") == bytes(1) + assert await store.async_get("data/a") == bytes(1) + + await store.async_set("meta/this/is/nested", bytes(1)) + await store.async_set("meta/this/is/a/group", bytes(1)) + await store.async_set("meta/this/also/a/group", bytes(1)) + await store.async_set("meta/thisisweird/also/a/group", bytes(1)) + + assert len(store.list()) == 5 + with pytest.raises(AssertionError): + assert store.list_dir("meta/this") + + assert set(store.list_dir("meta/this/")) == set( + ["meta/this/also/", "meta/this/is/"] + ) + with pytest.raises(AssertionError): + assert await store.async_list_dir("meta/this") + + +async def test_2(): + protocol = ZarrProtocolV3(SyncV3MemoryStore) + store = protocol._store + + await protocol.async_create_group("/g1") + assert isinstance(await store.async_get("meta/g1.group"), bytes) + + +@pytest.mark.parametrize("klass", [SyncV3MemoryStore, SyncV3RedisStore]) +def test_misc(klass): + + pytest.importorskip("redio") + + _store = klass() + _store.initialize() + store = V2from3Adapter(_store) + 
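+ # init_group through the adapter should write the v3 root-group document + # under "meta/root.group", as asserted below for the memory store. +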
init_group(store) + + if isinstance(_store, SyncV3MemoryStore): + assert store._v3store._backend == { + "meta/root.group": b'{\n "zarr_format": ' + b'"https://purl.org/zarr/spec/protocol/core/3.0"\n}' + } + assert store[".zgroup"] == b'{\n "zarr_format": 2\n}' diff --git a/zarr/v3/utils.py b/zarr/v3/utils.py new file mode 100644 index 0000000000..2bab711a09 --- /dev/null +++ b/zarr/v3/utils.py @@ -0,0 +1,71 @@ +import inspect +from contextlib import contextmanager + + +@contextmanager +def nested_run(): + __tracebackhide__ = True + from trio._core._run import GLOBAL_RUN_CONTEXT + + s = object() + task, runner, _dict = s, s, s + if hasattr(GLOBAL_RUN_CONTEXT, "__dict__"): + _dict = GLOBAL_RUN_CONTEXT.__dict__ + if hasattr(GLOBAL_RUN_CONTEXT, "task"): + task = GLOBAL_RUN_CONTEXT.task + del GLOBAL_RUN_CONTEXT.task + if hasattr(GLOBAL_RUN_CONTEXT, "runner"): + runner = GLOBAL_RUN_CONTEXT.runner + del GLOBAL_RUN_CONTEXT.runner + + try: + yield + finally: + if task is not s: + GLOBAL_RUN_CONTEXT.task = task + elif hasattr(GLOBAL_RUN_CONTEXT, "task"): + del GLOBAL_RUN_CONTEXT.task + + if runner is not s: + GLOBAL_RUN_CONTEXT.runner = runner + elif hasattr(GLOBAL_RUN_CONTEXT, "runner"): + del GLOBAL_RUN_CONTEXT.runner + + if _dict is not s: + GLOBAL_RUN_CONTEXT.__dict__.update(_dict) + + +def syncify(cls, *args, **kwargs): + + attrs = [c for c in dir(cls) if c.startswith("async_")] + for attr in attrs: + meth = getattr(cls, attr) + if inspect.iscoroutinefunction(meth): + + def cl(meth): + def sync_version(self, *args, **kwargs): + """ + Automatically generated synchronous version of {attr} + + See {attr} documentation. + """ + import trio + + __tracebackhide__ = True + + with nested_run(): + return trio.run(meth, self, *args) + + sync_version.__doc__ = ( + "Automatically generated sync " + "version of {}.\n\n{}".format(attr, meth.__doc__) + ) + return sync_version + + setattr(cls, attr[6:], cl(meth)) + + return cls
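+ + +# Illustrative usage sketch (assumes trio is installed): ``@syncify`` generates a +# blocking wrapper for each ``async_*`` coroutine, named without the prefix, and +# runs it via ``trio.run`` (keyword arguments are currently not forwarded by the +# generated wrappers): +# +# @syncify +# class MySyncStore(AsyncV3MemoryStore): +# _async = False +# +# MySyncStore().set("meta/root.group", b'{"attributes": {}}')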