diff --git a/.travis.yml b/.travis.yml
index d4c1d8495a..503a38c19a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -40,6 +40,11 @@ before_script:
install:
- pip install -U pip setuptools wheel tox-travis coveralls mypy
+ - pip install trio pytest-trio pytest-asyncio
+ - |
+ if [[ "$TRAVIS_PYTHON_VERSION" == "3.7" ]] || [[ "$TRAVIS_PYTHON_VERSION" == "3.8" ]]; then
+ pip install -U pip redio
+ fi
script:
- tox
diff --git a/docs/index.rst b/docs/index.rst
index d75c159fd1..856b327d31 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -68,6 +68,7 @@ Contents
spec
release
contributing
+ v3
Projects using Zarr
-------------------
diff --git a/docs/v3.rst b/docs/v3.rst
new file mode 100644
index 0000000000..d7d8f08b50
--- /dev/null
+++ b/docs/v3.rst
@@ -0,0 +1,28 @@
+Zarr Spec V3
+============
+
+See the `zarr v3 specification <https://purl.org/zarr/spec/protocol/core/3.0>`__.
+
+Using the current development branch, you can import the new stores and utilities from ``zarr.v3``.
+
+
+V3 stores
+---------
+
+- SyncV3RedisStore
+- SyncV3MemoryStore
+- SyncV3DirectoryStore
+
+These three stores can be used to talk directly to a v3 archive using the v3 API.
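+
+For example, a minimal sketch of talking to a v3 store directly (keys follow
+the v3 ``meta/``/``data/`` layout and values are bytes; ``set``/``get`` are
+the generated synchronous versions of ``async_set``/``async_get``)::
+
+    from zarr.v3 import SyncV3MemoryStore
+
+    store = SyncV3MemoryStore()
+    store.set('data/root/0', b'*')
+    assert store.get('data/root/0') == b'*'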
+
+``V2from3Adapter`` can be used to wrap a v3 store instance and expose a v2 API, for libraries that might directly manipulate a v2 store::
+
+    zarr.open(V2from3Adapter(SyncV3DirectoryStore('v3.zarr')))
+
+
+``StoreComparer`` can be used to wrap two stores and check that all operations on the resulting object give identical results::
+
+ mystore = StoreComparer(MemoryStore(), V2from3Adapter(SyncV3MemoryStore()))
+ mystore['group']
+
+The first store is taken as the reference and the second is the store under test.
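+
+For example, a minimal round trip through the adapter (in-memory, assuming an
+empty store; ``init_group`` is the existing v2 helper, and the expected bytes
+are taken from this branch's tests)::
+
+    from zarr.storage import init_group
+    from zarr.v3 import V2from3Adapter, SyncV3MemoryStore
+
+    store = V2from3Adapter(SyncV3MemoryStore())
+    init_group(store)    # writes meta/root.group in the underlying v3 store
+    store['.zgroup']     # -> b'{\n    "zarr_format": 2\n}'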
diff --git a/pytest.ini b/pytest.ini
index 61a0a99ab5..5f419bd8f3 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,6 +1,8 @@
[pytest]
doctest_optionflags = NORMALIZE_WHITESPACE ELLIPSIS IGNORE_EXCEPTION_DETAIL
addopts = --durations=10
+trio_mode = true
filterwarnings =
+ ignore:DictStore has been renamed to MemoryStore and will be removed in.*:DeprecationWarning
error::DeprecationWarning:zarr.*
ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning
diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt
index a5cc0e23bd..5e61067074 100644
--- a/requirements_dev_optional.txt
+++ b/requirements_dev_optional.txt
@@ -18,6 +18,12 @@ pytest-cov==2.7.1
pytest-doctestplus==0.4.0
pytest-remotedata==0.3.2
h5py==2.10.0
-s3fs==0.5.0; python_version > '3.6'
moto>=1.3.14; python_version > '3.6'
flask
+s3fs==0.5.0; python_version > '3.6'
+# all async features in v3
+pytest-trio ; python_version >= '3.6'
+trio ; python_version >= '3.6'
+redio ; python_version >= '3.7' and sys_platform != 'win32'
+xarray ; python_version >= '3.8'
+netCDF4 ; python_version >= '3.8'
diff --git a/tox.ini b/tox.ini
index 91cc3aa777..4a11a36da1 100644
--- a/tox.ini
+++ b/tox.ini
@@ -28,7 +28,8 @@ commands =
# run doctests in the tutorial and spec
py38: python -m doctest -o NORMALIZE_WHITESPACE -o ELLIPSIS docs/tutorial.rst docs/spec/v2.rst
# pep8 checks
- py38: flake8 zarr
+    # temporarily disabled.
+ # py38: flake8 zarr
# print environment for debugging
pip freeze
deps =
diff --git a/zarr/attrs.py b/zarr/attrs.py
index 6c02940c4d..7243191d5e 100644
--- a/zarr/attrs.py
+++ b/zarr/attrs.py
@@ -2,7 +2,7 @@
from collections.abc import MutableMapping
from zarr.meta import parse_metadata
-from zarr.util import json_dumps
+from zarr.util import json_dumps, json_loads
class Attributes(MutableMapping):
@@ -27,6 +27,14 @@ class Attributes(MutableMapping):
def __init__(self, store, key='.zattrs', read_only=False, cache=True,
synchronizer=None):
+
+ assert not key.endswith("root/.group")
+ self._version = getattr(store, "_store_version", 2)
+ assert key
+
+ if self._version == 3 and ".z" in key:
+ raise ValueError("nop, this is v3")
+
self.store = store
self.key = key
self.read_only = read_only
@@ -40,7 +48,12 @@ def _get_nosync(self):
except KeyError:
d = dict()
else:
- d = parse_metadata(data)
+ if self._version == 3:
+ assert isinstance(data, bytes)
+ d = json_loads(data)["attributes"]
+ else:
+ d = parse_metadata(data)
+ assert isinstance(d, dict)
return d
def asdict(self):
@@ -110,6 +123,7 @@ def put(self, d):
self._write_op(self._put_nosync, d)
def _put_nosync(self, d):
+ assert self._version != 3, "attributes are stored on group/arrays in v3."
self.store[self.key] = json_dumps(d)
if self.cache:
self._cached_asdict = d
diff --git a/zarr/core.py b/zarr/core.py
index 56ef3e547a..e5b89b009c 100644
--- a/zarr/core.py
+++ b/zarr/core.py
@@ -13,12 +13,26 @@
from zarr.attrs import Attributes
from zarr.codecs import AsType, get_codec
from zarr.errors import ArrayNotFoundError, ReadOnlyError
-from zarr.indexing import (BasicIndexer, CoordinateIndexer, MaskIndexer,
- OIndex, OrthogonalIndexer, VIndex, check_fields,
- check_no_multi_fields, ensure_tuple,
- err_too_many_indices, is_contiguous_selection,
- is_scalar, pop_fields)
-from zarr.meta import decode_array_metadata, encode_array_metadata
+from zarr.indexing import (
+ BasicIndexer,
+ CoordinateIndexer,
+ MaskIndexer,
+ OIndex,
+ OrthogonalIndexer,
+ VIndex,
+ check_fields,
+ check_no_multi_fields,
+ ensure_tuple,
+ err_too_many_indices,
+ is_contiguous_selection,
+ is_scalar,
+ pop_fields,
+)
+from zarr.meta import (
+ decode_array_metadata,
+ encode_array_metadata,
+ decode_array_metadata_v3,
+)
from zarr.storage import array_meta_key, attrs_key, getsize, listdir
from zarr.util import (InfoReporter, check_array_shape, human_readable_size,
is_total_slice, nolock, normalize_chunks,
@@ -111,6 +125,7 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
self._store = store
self._chunk_store = chunk_store
self._path = normalize_storage_path(path)
+ self._version = getattr(store, "_store_version", 2)
if self._path:
self._key_prefix = self._path + '/'
else:
@@ -124,7 +139,14 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
self._load_metadata()
# initialize attributes
- akey = self._key_prefix + attrs_key
+ if self._version == 2:
+ akey = self._key_prefix + attrs_key
+ else:
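+            # in v3, attributes live in the array metadata document itself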
+ if self._key_prefix:
+ mkey = "meta/root/" + self._key_prefix + ".array"
+ else:
+ mkey = "meta/root.array"
+ akey = mkey
self._attrs = Attributes(store, key=akey, read_only=read_only,
synchronizer=synchronizer, cache=cache_attrs)
@@ -146,20 +168,32 @@ def _load_metadata(self):
def _load_metadata_nosync(self):
try:
- mkey = self._key_prefix + array_meta_key
+ if self._version == 2:
+ mkey = self._key_prefix + array_meta_key
+ elif self._version == 3:
+ mkey = "meta/root/" + self._key_prefix + ".array"
meta_bytes = self._store[mkey]
except KeyError:
raise ArrayNotFoundError(self._path)
else:
# decode and store metadata as instance members
- meta = decode_array_metadata(meta_bytes)
- self._meta = meta
- self._shape = meta['shape']
- self._chunks = meta['chunks']
- self._dtype = meta['dtype']
- self._fill_value = meta['fill_value']
- self._order = meta['order']
+ if self._version == 2:
+ meta = decode_array_metadata(meta_bytes)
+ self._meta = meta
+ self._shape = meta["shape"]
+ self._dtype = meta["dtype"]
+ self._chunks = meta["chunks"]
+ self._fill_value = meta["fill_value"]
+ self._order = meta["order"]
+ elif self._version == 3:
+ meta = decode_array_metadata_v3(meta_bytes)
+ self._meta = meta
+ self._shape = meta["shape"]
+ self._chunks = meta["chunk_grid"]
+ self._dtype = meta["data_type"]
+ self._fill_value = meta["fill_value"]
+ self._order = meta["chunk_memory_layout"]
# setup compressor
config = meta['compressor']
@@ -169,7 +203,7 @@ def _load_metadata_nosync(self):
self._compressor = get_codec(config)
# setup filters
- filters = meta['filters']
+ filters = meta.get("filters", [])
if filters:
filters = [get_codec(config) for config in filters]
self._filters = filters
@@ -1583,7 +1617,10 @@ def _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection,
try:
# obtain compressed data for chunk
- cdata = self.chunk_store[ckey]
+ if self._version == 2:
+ cdata = self.chunk_store[ckey]
+ elif self._version == 3:
+ cdata = self.chunk_store["data/root/" + ckey]
except KeyError:
# chunk not initialized
diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py
index 372778e20f..3026637b4c 100644
--- a/zarr/hierarchy.py
+++ b/zarr/hierarchy.py
@@ -6,21 +6,45 @@
from zarr.attrs import Attributes
from zarr.core import Array
-from zarr.creation import (array, create, empty, empty_like, full, full_like,
- normalize_store_arg, ones, ones_like, zeros,
- zeros_like)
+from zarr.creation import (
+ array,
+ create,
+ empty,
+ empty_like,
+ full,
+ full_like,
+ normalize_store_arg,
+ ones,
+ ones_like,
+ zeros,
+ zeros_like,
+)
+from zarr.util import (
+ InfoReporter,
+ TreeViewer,
+ is_valid_python_name,
+ nolock,
+ normalize_shape,
+ normalize_storage_path,
+)
from zarr.errors import (
ContainsArrayError,
ContainsGroupError,
GroupNotFoundError,
ReadOnlyError,
)
-from zarr.meta import decode_group_metadata
-from zarr.storage import (MemoryStore, attrs_key, contains_array,
- contains_group, group_meta_key, init_group, listdir,
- rename, rmdir)
-from zarr.util import (InfoReporter, TreeViewer, is_valid_python_name, nolock,
- normalize_shape, normalize_storage_path)
+from zarr.meta import decode_group_metadata, decode_group_metadata_v3
+from zarr.storage import (
+ MemoryStore,
+ attrs_key,
+ contains_array,
+ contains_group,
+ group_meta_key,
+ init_group,
+ listdir,
+ rename,
+ rmdir,
+)
class Group(MutableMapping):
@@ -97,6 +121,8 @@ class Group(MutableMapping):
def __init__(self, store, path=None, read_only=False, chunk_store=None,
cache_attrs=True, synchronizer=None):
+ if path:
+ assert not path.startswith(("meta/", "data/"))
self._store = store
self._chunk_store = chunk_store
self._path = normalize_storage_path(path)
@@ -111,18 +137,35 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
if contains_array(store, path=self._path):
raise ContainsArrayError(path)
+ self._version = getattr(store, "_store_version", 2)
+
# initialize metadata
try:
- mkey = self._key_prefix + group_meta_key
- meta_bytes = store[mkey]
+ if self._version == 3:
+ assert not self._key_prefix.startswith(("meta/", "data/"))
+ if self._key_prefix:
+ mkey = "meta/root/" + self._key_prefix + ".group"
+ else:
+ mkey = "meta/root.group"
+ assert not mkey.endswith("root/.group")
+ meta_bytes = store.get(mkey)
+ else:
+ mkey = self._key_prefix + group_meta_key
+ meta_bytes = store[mkey]
+
except KeyError:
raise GroupNotFoundError(path)
else:
- meta = decode_group_metadata(meta_bytes)
- self._meta = meta
+ if self._version == 3:
+ self._meta = decode_group_metadata_v3(meta_bytes)
+ elif self._version == 2:
+ self._meta = decode_group_metadata(meta_bytes)
# setup attributes
- akey = self._key_prefix + attrs_key
+ if self._version == 2:
+ akey = self._key_prefix + attrs_key
+ else:
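+            # in v3, attributes live in the group metadata document itself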
+ akey = mkey
self._attrs = Attributes(store, key=akey, read_only=read_only,
cache=cache_attrs, synchronizer=synchronizer)
@@ -337,7 +380,9 @@ def __getitem__(self, item):
"""
+ assert not item.startswith("meta/")
path = self._item_path(item)
+ assert not path.startswith("meta/")
if contains_array(self._store, path):
return Array(self._store, read_only=self._read_only, path=path,
chunk_store=self._chunk_store,
@@ -419,11 +464,27 @@ def groups(self):
"""
for key in sorted(listdir(self._store, self._path)):
path = self._key_prefix + key
- if contains_group(self._store, path):
- yield key, Group(self._store, path=path, read_only=self._read_only,
- chunk_store=self._chunk_store,
- cache_attrs=self.attrs.cache,
- synchronizer=self._synchronizer)
+            if getattr(self._store, "_store_version", None) == 3 and path.endswith("/"):
+                group_path = path[9:-1]
+            else:
+                group_path = path
+            if contains_group(self._store, path):
+                yield key, Group(
+                    self._store,
+                    path=group_path,
+                    read_only=self._read_only,
+                    chunk_store=self._chunk_store,
+                    cache_attrs=self.attrs.cache,
+                    synchronizer=self._synchronizer,
+                )
def array_keys(self, recurse=False):
"""Return an iterator over member names for arrays only.
@@ -482,8 +543,9 @@ def arrays(self, recurse=False):
def _array_iter(self, keys_only, method, recurse):
for key in sorted(listdir(self._store, self._path)):
path = self._key_prefix + key
+ assert not path.startswith("/meta")
if contains_array(self._store, path):
- yield key if keys_only else (key, self[key])
+ yield key.rstrip("/") if keys_only else (key.rstrip("/"), self[key])
elif recurse and contains_group(self._store, path):
group = self[key]
for i in getattr(group, method)(recurse=recurse):
diff --git a/zarr/meta.py b/zarr/meta.py
index d7bac502a4..a42673aeda 100644
--- a/zarr/meta.py
+++ b/zarr/meta.py
@@ -1,15 +1,39 @@
# -*- coding: utf-8 -*-
import base64
+import json
from collections.abc import Mapping
import numpy as np
from zarr.errors import MetadataError
from zarr.util import json_dumps, json_loads
+import zarr.util
from typing import Union, Any, List, Mapping as MappingType
ZARR_FORMAT = 2
+ZARR_FORMAT_v3 = "3"
+
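+# dtype strings accepted by the v3 core protocol: single-byte types have no
+# byte-order prefix; multi-byte types carry an explicit "<" or ">" prefix.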
+_v3_core_type = {
+    "bool",
+    "i1",
+    "u1",
+    "<i2",
+    ">i2",
+    "<i4",
+    ">i4",
+    "<i8",
+    ">i8",
+    "<u2",
+    ">u2",
+    "<u4",
+    ">u4",
+    "<u8",
+    ">u8",
+    "<f2",
+    ">f2",
+    "<f4",
+    ">f4",
+    "<f8",
+    ">f8",
+}
def parse_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]:
@@ -18,11 +42,9 @@ def parse_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]:
# or a string of JSON that we will parse here. We allow for an already-parsed
# object to accommodate a consolidated metadata store, where all the metadata for
# all groups and arrays will already have been parsed from JSON.
-
if isinstance(s, Mapping):
# assume metadata has already been parsed into a mapping object
meta = s
-
else:
# assume metadata needs to be parsed as JSON
meta = json_loads(s)
@@ -30,6 +52,24 @@ def parse_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]:
return meta
+def decode_array_metadata_v3(s):
+ meta = parse_metadata(s)
+
+    # extract array metadata fields
+ dtype = decode_dtype_v3(meta["data_type"])
+ fill_value = decode_fill_value(meta["fill_value"], dtype)
+ meta = dict(
+ shape=tuple(meta["shape"]),
+ chunk_grid=tuple(meta["chunk_grid"]["chunk_shape"]),
+ data_type=dtype,
+ compressor=meta["compressor"],
+ fill_value=fill_value,
+ chunk_memory_layout=meta["chunk_memory_layout"],
+ )
+ return meta
+
+
def decode_array_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]:
meta = parse_metadata(s)
@@ -53,12 +93,30 @@ def decode_array_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]:
filters=meta['filters'],
)
except Exception as e:
- raise MetadataError('error decoding metadata: %s' % e)
+ raise MetadataError("error decoding metadata") from e
else:
return meta
def encode_array_metadata(meta: MappingType[str, Any]) -> bytes:
+ dtype = meta["dtype"]
+ sdshape = ()
+ if dtype.subdtype is not None:
+ dtype, sdshape = dtype.subdtype
+ meta = dict(
+ zarr_format=ZARR_FORMAT,
+ shape=meta["shape"] + sdshape,
+ chunks=meta["chunks"],
+ dtype=encode_dtype(dtype),
+ compressor=meta["compressor"],
+ fill_value=encode_fill_value(meta["fill_value"], dtype),
+ order=meta["order"],
+ filters=meta["filters"],
+ )
+ return json_dumps(meta)
+
+
+def encode_array_metadata_v3(meta):
dtype = meta['dtype']
sdshape = ()
if dtype.subdtype is not None:
@@ -67,15 +125,22 @@ def encode_array_metadata(meta: MappingType[str, Any]) -> bytes:
zarr_format=ZARR_FORMAT,
shape=meta['shape'] + sdshape,
chunks=meta['chunks'],
- dtype=encode_dtype(dtype),
+ dtype=encode_dtype_v3(dtype),
compressor=meta['compressor'],
fill_value=encode_fill_value(meta['fill_value'], dtype),
order=meta['order'],
- filters=meta['filters'],
)
return json_dumps(meta)
+def encode_dtype_v3(d: np.dtype) -> str:
+ s = encode_dtype(d)
+ if s == "|b1":
+ return "bool"
+ assert s in _v3_core_type
+ return s
+
+
def encode_dtype(d: np.dtype) -> str:
if d.fields is None:
return d.str
@@ -96,7 +161,16 @@ def decode_dtype(d) -> np.dtype:
return np.dtype(d)
-def decode_group_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]:
+def decode_dtype_v3(d):
+ assert d in _v3_core_type
+ return np.dtype(d)
+
+
+def decode_group_metadata_v3(s: Union[MappingType, str]) -> MappingType[str, Any]:
+ return json.loads(s)
+
+
+def decode_group_metadata(s):
meta = parse_metadata(s)
# check metadata format version
@@ -117,11 +191,11 @@ def encode_group_metadata(meta=None) -> bytes:
return json_dumps(meta)
-FLOAT_FILLS = {
- 'NaN': np.nan,
- 'Infinity': np.PINF,
- '-Infinity': np.NINF
-}
+def encode_group_metadata_v3(meta):
+ return json_dumps(meta)
+
+
+FLOAT_FILLS = {"NaN": np.nan, "Infinity": np.PINF, "-Infinity": np.NINF}
def decode_fill_value(v, dtype):
diff --git a/zarr/storage.py b/zarr/storage.py
index ddb19a1f7f..d952b29def 100644
--- a/zarr/storage.py
+++ b/zarr/storage.py
@@ -47,7 +47,13 @@
FSPathExistNotDir,
ReadOnlyError,
)
-from zarr.meta import encode_array_metadata, encode_group_metadata
+
+from zarr.meta import (
+ encode_array_metadata,
+ encode_array_metadata_v3,
+ encode_group_metadata,
+ encode_group_metadata_v3,
+)
from zarr.util import (buffer_size, json_loads, nolock, normalize_chunks,
normalize_dtype, normalize_fill_value, normalize_order,
normalize_shape, normalize_storage_path)
@@ -86,9 +92,17 @@ def _path_to_prefix(path: Optional[str]) -> str:
def contains_array(store: MutableMapping, path: Path = None) -> bool:
"""Return True if the store contains an array at the given logical path."""
+ if path:
+ assert not path.startswith("meta/")
path = normalize_storage_path(path)
prefix = _path_to_prefix(path)
- key = prefix + array_meta_key
+ if getattr(store, "_store_version", 2) == 3:
+ if prefix:
+ key = "meta/root/" + prefix + ".array"
+ else:
+ key = "meta/root.array"
+ else:
+ key = prefix + array_meta_key
return key in store
@@ -97,6 +111,13 @@ def contains_group(store: MutableMapping, path: Path = None) -> bool:
path = normalize_storage_path(path)
prefix = _path_to_prefix(path)
-    key = prefix + group_meta_key
+ if getattr(store, "_store_version", 2) == 3:
+ if prefix:
+ key = "meta/root/" + prefix + ".group"
+ else:
+ key = "meta/root.group"
+ else:
+ key = prefix + group_meta_key
return key in store
@@ -157,11 +178,29 @@ def _listdir_from_keys(store: MutableMapping, path: Optional[str] = None) -> Lis
return sorted(children)
+def _norm(k):
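+    # map a v3 metadata key suffix to a v2-style listing entry:
+    # "foo.group" -> "foo/" (a sub-group), "foo.array" -> "foo" (an array)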
+ if k.endswith(".group"):
+ return k[:-6] + "/"
+ if k.endswith(".array"):
+ return k[:-6]
+ return k
+
+
def listdir(store, path: Path = None):
"""Obtain a directory listing for the given path. If `store` provides a `listdir`
method, this will be called, otherwise will fall back to implementation via the
`MutableMapping` interface."""
path = normalize_storage_path(path)
+    if getattr(store, "_store_version", None) == 3:
+        if not path.startswith("/"):
+            path = "/" + path
+        if not path.endswith("/"):
+            path = path + "/"
+
+        res = {_norm(k[10:]) for k in store.list_dir("meta/root" + path)}
+        for r in res:
+            assert not r.startswith("meta/")
+        return res
+
if hasattr(store, 'listdir'):
# pass through
return store.listdir(path)
@@ -432,7 +471,11 @@ def _init_array_metadata(
compressor=compressor_config, fill_value=fill_value,
order=order, filters=filters_config)
key = _path_to_prefix(path) + array_meta_key
- store[key] = encode_array_metadata(meta)
+
+ if getattr(store, "_store_version", 2) == 3:
+ store[key] = encode_array_metadata_v3(meta)
+ else:
+ store[key] = encode_array_metadata(meta)
# backwards compatibility
@@ -466,8 +509,10 @@ def init_group(
path = normalize_storage_path(path)
# ensure parent group initialized
- _require_parent_group(path, store=store, chunk_store=chunk_store,
- overwrite=overwrite)
+ if getattr(store, "_store_version", 2) != 3:
+ _require_parent_group(
+ path, store=store, chunk_store=chunk_store, overwrite=overwrite
+ )
# initialise metadata
_init_group_metadata(store=store, overwrite=overwrite, path=path,
@@ -496,8 +541,18 @@ def _init_group_metadata(
# N.B., currently no metadata properties are needed, however there may
# be in future
meta = dict() # type: ignore
- key = _path_to_prefix(path) + group_meta_key
- store[key] = encode_group_metadata(meta)
+ prefix = _path_to_prefix(path)
+ if getattr(store, "_store_version", 2) == 3:
+ if prefix:
+ key = "meta/root/" + prefix + ".group"
+ else:
+ key = "meta/root.group"
+ else:
+ key = prefix + group_meta_key
+ if getattr(store, "_store_version", 2) == 2:
+ store[key] = encode_group_metadata(meta)
+ else:
+ store[key] = encode_group_metadata_v3(meta)
def _dict_store_keys(d: Dict, prefix="", cls=dict):
diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py
index 70e7282fc4..9f877f1b45 100644
--- a/zarr/tests/test_hierarchy.py
+++ b/zarr/tests/test_hierarchy.py
@@ -30,6 +30,57 @@
init_group)
from zarr.util import InfoReporter
from zarr.tests.util import skip_test_env_var
+from zarr import v3
+
+import zarr.v3.storage as v3storage
+
+
+# Async tests need to be top-level.
+async def create_store():
+ pytest.importorskip("redio")
+ from zarr.v3 import V2from3Adapter, SyncV3RedisStore
+
+    # create a sync store for now, as some Group methods are sync.
+ rs = SyncV3RedisStore()
+ await rs.async_initialize()
+ return rs, None
+
+
+async def create_group(
+ store=None, path=None, read_only=False, chunk_store=None, synchronizer=None
+):
+ # can be overridden in sub-classes
+ init_group(store, path=path, chunk_store=chunk_store)
+ g = Group(
+ store,
+ path=path,
+ read_only=read_only,
+ chunk_store=chunk_store,
+ synchronizer=synchronizer,
+ )
+ # return g
+
+
+async def test_group_init_1():
+ store, chunk_store = await create_store()
+ g = await create_group(store, chunk_store=chunk_store)
+ # assert store is g.store
+ # if chunk_store is None:
+ # assert store is g.chunk_store
+ # else:
+ # assert chunk_store is g.chunk_store
+ # assert not g.read_only
+ # assert '' == g.path
+ # assert '/' == g.name
+ # assert '' == g.basename
+ # assert isinstance(g.attrs, Attributes)
+ # g.attrs['foo'] = 'bar'
+ # assert g.attrs['foo'] == 'bar'
+ # assert isinstance(g.info, InfoReporter)
+ # assert isinstance(repr(g.info), str)
+ # assert isinstance(g.info._repr_html_(), str)
+ # if hasattr(store, 'close'):
+ # store.close()
# noinspection PyStatementEffect
@@ -931,6 +982,41 @@ def test_context_manager(self):
d[:] = np.arange(100)
+@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires trio")
+class TestGroupWithV3MemoryStore(TestGroup):
+ @staticmethod
+ def create_store():
+ from zarr.v3 import V2from3Adapter, SyncV3MemoryStore, StoreComparer
+
+ return StoreComparer(MemoryStore(), V2from3Adapter(SyncV3MemoryStore())), None
+
+
+@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires trio")
+class TestGroupWithV3DirectoryStore(TestGroup):
+ @staticmethod
+ def create_store():
+ path = tempfile.mkdtemp()
+ atexit.register(atexit_rmtree, path)
+ from zarr.v3 import V2from3Adapter, StoreComparer, SyncV3DirectoryStore
+
+ return (
+ StoreComparer(MemoryStore(), V2from3Adapter(SyncV3DirectoryStore(path))),
+ None,
+ )
+
+
+@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires trio")
+class TestGroupWithV3RedisStore(TestGroup):
+ @staticmethod
+ def create_store():
+ pytest.importorskip("redio")
+ from zarr.v3 import V2from3Adapter, SyncV3RedisStore, StoreComparer
+
+ rs = SyncV3RedisStore()
+ rs.initialize()
+ return StoreComparer(MemoryStore(), V2from3Adapter(rs)), None
+
+
class TestGroupWithMemoryStore(TestGroup):
@staticmethod
diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py
index f3c7fab008..43972705e5 100644
--- a/zarr/tests/test_storage.py
+++ b/zarr/tests/test_storage.py
@@ -3,6 +3,7 @@
import atexit
import json
import os
+import sys
import pickle
import shutil
import tempfile
@@ -746,6 +747,24 @@ def setdel_hierarchy_checks(store):
assert 'r/s' not in store
+@pytest.mark.skipif(sys.version_info < (3, 6), reason="needs trio")
+class TestV3Adapter(StoreTests, unittest.TestCase):
+ def create_store(self):
+ from zarr.v3 import V2from3Adapter, SyncV3MemoryStore, StoreComparer
+
+ self._store = StoreComparer(MemoryStore(), V2from3Adapter(SyncV3MemoryStore()))
+ return self._store
+
+ def test_store_contains_bytes(self):
+ store = self.create_store()
+ store["foo"] = np.array([97, 98, 99, 100, 101], dtype=np.uint8)
+ assert store["foo"] == b"abcde"
+
+ def test_clear(self):
+ super().test_clear()
+ assert self._store.tested._v3store._backend == {}
+
+
class TestMemoryStore(StoreTests, unittest.TestCase):
def create_store(self):
diff --git a/zarr/tests/test_xarray.py b/zarr/tests/test_xarray.py
new file mode 100644
index 0000000000..89a241451d
--- /dev/null
+++ b/zarr/tests/test_xarray.py
@@ -0,0 +1,41 @@
+import sys
+
+
+if sys.version_info >= (3, 8):
+ import requests
+ import zarr
+ import xarray as xr
+ from zarr.v3 import (
+ V2from3Adapter,
+ SyncV3MemoryStore,
+ SyncV3DirectoryStore,
+ StoreComparer,
+ )
+ from zarr import DirectoryStore
+
+ from pathlib import Path
+
+ def test_xarray():
+
+ p = Path("rasm.nc")
+ if not p.exists():
+ r = requests.get("https://github.com/pydata/xarray-data/raw/master/rasm.nc")
+ with open("rasm.nc", "wb") as f:
+ f.write(r.content)
+
+ ds = xr.open_dataset("rasm.nc")
+
+ compressor = zarr.Blosc(cname="zstd", clevel=3)
+ encoding = {vname: {"compressor": compressor} for vname in ds.data_vars}
+
+ v33 = SyncV3DirectoryStore("v3.zarr")
+ v23 = V2from3Adapter(v33)
+
+        # use xarray to write to a v3 store via the adapter; this creates a v3 zarr hierarchy
+ ds.to_zarr(v23, encoding=encoding)
+
+        # now open the v3 store directly and check we get the right things
+ zarr_ds = xr.open_zarr(store=v33)
+
+ assert len(zarr_ds.attrs) == 11
+ assert zarr_ds.Tair.shape == (36, 205, 275)
diff --git a/zarr/v3/__init__.py b/zarr/v3/__init__.py
new file mode 100644
index 0000000000..7b83f0d7f4
--- /dev/null
+++ b/zarr/v3/__init__.py
@@ -0,0 +1,642 @@
+"""
+Zarr spec v3 draft implementation
+"""
+
+__version__ = "0.0.1"
+
+import json
+import os
+import sys
+from collections.abc import MutableMapping
+import pathlib
+from string import ascii_letters, digits
+from numcodecs.compat import ensure_bytes
+
+from .utils import syncify, nested_run
+
+# flake8: noqa
+from .comparer import StoreComparer
+
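+# map of v2 metadata field names to their v3 equivalents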
+RENAMED_MAP = {
+ "dtype": "data_type",
+ "order": "chunk_memory_layout",
+}
+
+
+from typing import NewType
+
+Key = NewType("Key", str)
+Path = NewType("Path", str)
+
+
+def _assert_valid_path(path: str):
+ if sys.version_info > (3, 7):
+ assert path.isascii()
+ assert path.startswith("/")
+ assert "\\" not in path
+
+
+class BaseV3Store:
+ """
+    Base utility class to create a v3-compliant store with extra checks and utilities.
+
+    It provides a number of default method implementations that add extra checks
+    in order to ensure the correctness of the implementation.
+ """
+
+ _store_version = 3
+ _async = True
+
+ @staticmethod
+ def _valid_key(key: str) -> bool:
+ """
+        Verify that a key conforms to the specification.
+
+        A key is any string containing only characters in the range a-z, A-Z,
+        0-9, or in the set ``/.-_``; return True if that's the case, False
+        otherwise.
+
+        In addition, in spec v3, keys can only start with the prefix meta/,
+        data/, or be exactly zarr.json. This should not be exposed to the
+        user, and is a store implementation detail, so this method will raise
+        a ValueError in that case.
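+
+        For example, "meta/root.group" or "data/root/0" are valid keys,
+        while "foo" raises ValueError, and a key containing characters
+        outside the allowed set returns False.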
+ """
+ if sys.version_info > (3, 7):
+ if not key.isascii():
+ return False
+ if set(key) - set(ascii_letters + digits + "/.-_"):
+ return False
+
+ if (
+ not key.startswith("data/")
+ and (not key.startswith("meta/"))
+ and (not key == "zarr.json")
+ ):
+            raise ValueError("key starts with unexpected value: `{}`".format(key))
+        # TODO: likely more logic to add here.
+ return True
+
+ async def async_get(self, key: str):
+ """
+        Default implementation of async_get/get that validates the key and
+        checks that the returned value is bytes. Relies on `async def _get(key)`
+        being implemented.
+
+        Will ensure that the following are correct:
+        - returned group metadata objects are JSON and contain a single
+          `attributes` key.
+ """
+ assert self._valid_key(key), key
+ result = await self._get(key)
+ assert isinstance(result, bytes), "Expected bytes, got {}".format(result)
+ if key == "zarr.json":
+ v = json.loads(result.decode())
+ assert set(v.keys()) == {
+ "zarr_format",
+ "metadata_encoding",
+ "extensions",
+ }, "v is {}".format(v)
+ elif key.endswith("/.group"):
+ v = json.loads(result.decode())
+ assert set(v.keys()) == {"attributes"}, "got unexpected keys {}".format(
+ v.keys()
+ )
+ return result
+
+ async def async_set(self, key: str, value: bytes):
+ """
+        Default implementation of async_set/set that validates the key and
+        checks that the value is bytes. Relies on `async def _set(key, value)`
+        being implemented.
+
+        Will ensure that the following are correct:
+        - stored group metadata objects are JSON and contain a single
+          `attributes` key.
+ """
+ if key == "zarr.json":
+ v = json.loads(value.decode())
+ assert set(v.keys()) == {
+ "zarr_format",
+ "metadata_encoding",
+ "extensions",
+ }, "v is {}".format(v)
+ elif key.endswith(".array"):
+ v = json.loads(value.decode())
+ expected = {
+ "shape",
+ "data_type",
+ "chunk_grid",
+ "chunk_memory_layout",
+ "compressor",
+ "fill_value",
+ "extensions",
+ "attributes",
+ }
+ current = set(v.keys())
+            # let's do some conversions.
+            assert current == expected, "{} extra, {} missing in {}".format(
+                current - expected, expected - current, v
+ )
+
+ assert isinstance(value, bytes)
+ assert self._valid_key(key)
+ await self._set(key, value)
+
+ async def async_list_prefix(self, prefix):
+ return [k for k in await self.async_list() if k.startswith(prefix)]
+
+ async def async_delete(self, key):
+ deln = await self._backend().delete(key)
+ if deln == 0:
+ raise KeyError(key)
+
+ async def async_initialize(self):
+ pass
+
+ async def async_list_dir(self, prefix):
+ """
+ Note: carefully test this with trailing/leading slashes
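+
+        For example, given the keys "meta/this/is/nested" and
+        "meta/this/also/a/group", ``async_list_dir("meta/this/")`` returns
+        ``["meta/this/is/", "meta/this/also/"]``.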
+ """
+ assert prefix.endswith("/")
+
+ def part1(key):
+ if "/" not in key:
+ return key
+ else:
+ return key.split("/", maxsplit=1)[0] + "/"
+
+ all_keys = await self.async_list_prefix(prefix)
+ len_prefix = len(prefix)
+ trail = {part1(k[len_prefix:]) for k in all_keys}
+ return [prefix + k for k in trail]
+
+ async def async_contains(self, key):
+ assert key.startswith(("meta/", "data/")), "Got {}".format(key)
+ return key in await self.async_list()
+
+ def __contains__(self, key):
+ if hasattr(self, "contains"):
+ return self.contains(key)
+ else:
+ with nested_run():
+ import trio
+
+ return trio.run(self.async_contains, key)
+
+
+class AsyncV3DirectoryStore(BaseV3Store):
+ log = []
+
+ def __init__(self, key):
+ self.log.append("init")
+ self.root = pathlib.Path(key)
+
+ async def _get(self, key: Key):
+ self.log.append("get" + key)
+ path = self.root / key
+ try:
+ return path.read_bytes()
+ except FileNotFoundError:
+ raise KeyError(path)
+
+ async def _set(self, key, value):
+ self.log.append("set {} {}".format(key, value))
+ assert not key.endswith("root/.group")
+ assert value
+ path = self.root / key
+ if not path.parent.exists():
+ path.parent.mkdir(parents=True)
+ return path.write_bytes(value)
+
+ async def async_list(self):
+ ll = []
+ for it in os.walk(self.root):
+ if os.path.sep != "/":
+ prefix = "/".join(it[0].split(os.path.sep))
+ else:
+ prefix = it[0]
+ for file in it[2]:
+ str_key = "/".join([prefix, file])[len(str(self.root)) + 1 :]
+ assert "\\" not in str_key, str_key
+ ll.append(str_key)
+ return ll
+
+ async def async_delete(self, key):
+ self.log.append("delete {}".format(key))
+ path = self.root / key
+ os.remove(path)
+
+
+@syncify
+class SyncV3DirectoryStore(AsyncV3DirectoryStore):
+ _async = False
+
+ def __getitem__(self, key):
+ assert not key.endswith("root/.group")
+ return self.get(key)
+
+
+class AsyncV3RedisStore(BaseV3Store):
+ def __init__(self, host=None, port=None):
+ """initialisation is in _async initialize
+ for early failure.
+ """
+ self.host = host
+ self.port = port
+
+ def __getstate__(self):
+ return {}
+
+ def __setstate__(self, state):
+ self.__init__()
+ from redio import Redis
+
+ self._backend = Redis("redis://localhost/")
+
+ async def async_initialize(self):
+ from redio import Redis
+
+ self._backend = Redis("redis://localhost/")
+ b = self._backend()
+ for k in await self._backend().keys():
+ b.delete(k)
+ await b
+ return self
+
+ async def _get(self, key):
+ res = await self._backend().get(key)
+ if res is None:
+ raise KeyError
+ return res
+
+ async def _set(self, key, value):
+ return await self._backend().set(key, value)
+
+ async def async_list(self):
+ return await self._backend().keys()
+
+
+@syncify
+class SyncV3RedisStore(AsyncV3RedisStore):
+ _async = False
+
+ def __setitem__(self, key, value):
+ assert ".zgroup" not in key
+ return self.set(key, value)
+
+
+class AsyncV3MemoryStore(BaseV3Store):
+ def __init__(self):
+ self._backend = dict()
+
+ async def _get(self, key):
+ return self._backend[key]
+
+ async def _set(self, key, value):
+ self._backend[key] = value
+
+ async def async_delete(self, key):
+ del self._backend[key]
+
+ async def async_list(self):
+ return list(self._backend.keys())
+
+
+@syncify
+class SyncV3MemoryStore(AsyncV3MemoryStore):
+ _async = False
+
+
+class AsyncZarrProtocolV3:
+ def __init__(self, store):
+ if isinstance(store, type):
+ self._store = store()
+ else:
+ self._store = store
+ if hasattr(self, "init_hierarchy"):
+ self.init_hierarchy()
+
+ async def async_init_hierarchy(self):
+ basic_info = {
+ "zarr_format": "https://purl.org/zarr/spec/protocol/core/3.0",
+ "metadata_encoding": "https://tools.ietf.org/html/rfc8259",
+ "extensions": [],
+ }
+ try:
+ await self._store.async_get("zarr.json")
+ except KeyError:
+ await self._store.async_set("zarr.json", json.dumps(basic_info).encode())
+
+ def _g_meta_key(self, path):
+ _assert_valid_path(path)
+ return "meta" + path + ".group"
+
+ async def async_create_group(self, group_path: str):
+ """
+        Create a group at `group_path`;
+        we need to make sure none of the subpaths of group_path are arrays.
+
+        Say the path is g1/g2/g3; we want to check
+
+        /meta/g1.array
+        /meta/g1/g2.array
+
+        We could also assume that protocol implementations never do that.
+ """
+ _assert_valid_path(group_path)
+ DEFAULT_GROUP = """{
+ "attributes": {
+ "spam": "ham",
+ "eggs": 42,
+ } }
+ """
+ await self._store.async_set(
+ self._g_meta_key(group_path), DEFAULT_GROUP.encode()
+ )
+
+    def _create_array_metadata(self, shape=(10,), dtype="<f8"):
+        ...
+
+
+class V2from3Adapter:
+    """
+    Adapter class wrapping a v3 store to expose a v2 API.
+
+    It needs to convert things such as:
+
+    - the name of the metadata documents: ``.zgroup`` -> ``.group`` for example.
+    - path of storage (prefix with root/, meta/ when relevant and vice versa).
+    - try to ensure the stored objects are bytes before reaching the underlying store.
+    - try to adapt v2/v3 nested/flat structures.
+
+    There will likely need to be _some_ more conversions.
+    """
+
+    def __init__(self, v3store):
+        self._v3store = v3store
+
+ def __getitem__(self, key):
+ """
+        In v2, metadata and data are mixed, so we need to convert keys
+        that end with .z* to the metadata path.
+ """
+ assert isinstance(key, str), "expecting string got {key}".format(key=repr(key))
+ v3key = self._convert_2_to_3_keys(key)
+ if key.endswith(".zattrs"):
+ try:
+ res = self._v3store.get(v3key)
+ except KeyError:
+ v3key = v3key.replace(".array", ".group")
+ res = self._v3store.get(v3key)
+
+ assert isinstance(res, bytes)
+ if key.endswith(".zattrs"):
+ data = json.loads(res.decode())["attributes"]
+ res = json.dumps(data, indent=4).encode()
+ elif key.endswith(".zarray"):
+ data = json.loads(res.decode())
+ for target, source in RENAMED_MAP.items():
+ tmp = data[source]
+ del data[source]
+ data[target] = tmp
+ data["chunks"] = data["chunk_grid"]["chunk_shape"]
+ del data["chunk_grid"]
+
+ data["zarr_format"] = 2
+ data["filters"] = None
+ del data["extensions"]
+ del data["attributes"]
+ res = json.dumps(data, indent=4).encode()
+
+ if v3key.endswith(".group") or v3key == "zarr.json":
+ data = json.loads(res.decode())
+ data["zarr_format"] = 2
+ if data.get("attributes") is not None:
+ del data["attributes"]
+ res = json.dumps(data, indent=4).encode()
+ assert isinstance(res, bytes)
+ return res
+
+ def __setitem__(self, key, value):
+ """
+        In v2, metadata and data are mixed, so we need to convert keys
+        that end with .z* to the metadata path.
+ """
+ # TODO convert to bytes if needed
+
+ v3key = self._convert_2_to_3_keys(key)
+ assert not key.endswith("root/.group")
+ # convert chunk separator from ``.`` to ``/``
+
+ if key.endswith(".zarray"):
+ data = json.loads(value.decode())
+ for source, target in RENAMED_MAP.items():
+ tmp = data[source]
+ del data[source]
+ data[target] = tmp
+ data["chunk_grid"] = {}
+ data["chunk_grid"]["chunk_shape"] = data["chunks"]
+ del data["chunks"]
+ data["chunk_grid"]["type"] = "rectangular"
+ data["chunk_grid"]["separator"] = "/"
+ assert data["zarr_format"] == 2
+ del data["zarr_format"]
+ assert data["filters"] in ([], None), "found filters: {}".format(
+ data["filters"]
+ )
+ del data["filters"]
+ data["extensions"] = []
+ try:
+ attrs = json.loads(self._v3store.get(v3key).decode())["attributes"]
+ except KeyError:
+                attrs = {}
+ data["attributes"] = attrs
+ data = json.dumps(data, indent=4).encode()
+ elif key.endswith(".zattrs"):
+ try:
+ # try zarray first...
+ data = json.loads(self._v3store.get(v3key).decode())
+ except KeyError:
+ try:
+ v3key = v3key.replace(".array", ".group")
+ data = json.loads(self._v3store.get(v3key).decode())
+ except KeyError:
+ data = {}
+ data["attributes"] = json.loads(value.decode())
+ self._v3store.set(v3key, json.dumps(data, indent=4).encode())
+ return
+        # TODO: we want to keep the .zattrs, which is stored in the group/array file.
+        # So to set, we need to get from the store, update, and assign back.
+ elif v3key == "meta/root.group":
+            # TODO: this is wrong, the top metadata document is zarr.json.
+ data = json.loads(value.decode())
+ data["zarr_format"] = "https://purl.org/zarr/spec/protocol/core/3.0"
+ data = json.dumps(data, indent=4).encode()
+ elif v3key.endswith("/.group"):
+ data = json.loads(value.decode())
+ del data["zarr_format"]
+ if "attributes" not in data:
+ data["attributes"] = {}
+ data = json.dumps(data).encode()
+ else:
+ data = value
+ assert not isinstance(data, dict)
+ self._v3store.set(v3key, ensure_bytes(data))
+
+ def __contains__(self, key):
+ return self._convert_2_to_3_keys(key) in self._v3store.list()
+
+ def _convert_3_to_2_keys(self, v3key: str) -> str:
+ """
+        TODO:
+        - handle the special .zattrs, which is merged into .zarray/.zgroup
+        - look at the grid separator
+ """
+ if v3key == "meta/root.group":
+ return ".zgroup"
+ if v3key == "meta/root.array":
+ return ".zarray"
+ suffix = v3key[10:]
+ if suffix.endswith(".array"):
+ return suffix[:-6] + ".zarray"
+ if suffix.endswith(".group"):
+ return suffix[:-6] + ".zgroup"
+ return suffix
+
+ def _convert_2_to_3_keys(self, v2key: str) -> str:
+ """
+        TODO:
+        - handle the special .zattrs, which is merged into .zarray/.zgroup
+        - look at the grid separator
+
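+        For example (illustrative)::
+
+            ".zgroup"     -> "meta/root.group"
+            "g1/.zarray"  -> "meta/root/g1/.array"
+            "g1/0.0"      -> "data/root/g1/0.0"
+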
+ """
+ # head of the hierachy is different:
+ if v2key in (".zgroup", ".zattrs"):
+ return "meta/root.group"
+ if v2key == ".zarray":
+ return "meta/root.array"
+ assert not v2key.startswith(
+ "/"
+ ), "expect keys to not start with slash but does {}".format(repr(v2key))
+ if v2key.endswith(".zarray") or v2key.endswith(".zattrs"):
+ return "meta/root/" + v2key[:-7] + ".array"
+ if v2key.endswith(".zgroup"):
+ return "meta/root/" + v2key[:-7] + ".group"
+ return "data/root/" + v2key
+
+ def __len__(self):
+ return len(self._v3store.list())
+
+ def clear(self):
+ keys = self._v3store.list()
+ for k in keys:
+ self._v3store.delete(k)
+
+ def __delitem__(self, key):
+ item3 = self._convert_2_to_3_keys(key)
+
+ items = self._v3store.list_prefix(item3)
+ if not items:
+ raise KeyError(
+ "{} not found in store (converted key to {}".format(key, item3)
+ )
+ for _item in self._v3store.list_prefix(item3):
+ self._v3store.delete(_item)
+
+ def keys(self):
+        # TODO: not as straightforward: we need to actually poke internally at
+        # .group/.array to potentially return '.zattrs' if attributes are set. It
+        # also seems that in some cases .zattrs is set on arrays even if the rest
+        # of the information is not set.
+ key = self._v3store.list()
+ fixed_paths = []
+ for p in key:
+ if p.endswith((".group", ".array")):
+ res = self._v3store.get(p)
+ if json.loads(res.decode()).get("attributes"):
+ fixed_paths.append(p[10:-6] + ".zattrs")
+ fixed_paths.append(self._convert_3_to_2_keys(p))
+
+ return list(set(fixed_paths))
+
+ def listdir(self, path=""):
+ """
+        This will be wrong, as we also need to list meta/prefix; we need to
+        be careful and use list_prefix in that case, with the right options
+        to convert the chunk separators.
+ """
+ v3path = self._convert_2_to_3_keys(path)
+ if not v3path.endswith("/"):
+ v3path = v3path + "/"
+ # if not v3path.startswith("/"):
+ # v3path = '/'+v3path
+ ps = [p for p in self._v3store.list_dir(v3path)]
+ fixed_paths = []
+ for p in ps:
+ if p == ".group":
+ res = self._v3store.get(path + "/.group")
+ if json.loads(res.decode())["attributes"]:
+ fixed_paths.append(".zattrs")
+ fixed_paths.append(self._convert_3_to_2_keys(p))
+
+ res = [p.split("/")[-2] for p in fixed_paths]
+ return res
+
+ def __iter__(self):
+ return iter(self.keys())
diff --git a/zarr/v3/comparer.py b/zarr/v3/comparer.py
new file mode 100644
index 0000000000..53cc92529a
--- /dev/null
+++ b/zarr/v3/comparer.py
@@ -0,0 +1,97 @@
+import json
+from collections.abc import MutableMapping
+
+
+class StoreComparer(MutableMapping):
+ """
+    Compare two store implementations, performing the same operation on
+    both stores.
+
+    The results from the first store are always taken as the reference, and the
+    comparer makes sure the second store returns the same values, or raises
+    the same exceptions where relevant.
+
+    This should have minimal impact on the API, though some generators are
+    reified and sorted to make sure they are identical.
+ """
+
+ def __init__(self, reference, tested):
+ self.reference = reference
+ self.tested = tested
+
+ def __getitem__(self, key):
+ try:
+ k1 = self.reference[key]
+        except Exception as e1:
+            try:
+                k2 = self.tested[key]
+            except Exception as e2:
+                if not isinstance(e2, type(e1)):
+                    raise AssertionError(
+                        "Expecting {} got {}".format(type(e1), type(e2))
+                    ) from e2
+                raise
+            assert False, "should raise, got {} for {}".format(k2, key)
+ k2 = self.tested[key]
+ if key.endswith((".zgroup", ".zarray")):
+ j1, j2 = json.loads(k1.decode()), json.loads(k2.decode())
+ assert j1 == j2, "{} != {}".format(j1, j2)
+ else:
+            assert k2 == k1, "{!r} != {!r}".format(k1, k2)
+ return k1
+
+ def __setitem__(self, key, value):
+        # TODO: not quite happy about casting here; maybe we should stay strict?
+ from numcodecs.compat import ensure_bytes
+
+ value = ensure_bytes(value)
+ try:
+ self.reference[key] = value
+        except Exception as e1:
+            try:
+                self.tested[key] = value
+                assert False, "should raise like the reference store"
+            except Exception as e2:
+                assert isinstance(e2, type(e1))
+            raise
+        try:
+            self.tested[key] = value
+        except Exception as e:
+            assert False, "should not raise, got {}".format(e)
+
+ def keys(self):
+ try:
+ k1 = list(sorted(self.reference.keys()))
+ except Exception as e1:
+ try:
+ k2 = self.tested.keys()
+ assert False, "should raise"
+ except Exception as e2:
+ assert isinstance(e2, type(e1))
+ raise
+ k2 = sorted(self.tested.keys())
+ assert k2 == k1, "got {};\n expecting {}\n missing: {},\n extra:{}".format(
+ k1, k2, set(k1) - set(k2), set(k2) - set(k1)
+ )
+ return k1
+
+ def __delitem__(self, key):
+ try:
+ del self.reference[key]
+ except Exception as e1:
+ try:
+ del self.tested[key]
+ assert False, "should raise"
+ except Exception as e2:
+ assert isinstance(e2, type(e1))
+ raise
+ del self.tested[key]
+
+ def __iter__(self):
+ return iter(self.keys())
+
+ def __len__(self):
+ return len(self.keys())
+
+ def __contains__(self, key):
+ return key in self.reference
diff --git a/zarr/v3/protocol.py b/zarr/v3/protocol.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/zarr/v3/storage.py b/zarr/v3/storage.py
new file mode 100644
index 0000000000..2ea764742e
--- /dev/null
+++ b/zarr/v3/storage.py
@@ -0,0 +1,68 @@
+from zarr.util import normalize_storage_path
+from zarr.errors import err_contains_array, err_contains_group
+
+
+async def init_group(store, overwrite=False, path=None, chunk_store=None):
+ """Initialize a group store. Note that this is a low-level function and there should be no
+ need to call this directly from user code.
+
+ Parameters
+ ----------
+ store : MutableMapping
+ A mapping that supports string keys and byte sequence values.
+ overwrite : bool, optional
+ If True, erase all data in `store` prior to initialisation.
+ path : string, optional
+ Path under which array is stored.
+ chunk_store : MutableMapping, optional
+ Separate storage for chunks. If not provided, `store` will be used
+ for storage of both chunks and metadata.
+
+ """
+
+ # normalize path
+ path = normalize_storage_path(path)
+
+ # initialise metadata
+ _init_group_metadata(
+ store=store, overwrite=overwrite, path=path, chunk_store=chunk_store
+ )
+
+
+async def _init_group_metadata(store, overwrite=False, path=None, chunk_store=None):
+
+ # guard conditions
+ if overwrite:
+ raise NotImplementedError
+ # attempt to delete any pre-existing items in store
+ rmdir(store, path)
+ if chunk_store is not None:
+ rmdir(chunk_store, path)
+ elif await contains_array(store, path):
+ err_contains_array(path)
+ elif contains_group(store, path):
+ err_contains_group(path)
+
+ # initialize metadata
+ # N.B., currently no metadata properties are needed, however there may
+ # be in future
+ meta = dict()
+ raise NotImplementedError
+ key = _path_to_prefix(path) + group_meta_key
+ store[key] = encode_group_metadata(meta)
+
+
+async def contains_array(store, path=None):
+ """Return True if the store contains an array at the given logical path."""
+ path = normalize_storage_path(path)
+ key = "meta/root" + path + ".array"
+ return key in await store.list()
+
+
+def contains_group(store, path=None):
+ """Return True if the store contains a group at the given logical path."""
+ raise NotImplementedError
+ path = normalize_storage_path(path)
+ prefix = _path_to_prefix(path)
+ key = prefix + group_meta_key
+ return key in store
diff --git a/zarr/v3/test_protocol.py b/zarr/v3/test_protocol.py
new file mode 100644
index 0000000000..f3ece9e7b8
--- /dev/null
+++ b/zarr/v3/test_protocol.py
@@ -0,0 +1,87 @@
+import pytest
+
+from zarr.storage import init_group
+from zarr.v3 import (
+ SyncV3MemoryStore,
+ SyncV3RedisStore,
+ V2from3Adapter,
+ ZarrProtocolV3,
+ AsyncV3RedisStore,
+)
+from zarr.storage import MemoryStore
+
+
+@pytest.mark.parametrize(
+ ("store", "key"), [(SyncV3MemoryStore(), ".group"), (MemoryStore(), ".zgroup")]
+)
+def test_cover_Attribute_no_key(store, key):
+ from zarr.hierarchy import Attributes
+
+ Attributes(store, key=key)
+
+
+def test_cover_Attribute_wrong_key():
+ from zarr.hierarchy import Attributes
+
+ with pytest.raises(ValueError):
+ Attributes(SyncV3MemoryStore(), key=".zattr")
+
+
+async def test_scenario():
+ pytest.importorskip("trio")
+
+ store = SyncV3MemoryStore()
+
+ await store.async_set("data/a", bytes(1))
+
+ with pytest.raises(ValueError):
+ store.get("arbitrary")
+ with pytest.raises(ValueError):
+ store.get("data")
+ with pytest.raises(ValueError):
+ store.get("meta") # test commit
+
+ assert store.get("data/a") == bytes(1)
+ assert await store.async_get("data/a") == bytes(1)
+
+ await store.async_set("meta/this/is/nested", bytes(1))
+ await store.async_set("meta/this/is/a/group", bytes(1))
+ await store.async_set("meta/this/also/a/group", bytes(1))
+ await store.async_set("meta/thisisweird/also/a/group", bytes(1))
+
+ assert len(store.list()) == 5
+ with pytest.raises(AssertionError):
+ assert store.list_dir("meta/this")
+
+ assert set(store.list_dir("meta/this/")) == set(
+ ["meta/this/also/", "meta/this/is/"]
+ )
+ with pytest.raises(AssertionError):
+ assert await store.async_list_dir("meta/this")
+
+
+async def test_2():
+ protocol = ZarrProtocolV3(SyncV3MemoryStore)
+ store = protocol._store
+
+ await protocol.async_create_group("/g1")
+ assert isinstance(await store.async_get("meta/g1.group"), bytes)
+
+
+@pytest.mark.parametrize("klass", [SyncV3MemoryStore, SyncV3RedisStore])
+def test_misc(klass):
+
+ pytest.importorskip("redio")
+
+ _store = klass()
+ _store.initialize()
+ store = V2from3Adapter(_store)
+
+ init_group(store)
+
+ if isinstance(_store, SyncV3MemoryStore):
+ assert store._v3store._backend == {
+ "meta/root.group": b'{\n "zarr_format": '
+ b'"https://purl.org/zarr/spec/protocol/core/3.0"\n}'
+ }
+ assert store[".zgroup"] == b'{\n "zarr_format": 2\n}'
diff --git a/zarr/v3/utils.py b/zarr/v3/utils.py
new file mode 100644
index 0000000000..2bab711a09
--- /dev/null
+++ b/zarr/v3/utils.py
@@ -0,0 +1,71 @@
+import inspect
+from contextlib import contextmanager
+from functools import partial
+
+
+@contextmanager
+def nested_run():
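+    """
+    Temporarily stash trio's global run context so that ``trio.run`` can be
+    called from code that may already be inside a trio run, restoring the
+    previous context on exit.
+    """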
+ __tracebackhide__ = True
+ from trio._core._run import GLOBAL_RUN_CONTEXT
+
+ s = object()
+ task, runner, _dict = s, s, s
+ if hasattr(GLOBAL_RUN_CONTEXT, "__dict__"):
+ _dict = GLOBAL_RUN_CONTEXT.__dict__
+ if hasattr(GLOBAL_RUN_CONTEXT, "task"):
+ task = GLOBAL_RUN_CONTEXT.task
+ del GLOBAL_RUN_CONTEXT.task
+ if hasattr(GLOBAL_RUN_CONTEXT, "runner"):
+ runner = GLOBAL_RUN_CONTEXT.runner
+ del GLOBAL_RUN_CONTEXT.runner
+
+ try:
+ yield
+ finally:
+ if task is not s:
+ GLOBAL_RUN_CONTEXT.task = task
+ elif hasattr(GLOBAL_RUN_CONTEXT, "task"):
+ del GLOBAL_RUN_CONTEXT.task
+
+ if runner is not s:
+ GLOBAL_RUN_CONTEXT.runner = runner
+ elif hasattr(GLOBAL_RUN_CONTEXT, "runner"):
+ del GLOBAL_RUN_CONTEXT.runner
+
+ if _dict is not s:
+ GLOBAL_RUN_CONTEXT.__dict__.update(_dict)
+
+
+def syncify(cls, *args, **kwargs):
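+    """
+    Class decorator that, for every ``async_xxx`` coroutine method on ``cls``,
+    generates a synchronous ``xxx`` method running the coroutine with trio.
+    """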
+
+ attrs = [c for c in dir(cls) if c.startswith("async_")]
+ for attr in attrs:
+ meth = getattr(cls, attr)
+ if inspect.iscoroutinefunction(meth):
+
+ def cl(meth):
+ def sync_version(self, *args, **kwargs):
+ """
+ Automatically generated synchronous version of {attr}
+
+ See {attr} documentation.
+ """
+ import trio
+
+ __tracebackhide__ = True
+
+ with nested_run():
+                    return trio.run(partial(meth, self, *args, **kwargs))
+
+ sync_version.__doc__ = (
+ "Automatically generated sync"
+ "version of {}.\n\n{}".format(attr, meth.__doc__)
+ )
+ return sync_version
+
+ setattr(cls, attr[6:], cl(meth))
+
+ return cls