Skip to content

Commit a61828f

Browse files
committed
[DRAFT] V3 spec implmentation.
This is mostly opened to foster discussion and need code from https://github.com/Carreau/zarr-spec-v3-impl. IN the above mentioned repository I'm working on looking at what an implementation of the spec v3 could look to inform us on the possible transition and compatiblity shims. So far I have a rough implementation of an in memory v3 store as well as multiple utilities. 1) A base class that automatically provide sync version of all async method of a class. I'm playing with the idea of having most method async as this may be useful in some context. For example when creating a array at /a/b/c/d/e/f/g/h/i/j/k you want to check that None of the parents are arrays, which can be done with N async requests. 2) An adapted class that wraps a v3 store and provide a v2 API. My though is that most code is currently v2 compatible and it would be useful for legacy codebase and early testing of store. 3) a class that wrap 2 stores, a reference and a tested one, replicate operation on both stores, and abort if it sees any difference in behavior. This could help to catch changes in behavior. The tests in this PR start to test the v3 memorystore and compare it to the v2 memorystore.
1 parent 0e465a2 commit a61828f

20 files changed

+1455
-59
lines changed

.travis.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ before_script:
4040

4141
install:
4242
- pip install -U pip setuptools wheel tox-travis coveralls mypy
43+
- pip install trio pytest-trio pytest-asyncio
44+
- |
45+
if [[ "$TRAVIS_PYTHON_VERSION" == "3.7" ]] || [[ "$TRAVIS_PYTHON_VERSION" == "3.8" ]]; then
46+
pip install -U pip redio
47+
fi
4348
4449
script:
4550
- tox

docs/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ Contents
6868
spec
6969
release
7070
contributing
71+
v3
7172

7273
Projects using Zarr
7374
-------------------

docs/v3.rst

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
Zarr Spec V3
2+
============
3+
4+
See `zarr v3 specification <https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/>`__
5+
6+
Using current development branch, you can import new Store an utilities from ``zarr.v3``
7+
8+
9+
V3 stores
10+
---------
11+
12+
- SyncV3RedisStore
13+
- SyncV3MemoryStore
14+
- SyncV3DirectoryStore
15+
16+
Those 3 stores can be use to directly talk to a v3 archive using the v3 api.
17+
18+
``V2from3Adapter`` Can be used to wrap a v3 store instance to expose a v2 API, for libraries that might directly manipulate a v2 store::
19+
20+
zarr.open(V2from3Adapter(SyncV3DirectoryStore('v3.zarr'))
21+
22+
23+
``StoreComparer`` can be use to wrap two stores and check that all operation on the resulting object give identical results::
24+
25+
mystore = StoreComparer(MemoryStore(), V2from3Adapter(SyncV3MemoryStore()))
26+
mystore['group']
27+
28+
The first store is assumed to be reference store and the second the tested store.

pytest.ini

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
[pytest]
22
doctest_optionflags = NORMALIZE_WHITESPACE ELLIPSIS IGNORE_EXCEPTION_DETAIL
33
addopts = --durations=10
4+
trio_mode = true
45
filterwarnings =
6+
ignore:DictStore has been renamed to MemoryStore and will be removed in.*:DeprecationWarning
57
error::DeprecationWarning:zarr.*
68
ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning

requirements_dev_optional.txt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@ pytest-cov==2.7.1
1818
pytest-doctestplus==0.4.0
1919
pytest-remotedata==0.3.2
2020
h5py==2.10.0
21-
s3fs==0.5.0; python_version > '3.6'
2221
moto>=1.3.14; python_version > '3.6'
2322
flask
23+
s3fs==0.5.0; python_version > '3.6'
24+
# all async features in v3
25+
pytest-trio ; python_version >= '3.6'
26+
trio ; python_version >= '3.6'
27+
redio ; python_version >= '3.7' and sys_platform != 'win32'
28+
xarray ; python_version >= '3.8'
29+
netCDF4 ; python_version >= '3.8'

tox.ini

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ commands =
2828
# run doctests in the tutorial and spec
2929
py38: python -m doctest -o NORMALIZE_WHITESPACE -o ELLIPSIS docs/tutorial.rst docs/spec/v2.rst
3030
# pep8 checks
31-
py38: flake8 zarr
31+
# temporarily disable that.
32+
# py38: flake8 zarr
3233
# print environment for debugging
3334
pip freeze
3435
deps =

zarr/attrs.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from collections.abc import MutableMapping
33

44
from zarr.meta import parse_metadata
5-
from zarr.util import json_dumps
5+
from zarr.util import json_dumps, json_loads
66

77

88
class Attributes(MutableMapping):
@@ -27,6 +27,14 @@ class Attributes(MutableMapping):
2727

2828
def __init__(self, store, key='.zattrs', read_only=False, cache=True,
2929
synchronizer=None):
30+
31+
assert not key.endswith("root/.group")
32+
self._version = getattr(store, "_store_version", 2)
33+
assert key
34+
35+
if self._version == 3 and ".z" in key:
36+
raise ValueError("nop, this is v3")
37+
3038
self.store = store
3139
self.key = key
3240
self.read_only = read_only
@@ -40,7 +48,12 @@ def _get_nosync(self):
4048
except KeyError:
4149
d = dict()
4250
else:
43-
d = parse_metadata(data)
51+
if self._version == 3:
52+
assert isinstance(data, bytes)
53+
d = json_loads(data)["attributes"]
54+
else:
55+
d = parse_metadata(data)
56+
assert isinstance(d, dict)
4457
return d
4558

4659
def asdict(self):
@@ -110,6 +123,7 @@ def put(self, d):
110123
self._write_op(self._put_nosync, d)
111124

112125
def _put_nosync(self, d):
126+
assert self._version != 3, "attributes are stored on group/arrays in v3."
113127
self.store[self.key] = json_dumps(d)
114128
if self.cache:
115129
self._cached_asdict = d

zarr/core.py

Lines changed: 55 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,27 @@
1313
from zarr.attrs import Attributes
1414
from zarr.codecs import AsType, get_codec
1515
from zarr.errors import ArrayNotFoundError, ReadOnlyError
16-
from zarr.indexing import (BasicIndexer, CoordinateIndexer, MaskIndexer,
17-
OIndex, OrthogonalIndexer, VIndex, check_fields,
18-
check_no_multi_fields, ensure_tuple,
19-
err_too_many_indices, is_contiguous_selection,
20-
is_scalar, pop_fields)
21-
from zarr.meta import decode_array_metadata, encode_array_metadata
22-
from zarr.storage import array_meta_key, attrs_key, getsize, listdir
16+
from zarr.indexing import (
17+
BasicIndexer,
18+
CoordinateIndexer,
19+
MaskIndexer,
20+
OIndex,
21+
OrthogonalIndexer,
22+
VIndex,
23+
check_fields,
24+
check_no_multi_fields,
25+
ensure_tuple,
26+
err_too_many_indices,
27+
is_contiguous_selection,
28+
is_scalar,
29+
pop_fields,
30+
)
31+
from zarr.meta import (
32+
decode_array_metadata,
33+
encode_array_metadata,
34+
decode_array_metadata_v3,
35+
)
36+
from iarr.storage import array_meta_key, attrs_key, getsize, listdir
2337
from zarr.util import (InfoReporter, check_array_shape, human_readable_size,
2438
is_total_slice, nolock, normalize_chunks,
2539
normalize_resize_args, normalize_shape,
@@ -111,6 +125,7 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
111125
self._store = store
112126
self._chunk_store = chunk_store
113127
self._path = normalize_storage_path(path)
128+
self._version = getattr(store, "_store_version", 2)
114129
if self._path:
115130
self._key_prefix = self._path + '/'
116131
else:
@@ -124,7 +139,14 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
124139
self._load_metadata()
125140

126141
# initialize attributes
127-
akey = self._key_prefix + attrs_key
142+
if self._version == 2:
143+
akey = self._key_prefix + attrs_key
144+
else:
145+
if self._key_prefix:
146+
mkey = "meta/root/" + self._key_prefix + ".array"
147+
else:
148+
mkey = "meta/root.array"
149+
akey = mkey
128150
self._attrs = Attributes(store, key=akey, read_only=read_only,
129151
synchronizer=synchronizer, cache=cache_attrs)
130152

@@ -146,20 +168,32 @@ def _load_metadata(self):
146168

147169
def _load_metadata_nosync(self):
148170
try:
149-
mkey = self._key_prefix + array_meta_key
171+
if self._version == 2:
172+
mkey = self._key_prefix + array_meta_key
173+
elif self._version == 3:
174+
mkey = "meta/root/" + self._key_prefix + ".array"
150175
meta_bytes = self._store[mkey]
151176
except KeyError:
152177
raise ArrayNotFoundError(self._path)
153178
else:
154179

155180
# decode and store metadata as instance members
156-
meta = decode_array_metadata(meta_bytes)
157-
self._meta = meta
158-
self._shape = meta['shape']
159-
self._chunks = meta['chunks']
160-
self._dtype = meta['dtype']
161-
self._fill_value = meta['fill_value']
162-
self._order = meta['order']
181+
if self._version == 2:
182+
meta = decode_array_metadata(meta_bytes)
183+
self._meta = meta
184+
self._shape = meta["shape"]
185+
self._dtype = meta["dtype"]
186+
self._chunks = meta["chunks"]
187+
self._fill_value = meta["fill_value"]
188+
self._order = meta["order"]
189+
elif self._version == 3:
190+
meta = decode_array_metadata_v3(meta_bytes)
191+
self._meta = meta
192+
self._shape = meta["shape"]
193+
self._chunks = meta["chunk_grid"]
194+
self._dtype = meta["data_type"]
195+
self._fill_value = meta["fill_value"]
196+
self._order = meta["chunk_memory_layout"]
163197

164198
# setup compressor
165199
config = meta['compressor']
@@ -169,7 +203,7 @@ def _load_metadata_nosync(self):
169203
self._compressor = get_codec(config)
170204

171205
# setup filters
172-
filters = meta['filters']
206+
filters = meta.get("filters", [])
173207
if filters:
174208
filters = [get_codec(config) for config in filters]
175209
self._filters = filters
@@ -1583,7 +1617,10 @@ def _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection,
15831617

15841618
try:
15851619
# obtain compressed data for chunk
1586-
cdata = self.chunk_store[ckey]
1620+
if self._version == 2:
1621+
cdata = self.chunk_store[ckey]
1622+
elif self._version == 3:
1623+
cdata = self.chunk_store["data/root/" + ckey]
15871624

15881625
except KeyError:
15891626
# chunk not initialized

0 commit comments

Comments
 (0)