
Commit 0aaf6d5

misc Zarr V3 bug fixes: open_group, open_consolidated and MemoryStoreV3 (#1006)
* Fix missing zarr_version argument in open_group and open_consolidated
* add missing KVStore import at top level
* remove outdated comment
* Fix implementation of MemoryStoreV3.rename and enable hierarchy tests
* some cleanup in V3 group tests
* pep8 fix
* parameterize consolidated metadata tests to use string paths
* remove redundant test_consolidated_with_chunk_store
* test open_group with store and chunk_store specified as string paths
1 parent c7494a9 commit 0aaf6d5

File tree

7 files changed: +186, -149 lines changed


zarr/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -11,7 +11,7 @@
 from zarr.hierarchy import Group, group, open_group
 from zarr.n5 import N5Store, N5FSStore
 from zarr.storage import (ABSStore, DBMStore, DictStore, DirectoryStore,
-                          LMDBStore, LRUStoreCache, MemoryStore, MongoDBStore,
+                          KVStore, LMDBStore, LRUStoreCache, MemoryStore, MongoDBStore,
                           NestedDirectoryStore, RedisStore, SQLiteStore,
                           TempStore, ZipStore)
 from zarr.sync import ProcessSynchronizer, ThreadSynchronizer
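
With this one-line change, KVStore is re-exported from the top-level zarr namespace. A minimal sketch of what that enables; the dict-backed store here is purely illustrative:

    import zarr

    # KVStore wraps any MutableMapping (here a plain dict) as a Zarr store.
    data = dict()
    store = zarr.KVStore(data)
    root = zarr.group(store=store)
    root.create_dataset('x', shape=(10,), chunks=(5,), dtype='i4')
    # The underlying dict now holds keys such as '.zgroup' and 'x/.zarray'.
    print(sorted(data))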

zarr/convenience.py

Lines changed: 3 additions & 1 deletion

@@ -1277,7 +1277,9 @@ def open_consolidated(store: StoreLike, metadata_key=".zmetadata", mode="r+", **
     """

     # normalize parameters
-    store = normalize_store_arg(store, storage_options=kwargs.get("storage_options"), mode=mode)
+    zarr_version = kwargs.get('zarr_version', None)
+    store = normalize_store_arg(store, storage_options=kwargs.get("storage_options"), mode=mode,
+                                zarr_version=zarr_version)
     if mode not in {'r', 'r+'}:
         raise ValueError("invalid mode, expected either 'r' or 'r+'; found {!r}"
                          .format(mode))
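
The practical effect: when a string path is passed together with zarr_version=3, the store is now normalized to a v3 store instead of silently falling back to v2. A minimal sketch, assuming a build where the experimental v3 API is available (later releases gate it behind ZARR_V3_EXPERIMENTAL_API=1); the 'example.zarr' path and 'dataset' group name are illustrative:

    import zarr

    # Write a small v3 hierarchy to a directory store given as a string path.
    root = zarr.open_group('example.zarr', mode='w', path='dataset', zarr_version=3)
    arr = root.create_dataset('arr', shape=(4, 4), chunks=(2, 2), dtype='f8')
    arr[:] = 1.0

    # Consolidate using the concrete store object, then reopen from the string
    # path; the zarr_version kwarg is now forwarded to normalize_store_arg.
    zarr.consolidate_metadata(root.store, path='dataset')
    z = zarr.open_consolidated('example.zarr', path='dataset', zarr_version=3, mode='r')
    print(list(z))  # ['arr']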

zarr/hierarchy.py

Lines changed: 2 additions & 1 deletion

@@ -1308,7 +1308,8 @@ def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=N
     if chunk_store is not None:
         chunk_store = _normalize_store_arg(chunk_store,
                                            storage_options=storage_options,
-                                           mode=mode)
+                                           mode=mode,
+                                           zarr_version=zarr_version)
         if not getattr(chunk_store, '_store_version', DEFAULT_ZARR_VERSION) == zarr_version:
             raise ValueError(
                 "zarr_version of store and chunk_store must match"

zarr/storage.py

Lines changed: 14 additions & 1 deletion

@@ -3045,7 +3045,20 @@ def rename(self, src_path: Path, dst_path: Path):
                 src_parent, src_key = self._get_parent(base + src_path)
                 dst_parent, dst_key = self._require_parent(base + dst_path)

-                dst_parent[dst_key] = src_parent.pop(src_key)
+                if src_key in src_parent:
+                    dst_parent[dst_key] = src_parent.pop(src_key)
+
+                if base == meta_root:
+                    # check for and move corresponding metadata
+                    sfx = _get_metadata_suffix(self)
+                    src_meta = src_key + '.array' + sfx
+                    if src_meta in src_parent:
+                        dst_meta = dst_key + '.array' + sfx
+                        dst_parent[dst_meta] = src_parent.pop(src_meta)
+                    src_meta = src_key + '.group' + sfx
+                    if src_meta in src_parent:
+                        dst_meta = dst_key + '.group' + sfx
+                        dst_parent[dst_meta] = src_parent.pop(src_meta)
                 any_renamed = True
         any_renamed = _rename_metadata_v3(self, src_path, dst_path) or any_renamed
         if not any_renamed:
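
The reworked rename now also moves the matching .array.json / .group.json documents under meta/root, not just the data keys. A minimal sketch of the behaviour being fixed, assuming the experimental v3 API; the import location reflects this commit, where MemoryStoreV3 still lives in zarr.storage:

    import zarr
    from zarr.storage import MemoryStoreV3  # location as of this commit

    store = MemoryStoreV3()
    root = zarr.group(store=store, path='dataset', zarr_version=3)
    arr = root.create_dataset('a', shape=(4,), chunks=(2,), dtype='i8')
    arr[:] = 1

    # Rename at the store level: both the data/root/dataset/a/* chunk keys and
    # the meta/root/dataset/a.array.json document are moved to the new name.
    store.rename('dataset/a', 'dataset/b')
    print('meta/root/dataset/b.array.json' in store)  # True after the fix
    print('meta/root/dataset/a.array.json' in store)  # False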

zarr/tests/test_convenience.py

Lines changed: 78 additions & 105 deletions

@@ -202,23 +202,30 @@ def test_tree(zarr_version):
     assert str(zarr.tree(g1)) == str(g1.tree())


-# TODO: consolidated metadata currently only supported for v2
-
 @pytest.mark.parametrize('zarr_version', [2, 3])
 @pytest.mark.parametrize('with_chunk_store', [False, True], ids=['default', 'with_chunk_store'])
-def test_consolidate_metadata(with_chunk_store, zarr_version):
-
-    if zarr_version == 2:
-        MemoryStoreClass = MemoryStore
-        path = ''
-    else:
-        MemoryStoreClass = MemoryStoreV3
-        path = 'dataset'
-
+@pytest.mark.parametrize('stores_from_path', [False, True])
+def test_consolidate_metadata(with_chunk_store, zarr_version, stores_from_path):
     # setup initial data
-    store = MemoryStoreClass()
-    chunk_store = MemoryStoreClass() if with_chunk_store else None
-    z = group(store, chunk_store=chunk_store, path=path)
+    if stores_from_path:
+        store = tempfile.mkdtemp()
+        atexit.register(atexit_rmtree, store)
+        if with_chunk_store:
+            chunk_store = tempfile.mkdtemp()
+            atexit.register(atexit_rmtree, chunk_store)
+        else:
+            chunk_store = None
+        version_kwarg = {'zarr_version': zarr_version}
+    else:
+        if zarr_version == 2:
+            store = MemoryStore()
+            chunk_store = MemoryStore() if with_chunk_store else None
+        elif zarr_version == 3:
+            store = MemoryStoreV3()
+            chunk_store = MemoryStoreV3() if with_chunk_store else None
+        version_kwarg = {}
+    path = 'dataset' if zarr_version == 3 else None
+    z = group(store, chunk_store=chunk_store, path=path, **version_kwarg)
     z.create_group('g1')
     g2 = z.create_group('g2')
     g2.attrs['hello'] = 'world'

@@ -229,64 +236,81 @@ def test_consolidate_metadata(with_chunk_store, zarr_version):
     arr[:] = 1.0
     assert 16 == arr.nchunks_initialized

+    if stores_from_path:
+        # get the actual store class for use with consolidate_metadata
+        store_class = z._store
+    else:
+        store_class = store
+
     if zarr_version == 3:
         # error on v3 if path not provided
         with pytest.raises(ValueError):
-            consolidate_metadata(store, path=None)
+            consolidate_metadata(store_class, path=None)

         with pytest.raises(ValueError):
-            consolidate_metadata(store, path='')
+            consolidate_metadata(store_class, path='')

     # perform consolidation
-    out = consolidate_metadata(store, path=path)
+    out = consolidate_metadata(store_class, path=path)
     assert isinstance(out, Group)
     assert ['g1', 'g2'] == list(out)
-    if zarr_version == 2:
-        assert isinstance(out._store, ConsolidatedMetadataStore)
-        assert '.zmetadata' in store
-        meta_keys = ['.zgroup',
-                     'g1/.zgroup',
-                     'g2/.zgroup',
-                     'g2/.zattrs',
-                     'g2/arr/.zarray',
-                     'g2/arr/.zattrs']
-    else:
-        assert isinstance(out._store, ConsolidatedMetadataStoreV3)
-        assert 'meta/root/consolidated/.zmetadata' in store
-        meta_keys = ['zarr.json',
-                     meta_root + 'dataset.group.json',
-                     meta_root + 'dataset/g1.group.json',
-                     meta_root + 'dataset/g2.group.json',
-                     meta_root + 'dataset/g2/arr.array.json',
-                     'meta/root/consolidated.group.json']
-    for key in meta_keys:
-        del store[key]
+    if not stores_from_path:
+        if zarr_version == 2:
+            assert isinstance(out._store, ConsolidatedMetadataStore)
+            assert '.zmetadata' in store
+            meta_keys = ['.zgroup',
+                         'g1/.zgroup',
+                         'g2/.zgroup',
+                         'g2/.zattrs',
+                         'g2/arr/.zarray',
+                         'g2/arr/.zattrs']
+        else:
+            assert isinstance(out._store, ConsolidatedMetadataStoreV3)
+            assert 'meta/root/consolidated/.zmetadata' in store
+            meta_keys = ['zarr.json',
+                         meta_root + 'dataset.group.json',
+                         meta_root + 'dataset/g1.group.json',
+                         meta_root + 'dataset/g2.group.json',
+                         meta_root + 'dataset/g2/arr.array.json',
+                         'meta/root/consolidated.group.json']
+        for key in meta_keys:
+            del store[key]

     # open consolidated
-    z2 = open_consolidated(store, chunk_store=chunk_store, path=path)
+    z2 = open_consolidated(store, chunk_store=chunk_store, path=path, **version_kwarg)
     assert ['g1', 'g2'] == list(z2)
     assert 'world' == z2.g2.attrs['hello']
     assert 1 == z2.g2.arr.attrs['data']
     assert (z2.g2.arr[:] == 1.0).all()
     assert 16 == z2.g2.arr.nchunks
     assert 16 == z2.g2.arr.nchunks_initialized

-    # tests del/write on the store
-    if zarr_version == 2:
-        cmd = ConsolidatedMetadataStore(store)
-        with pytest.raises(PermissionError):
-            del cmd['.zgroup']
-        with pytest.raises(PermissionError):
-            cmd['.zgroup'] = None
+    if stores_from_path:
+        # path string is note a BaseStore subclass so cannot be used to
+        # initialize a ConsolidatedMetadataStore.
+        if zarr_version == 2:
+            with pytest.raises(ValueError):
+                cmd = ConsolidatedMetadataStore(store)
+        elif zarr_version == 3:
+            with pytest.raises(ValueError):
+                cmd = ConsolidatedMetadataStoreV3(store)
     else:
-        cmd = ConsolidatedMetadataStoreV3(store)
-        with pytest.raises(PermissionError):
-            del cmd[meta_root + 'dataset.group.json']
-        with pytest.raises(PermissionError):
-            cmd[meta_root + 'dataset.group.json'] = None
+        # tests del/write on the store
+        if zarr_version == 2:
+            cmd = ConsolidatedMetadataStore(store)
+            with pytest.raises(PermissionError):
+                del cmd['.zgroup']
+            with pytest.raises(PermissionError):
+                cmd['.zgroup'] = None
+        else:
+            cmd = ConsolidatedMetadataStoreV3(store)
+            with pytest.raises(PermissionError):
+                del cmd[meta_root + 'dataset.group.json']
+            with pytest.raises(PermissionError):
+                cmd[meta_root + 'dataset.group.json'] = None

-    # test getsize on the store
-    assert isinstance(getsize(cmd), Integral)
+        # test getsize on the store
+        assert isinstance(getsize(cmd), Integral)

     # test new metadata are not writeable
     with pytest.raises(PermissionError):

@@ -316,62 +340,11 @@ def test_consolidate_metadata(with_chunk_store, zarr_version):

     # make sure keyword arguments are passed through without error
     open_consolidated(
-        store, chunk_store=chunk_store, path=path, cache_attrs=True, synchronizer=None
+        store, chunk_store=chunk_store, path=path, cache_attrs=True, synchronizer=None,
+        **version_kwarg,
     )


-def test_consolidated_with_chunk_store():
-    # setup initial data
-    store = MemoryStore()
-    chunk_store = MemoryStore()
-    z = group(store, chunk_store=chunk_store)
-    z.create_group('g1')
-    g2 = z.create_group('g2')
-    g2.attrs['hello'] = 'world'
-    arr = g2.create_dataset('arr', shape=(20, 20), chunks=(5, 5), dtype='f8')
-    assert 16 == arr.nchunks
-    assert 0 == arr.nchunks_initialized
-    arr.attrs['data'] = 1
-    arr[:] = 1.0
-    assert 16 == arr.nchunks_initialized
-
-    # perform consolidation
-    out = consolidate_metadata(store)
-    assert isinstance(out, Group)
-    assert '.zmetadata' in store
-    for key in ['.zgroup',
-                'g1/.zgroup',
-                'g2/.zgroup',
-                'g2/.zattrs',
-                'g2/arr/.zarray',
-                'g2/arr/.zattrs']:
-        del store[key]
-    # open consolidated
-    z2 = open_consolidated(store, chunk_store=chunk_store)
-    assert ['g1', 'g2'] == list(z2)
-    assert 'world' == z2.g2.attrs['hello']
-    assert 1 == z2.g2.arr.attrs['data']
-    assert (z2.g2.arr[:] == 1.0).all()
-    assert 16 == z2.g2.arr.nchunks
-    assert 16 == z2.g2.arr.nchunks_initialized
-
-    # test the data are writeable
-    z2.g2.arr[:] = 2
-    assert (z2.g2.arr[:] == 2).all()
-
-    # test invalid modes
-    with pytest.raises(ValueError):
-        open_consolidated(store, mode='a', chunk_store=chunk_store)
-    with pytest.raises(ValueError):
-        open_consolidated(store, mode='w', chunk_store=chunk_store)
-    with pytest.raises(ValueError):
-        open_consolidated(store, mode='w-', chunk_store=chunk_store)
-
-    # make sure keyword arguments are passed through without error
-    open_consolidated(store, cache_attrs=True, synchronizer=None,
-                      chunk_store=chunk_store)
-
-
 @pytest.mark.parametrize("options", (
     {"dimension_separator": "/"},
     {"dimension_separator": "."},

zarr/tests/test_core.py

Lines changed: 3 additions & 3 deletions

@@ -2720,8 +2720,8 @@ def test_array_init(self):
         assert isinstance(a, Array)
         assert (100,) == a.shape
         assert (10,) == a.chunks
-        assert path == a.path  # TODO: should this include meta/root?
-        assert '/' + path == a.name  # TODO: should this include meta/root?
+        assert path == a.path
+        assert '/' + path == a.name
         assert 'bar' == a.basename
         assert store is a.store
         assert "968dccbbfc0139f703ead2fd1d503ad6e44db307" == a.hexdigest()

@@ -2772,7 +2772,7 @@ def test_nbytes_stored(self):
         z[:] = 42
         expect_nbytes_stored = sum(buffer_size(v) for k, v in z.store.items() if k != 'zarr.json')
         assert expect_nbytes_stored == z.nbytes_stored
-        assert z.nchunks_initialized == 10  # TODO: added temporarily for testing, can remove
+        assert z.nchunks_initialized == 10

         # mess with store
         if not isinstance(z.store, (LRUStoreCacheV3, FSStoreV3)):
