-
-
Notifications
You must be signed in to change notification settings - Fork 304
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
misc Zarr V3 bug fixes: open_group, open_consolidated and MemoryStoreV3 #1006
Changes from all commits
f7aba88
c7edcd8
bb6ded9
ab381d8
4722d2f
22d585f
f22f7a0
4b61acc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -202,23 +202,30 @@ def test_tree(zarr_version): | |
assert str(zarr.tree(g1)) == str(g1.tree()) | ||
|
||
|
||
# TODO: consolidated metadata currently only supported for v2 | ||
|
||
@pytest.mark.parametrize('zarr_version', [2, 3]) | ||
@pytest.mark.parametrize('with_chunk_store', [False, True], ids=['default', 'with_chunk_store']) | ||
def test_consolidate_metadata(with_chunk_store, zarr_version): | ||
|
||
if zarr_version == 2: | ||
MemoryStoreClass = MemoryStore | ||
path = '' | ||
else: | ||
MemoryStoreClass = MemoryStoreV3 | ||
path = 'dataset' | ||
|
||
@pytest.mark.parametrize('stores_from_path', [False, True]) | ||
def test_consolidate_metadata(with_chunk_store, zarr_version, stores_from_path): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Outwith this PR, but this reminds me if v3 would make standardizing consolidated metadata easier. See zarr-developers/zarr-specs#113 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also /zarr-developers/zarr-specs#136 related to where this metadata would be stored |
||
# setup initial data | ||
store = MemoryStoreClass() | ||
chunk_store = MemoryStoreClass() if with_chunk_store else None | ||
z = group(store, chunk_store=chunk_store, path=path) | ||
if stores_from_path: | ||
store = tempfile.mkdtemp() | ||
atexit.register(atexit_rmtree, store) | ||
if with_chunk_store: | ||
chunk_store = tempfile.mkdtemp() | ||
atexit.register(atexit_rmtree, chunk_store) | ||
else: | ||
chunk_store = None | ||
version_kwarg = {'zarr_version': zarr_version} | ||
else: | ||
if zarr_version == 2: | ||
store = MemoryStore() | ||
chunk_store = MemoryStore() if with_chunk_store else None | ||
elif zarr_version == 3: | ||
store = MemoryStoreV3() | ||
chunk_store = MemoryStoreV3() if with_chunk_store else None | ||
version_kwarg = {} | ||
path = 'dataset' if zarr_version == 3 else None | ||
z = group(store, chunk_store=chunk_store, path=path, **version_kwarg) | ||
z.create_group('g1') | ||
g2 = z.create_group('g2') | ||
g2.attrs['hello'] = 'world' | ||
|
@@ -229,64 +236,81 @@ def test_consolidate_metadata(with_chunk_store, zarr_version): | |
arr[:] = 1.0 | ||
assert 16 == arr.nchunks_initialized | ||
|
||
if stores_from_path: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seems fine to have in the test, but this is an interesting block, @grlee77. Can you explain it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are tests below that do |
||
# get the actual store class for use with consolidate_metadata | ||
store_class = z._store | ||
else: | ||
store_class = store | ||
|
||
if zarr_version == 3: | ||
# error on v3 if path not provided | ||
with pytest.raises(ValueError): | ||
consolidate_metadata(store, path=None) | ||
consolidate_metadata(store_class, path=None) | ||
|
||
with pytest.raises(ValueError): | ||
consolidate_metadata(store, path='') | ||
consolidate_metadata(store_class, path='') | ||
|
||
# perform consolidation | ||
out = consolidate_metadata(store, path=path) | ||
out = consolidate_metadata(store_class, path=path) | ||
assert isinstance(out, Group) | ||
assert ['g1', 'g2'] == list(out) | ||
if zarr_version == 2: | ||
assert isinstance(out._store, ConsolidatedMetadataStore) | ||
assert '.zmetadata' in store | ||
meta_keys = ['.zgroup', | ||
'g1/.zgroup', | ||
'g2/.zgroup', | ||
'g2/.zattrs', | ||
'g2/arr/.zarray', | ||
'g2/arr/.zattrs'] | ||
else: | ||
assert isinstance(out._store, ConsolidatedMetadataStoreV3) | ||
assert 'meta/root/consolidated/.zmetadata' in store | ||
meta_keys = ['zarr.json', | ||
meta_root + 'dataset.group.json', | ||
meta_root + 'dataset/g1.group.json', | ||
meta_root + 'dataset/g2.group.json', | ||
meta_root + 'dataset/g2/arr.array.json', | ||
'meta/root/consolidated.group.json'] | ||
for key in meta_keys: | ||
del store[key] | ||
if not stores_from_path: | ||
if zarr_version == 2: | ||
assert isinstance(out._store, ConsolidatedMetadataStore) | ||
assert '.zmetadata' in store | ||
meta_keys = ['.zgroup', | ||
'g1/.zgroup', | ||
'g2/.zgroup', | ||
'g2/.zattrs', | ||
'g2/arr/.zarray', | ||
'g2/arr/.zattrs'] | ||
else: | ||
assert isinstance(out._store, ConsolidatedMetadataStoreV3) | ||
assert 'meta/root/consolidated/.zmetadata' in store | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Again, outwith this PR but back to zarr-developers/zarr-specs#113, could the v3 spec here simply replace all the individual JSON files solely with the consolidated metadata? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it could be done, but haven't tried it. All store types are already somewhat consolidated for v3 since the entire |
||
meta_keys = ['zarr.json', | ||
meta_root + 'dataset.group.json', | ||
meta_root + 'dataset/g1.group.json', | ||
meta_root + 'dataset/g2.group.json', | ||
meta_root + 'dataset/g2/arr.array.json', | ||
'meta/root/consolidated.group.json'] | ||
for key in meta_keys: | ||
del store[key] | ||
|
||
# open consolidated | ||
z2 = open_consolidated(store, chunk_store=chunk_store, path=path) | ||
z2 = open_consolidated(store, chunk_store=chunk_store, path=path, **version_kwarg) | ||
assert ['g1', 'g2'] == list(z2) | ||
assert 'world' == z2.g2.attrs['hello'] | ||
assert 1 == z2.g2.arr.attrs['data'] | ||
assert (z2.g2.arr[:] == 1.0).all() | ||
assert 16 == z2.g2.arr.nchunks | ||
assert 16 == z2.g2.arr.nchunks_initialized | ||
|
||
# tests del/write on the store | ||
if zarr_version == 2: | ||
cmd = ConsolidatedMetadataStore(store) | ||
with pytest.raises(PermissionError): | ||
del cmd['.zgroup'] | ||
with pytest.raises(PermissionError): | ||
cmd['.zgroup'] = None | ||
if stores_from_path: | ||
# path string is note a BaseStore subclass so cannot be used to | ||
# initialize a ConsolidatedMetadataStore. | ||
if zarr_version == 2: | ||
with pytest.raises(ValueError): | ||
cmd = ConsolidatedMetadataStore(store) | ||
elif zarr_version == 3: | ||
with pytest.raises(ValueError): | ||
cmd = ConsolidatedMetadataStoreV3(store) | ||
else: | ||
cmd = ConsolidatedMetadataStoreV3(store) | ||
with pytest.raises(PermissionError): | ||
del cmd[meta_root + 'dataset.group.json'] | ||
with pytest.raises(PermissionError): | ||
cmd[meta_root + 'dataset.group.json'] = None | ||
# tests del/write on the store | ||
if zarr_version == 2: | ||
cmd = ConsolidatedMetadataStore(store) | ||
with pytest.raises(PermissionError): | ||
del cmd['.zgroup'] | ||
with pytest.raises(PermissionError): | ||
cmd['.zgroup'] = None | ||
else: | ||
cmd = ConsolidatedMetadataStoreV3(store) | ||
with pytest.raises(PermissionError): | ||
del cmd[meta_root + 'dataset.group.json'] | ||
with pytest.raises(PermissionError): | ||
cmd[meta_root + 'dataset.group.json'] = None | ||
|
||
# test getsize on the store | ||
assert isinstance(getsize(cmd), Integral) | ||
# test getsize on the store | ||
assert isinstance(getsize(cmd), Integral) | ||
|
||
# test new metadata are not writeable | ||
with pytest.raises(PermissionError): | ||
|
@@ -316,62 +340,11 @@ def test_consolidate_metadata(with_chunk_store, zarr_version): | |
|
||
# make sure keyword arguments are passed through without error | ||
open_consolidated( | ||
store, chunk_store=chunk_store, path=path, cache_attrs=True, synchronizer=None | ||
store, chunk_store=chunk_store, path=path, cache_attrs=True, synchronizer=None, | ||
**version_kwarg, | ||
) | ||
|
||
|
||
def test_consolidated_with_chunk_store(): | ||
# setup initial data | ||
store = MemoryStore() | ||
chunk_store = MemoryStore() | ||
z = group(store, chunk_store=chunk_store) | ||
z.create_group('g1') | ||
g2 = z.create_group('g2') | ||
g2.attrs['hello'] = 'world' | ||
arr = g2.create_dataset('arr', shape=(20, 20), chunks=(5, 5), dtype='f8') | ||
assert 16 == arr.nchunks | ||
assert 0 == arr.nchunks_initialized | ||
arr.attrs['data'] = 1 | ||
arr[:] = 1.0 | ||
assert 16 == arr.nchunks_initialized | ||
|
||
# perform consolidation | ||
out = consolidate_metadata(store) | ||
assert isinstance(out, Group) | ||
assert '.zmetadata' in store | ||
for key in ['.zgroup', | ||
'g1/.zgroup', | ||
'g2/.zgroup', | ||
'g2/.zattrs', | ||
'g2/arr/.zarray', | ||
'g2/arr/.zattrs']: | ||
del store[key] | ||
# open consolidated | ||
z2 = open_consolidated(store, chunk_store=chunk_store) | ||
assert ['g1', 'g2'] == list(z2) | ||
assert 'world' == z2.g2.attrs['hello'] | ||
assert 1 == z2.g2.arr.attrs['data'] | ||
assert (z2.g2.arr[:] == 1.0).all() | ||
assert 16 == z2.g2.arr.nchunks | ||
assert 16 == z2.g2.arr.nchunks_initialized | ||
|
||
# test the data are writeable | ||
z2.g2.arr[:] = 2 | ||
assert (z2.g2.arr[:] == 2).all() | ||
|
||
# test invalid modes | ||
with pytest.raises(ValueError): | ||
open_consolidated(store, mode='a', chunk_store=chunk_store) | ||
with pytest.raises(ValueError): | ||
open_consolidated(store, mode='w', chunk_store=chunk_store) | ||
with pytest.raises(ValueError): | ||
open_consolidated(store, mode='w-', chunk_store=chunk_store) | ||
|
||
# make sure keyword arguments are passed through without error | ||
open_consolidated(store, cache_attrs=True, synchronizer=None, | ||
chunk_store=chunk_store) | ||
|
||
|
||
@pytest.mark.parametrize("options", ( | ||
{"dimension_separator": "/"}, | ||
{"dimension_separator": "."}, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps more for a follow-up, but if this is getting extracted does it make sense to list as a parameter?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I notice that's not happening with
storage_options
either, so it probably is more of a higher-level aesthetic decision.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am fine doing it either way.