-
-
Notifications
You must be signed in to change notification settings - Fork 323
Fix DirectoryStore #773
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix DirectoryStore #773
Changes from all commits
d26923a
c06476d
ce8b2f0
e183566
449a67f
10c874e
2e4f4d7
cb62c10
68adca5
f2f75b7
88a39ff
a5f1811
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
{ | ||
"chunks": [ | ||
2, | ||
2 | ||
], | ||
"compressor": { | ||
"blocksize": 0, | ||
"clevel": 5, | ||
"cname": "lz4", | ||
"id": "blosc", | ||
"shuffle": 1 | ||
}, | ||
"dtype": "<i8", | ||
"fill_value": 0, | ||
"filters": null, | ||
"order": "C", | ||
"shape": [ | ||
2, | ||
2 | ||
], | ||
"zarr_format": 2 | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
{ | ||
"chunks": [ | ||
2, | ||
2 | ||
], | ||
"compressor": { | ||
"blocksize": 0, | ||
"clevel": 5, | ||
"cname": "lz4", | ||
"id": "blosc", | ||
"shuffle": 1 | ||
}, | ||
"dimension_separator": "/", | ||
"dtype": "<i8", | ||
"fill_value": 0, | ||
"filters": null, | ||
"order": "C", | ||
"shape": [ | ||
2, | ||
2 | ||
], | ||
"zarr_format": 2 | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
import pytest | ||
from numpy.testing import assert_array_equal | ||
|
||
import zarr | ||
from zarr.core import Array | ||
from zarr.storage import (DirectoryStore, NestedDirectoryStore, FSStore) | ||
from zarr.tests.util import have_fsspec | ||
|
||
|
||
@pytest.fixture(params=(
    "static_nested",
    "static_flat",
    "directory_nested",
    "directory_flat",
    "directory_default",
    "nesteddirectory_nested",
    "nesteddirectory_default",
    "fs_nested",
    "fs_flat",
    "fs_default",
))
def dataset(tmpdir, request):
    """
    Provide the location of a small 2x2 Zarr array, one per parameter.

    The parameter name has the form ``<source>_<layout>``:

    * ``static_*`` returns the path of a pre-built fixture directory and
      writes nothing.
    * ``directory_*``, ``nesteddirectory_*`` and ``fs_*`` write
      ``[[1, 2], [3, 4]]`` under ``tmpdir`` through ``DirectoryStore``,
      ``NestedDirectoryStore`` and ``FSStore`` respectively.
    * A ``nested`` layout passes ``dimension_separator="/"``, ``flat``
      passes ``"."``, and ``default`` leaves the argument out.

    The ``fs_*`` variants are skipped when fsspec is not available.
    """
    variant = request.param
    target = tmpdir.join("dim_sep_test.zarr")

    if variant.startswith("static"):
        # NOTE: these are relative paths, so they are resolved against the
        # working directory the tests are run from.
        return "fixture/nested" if variant.endswith("nested") else "fixture/flat"

    store_kwargs = {}
    if variant.startswith("directory"):
        store_class = DirectoryStore
    elif variant.startswith("nested"):
        store_class = NestedDirectoryStore
    else:
        if have_fsspec is False:
            pytest.skip("no fsspec")
        store_class = FSStore
        store_kwargs.update(mode="w", auto_mkdir=True)

    # Only the explicit layouts set a separator; "default" falls through so
    # the store's own default is exercised.
    for suffix, separator in (("nested", "/"), ("flat", ".")):
        if variant.endswith(suffix):
            store_kwargs["dimension_separator"] = separator
            break

    backing_store = store_class(str(target), **store_kwargs)
    zarr.creation.array(store=backing_store, data=[[1, 2], [3, 4]])
    return str(target)
|
||
|
||
def verify(array):
    """Assert that ``array`` holds the 2x2 values written by the ``dataset`` fixture."""
    expected = [[1, 2], [3, 4]]
    assert_array_equal(array[:], expected)
|
||
|
||
def test_open(dataset):
    """Open each ``dataset`` variant with ``zarr.open`` and check its contents."""
    opened = zarr.open(dataset)
    verify(opened)
|
||
|
||
def test_fsstore(dataset):
    """Read each ``dataset`` variant back through an ``FSStore``.

    Skipped when fsspec is unavailable, mirroring the guard the ``dataset``
    fixture applies before it constructs an ``FSStore`` itself.
    """
    # Only the fs_* fixture variants skip themselves; the static and
    # directory-backed variants would otherwise reach FSStore without fsspec.
    if have_fsspec is False:
        pytest.skip("no fsspec")
    verify(Array(store=FSStore(dataset)))
|
||
|
||
def test_directory(dataset):
    """Read each ``dataset`` variant back through a ``DirectoryStore``."""
    directory_store = DirectoryStore(dataset)
    verify(zarr.Array(store=directory_store))
|
||
|
||
def test_nested(dataset):
    """Read each ``dataset`` variant back through a ``NestedDirectoryStore``."""
    nested_store = NestedDirectoryStore(dataset)
    verify(Array(store=nested_store))
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -804,12 +804,16 @@ def test_pickle(self): | |
|
||
class TestDirectoryStore(StoreTests): | ||
|
||
def create_store(self, normalize_keys=False, **kwargs): | ||
skip_if_nested_chunks(**kwargs) | ||
|
||
def create_store(self, | ||
normalize_keys=False, | ||
dimension_separator=".", | ||
**kwargs): | ||
path = tempfile.mkdtemp() | ||
atexit.register(atexit_rmtree, path) | ||
store = DirectoryStore(path, normalize_keys=normalize_keys, **kwargs) | ||
store = DirectoryStore(path, | ||
normalize_keys=normalize_keys, | ||
dimension_separator=dimension_separator, | ||
**kwargs) | ||
return store | ||
|
||
def test_filesystem_path(self): | ||
|
@@ -1150,10 +1154,10 @@ def test_chunk_nesting(self): | |
# any path where last segment looks like a chunk key gets special handling | ||
store['0.0'] = b'xxx' | ||
assert b'xxx' == store['0.0'] | ||
assert b'xxx' == store['0/0'] | ||
# assert b'xxx' == store['0/0'] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why did we lose these? Doesn't the store normalise "." -> "/" anyway? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I tried to describe it in the commit message on e183566, at least to the best of my understanding:
|
||
store['foo/10.20.30'] = b'yyy' | ||
assert b'yyy' == store['foo/10.20.30'] | ||
assert b'yyy' == store['foo/10/20/30'] | ||
# assert b'yyy' == store['foo/10/20/30'] | ||
store['42'] = b'zzz' | ||
assert b'zzz' == store['42'] | ||
|
||
|
@@ -1198,7 +1202,7 @@ def test_chunk_nesting(self): | |
store['0.0'] = b'xxx' | ||
assert '0.0' in store | ||
assert b'xxx' == store['0.0'] | ||
assert b'xxx' == store['0/0'] | ||
# assert b'xxx' == store['0/0'] | ||
store['foo/10.20.30'] = b'yyy' | ||
assert 'foo/10.20.30' in store | ||
assert b'yyy' == store['foo/10.20.30'] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you explain how this change is compatible with `FSStore._normalize_key`? https://github.com/zarr-developers/zarr-python/blob/master/zarr/storage.py#L1076
`FSStore._normalize_key` assumes that all chunk keys are formatted `foo/bar/0.0.0` -- this assumption is the basis of splitting the chunk key into a prefix and a chunk ID via `key.split('/')`. As I understand it, this change breaks this assumption.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Reading the `flat` and `nested` fixtures from this repo (`zarr.open(f"file:///tmp/{x}")[:]`) with some sloppy debugging in place shows: [debug output not included here]
which likely points to some logic in FSStore being ripe for removal, since the Store is basically just accepting what the Array has detected. Now, how it is that that's working with your PR, I still haven't figured out.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right, as your test shows this is fine for FSStore (and maybe we don't need this code in the store at all if the chunk keys come pre-normalized). But this situation is dire for N5Stores, which need to be able to re-order the chunk keys before writing to storage.