Skip to content

Commit da88aa3

Browse files
authored
Fix DirectoryStore (#773)
* Drop skip_if_nested_chunks from test_storage.py * Add failing nested test * Make DirectoryStore dimension_separator aware * Migrate key logic to core rather than storage Previous tests (now commented out) used logic in the store classes to convert "0/0" keys into "0.0" keys, forcing the store to be aware of array details. This tries to swap the logic so that stores are responsible for passing dimension separator values down to the arrays only. Since arrays can also get the dimension_separator value from a .zarray file they are now in charge. * Fix linting in new test * Extend the test suite for dim_sep * Try fsspec 2021.7 (see #802) * Revert "Try fsspec 2021.7 (see #802)" This reverts commit 68adca5. * Fix N5Store * Re-activate contested N5 test
1 parent ce04aaa commit da88aa3

File tree

9 files changed

+158
-46
lines changed

9 files changed

+158
-46
lines changed

fixture/flat/.zarray

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"chunks": [
3+
2,
4+
2
5+
],
6+
"compressor": {
7+
"blocksize": 0,
8+
"clevel": 5,
9+
"cname": "lz4",
10+
"id": "blosc",
11+
"shuffle": 1
12+
},
13+
"dtype": "<i8",
14+
"fill_value": 0,
15+
"filters": null,
16+
"order": "C",
17+
"shape": [
18+
2,
19+
2
20+
],
21+
"zarr_format": 2
22+
}

fixture/flat/0.0

48 Bytes
Binary file not shown.

fixture/nested/.zarray

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"chunks": [
3+
2,
4+
2
5+
],
6+
"compressor": {
7+
"blocksize": 0,
8+
"clevel": 5,
9+
"cname": "lz4",
10+
"id": "blosc",
11+
"shuffle": 1
12+
},
13+
"dimension_separator": "/",
14+
"dtype": "<i8",
15+
"fill_value": 0,
16+
"filters": null,
17+
"order": "C",
18+
"shape": [
19+
2,
20+
2
21+
],
22+
"zarr_format": 2
23+
}

fixture/nested/0/0

48 Bytes
Binary file not shown.

zarr/core.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1952,7 +1952,7 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None):
19521952
return self._encode_chunk(chunk)
19531953

19541954
def _chunk_key(self, chunk_coords):
1955-
return self._key_prefix + '.'.join(map(str, chunk_coords))
1955+
return self._key_prefix + self._dimension_separator.join(map(str, chunk_coords))
19561956

19571957
def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None):
19581958
# decompress

zarr/n5.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ def invert_chunk_coords(key):
295295
last_segment = segments[-1]
296296
if _prog_ckey.match(last_segment):
297297
coords = list(last_segment.split('.'))
298-
last_segment = '.'.join(coords[::-1])
298+
last_segment = '/'.join(coords[::-1])
299299
segments = segments[:-1] + [last_segment]
300300
key = '/'.join(segments)
301301
return key

zarr/storage.py

+25-37
Original file line numberDiff line numberDiff line change
@@ -948,12 +948,37 @@ def dir_path(self, path=None):
948948
return dir_path
949949

950950
def listdir(self, path=None):
951+
return self._dimension_separator == "/" and \
952+
self._nested_listdir(path) or self._flat_listdir(path)
953+
954+
def _flat_listdir(self, path=None):
951955
dir_path = self.dir_path(path)
952956
if os.path.isdir(dir_path):
953957
return sorted(os.listdir(dir_path))
954958
else:
955959
return []
956960

961+
def _nested_listdir(self, path=None):
962+
children = self._flat_listdir(path=path)
963+
if array_meta_key in children:
964+
# special handling of directories containing an array to map nested chunk
965+
# keys back to standard chunk keys
966+
new_children = []
967+
root_path = self.dir_path(path)
968+
for entry in children:
969+
entry_path = os.path.join(root_path, entry)
970+
if _prog_number.match(entry) and os.path.isdir(entry_path):
971+
for dir_path, _, file_names in os.walk(entry_path):
972+
for file_name in file_names:
973+
file_path = os.path.join(dir_path, file_name)
974+
rel_path = file_path.split(root_path + os.path.sep)[1]
975+
new_children.append(rel_path.replace(os.path.sep, '.'))
976+
else:
977+
new_children.append(entry)
978+
return sorted(new_children)
979+
else:
980+
return children
981+
957982
def rename(self, src_path, dst_path):
958983
store_src_path = normalize_storage_path(src_path)
959984
store_dst_path = normalize_storage_path(dst_path)
@@ -1315,49 +1340,12 @@ def __init__(self, path, normalize_keys=False, dimension_separator="/"):
13151340
"NestedDirectoryStore only supports '/' as dimension_separator")
13161341
self._dimension_separator = dimension_separator
13171342

1318-
def __getitem__(self, key):
1319-
key = _nested_map_ckey(key)
1320-
return super().__getitem__(key)
1321-
1322-
def __setitem__(self, key, value):
1323-
key = _nested_map_ckey(key)
1324-
super().__setitem__(key, value)
1325-
1326-
def __delitem__(self, key):
1327-
key = _nested_map_ckey(key)
1328-
super().__delitem__(key)
1329-
1330-
def __contains__(self, key):
1331-
key = _nested_map_ckey(key)
1332-
return super().__contains__(key)
1333-
13341343
def __eq__(self, other):
13351344
return (
13361345
isinstance(other, NestedDirectoryStore) and
13371346
self.path == other.path
13381347
)
13391348

1340-
def listdir(self, path=None):
1341-
children = super().listdir(path=path)
1342-
if array_meta_key in children:
1343-
# special handling of directories containing an array to map nested chunk
1344-
# keys back to standard chunk keys
1345-
new_children = []
1346-
root_path = self.dir_path(path)
1347-
for entry in children:
1348-
entry_path = os.path.join(root_path, entry)
1349-
if _prog_number.match(entry) and os.path.isdir(entry_path):
1350-
for dir_path, _, file_names in os.walk(entry_path):
1351-
for file_name in file_names:
1352-
file_path = os.path.join(dir_path, file_name)
1353-
rel_path = file_path.split(root_path + os.path.sep)[1]
1354-
new_children.append(rel_path.replace(os.path.sep, '.'))
1355-
else:
1356-
new_children.append(entry)
1357-
return sorted(new_children)
1358-
else:
1359-
return children
1360-
13611349

13621350
# noinspection PyPep8Naming
13631351
class ZipStore(MutableMapping):

zarr/tests/test_dim_separator.py

+75
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import pytest
2+
from numpy.testing import assert_array_equal
3+
4+
import zarr
5+
from zarr.core import Array
6+
from zarr.storage import (DirectoryStore, NestedDirectoryStore, FSStore)
7+
from zarr.tests.util import have_fsspec
8+
9+
10+
@pytest.fixture(params=("static_nested",
11+
"static_flat",
12+
"directory_nested",
13+
"directory_flat",
14+
"directory_default",
15+
"nesteddirectory_nested",
16+
"nesteddirectory_default",
17+
"fs_nested",
18+
"fs_flat",
19+
"fs_default"))
20+
def dataset(tmpdir, request):
21+
"""
22+
Generate a variety of different Zarrs using
23+
different store implementations as well as
24+
different dimension_separator arguments.
25+
"""
26+
27+
loc = tmpdir.join("dim_sep_test.zarr")
28+
which = request.param
29+
kwargs = {}
30+
31+
if which.startswith("static"):
32+
if which.endswith("nested"):
33+
return "fixture/nested"
34+
else:
35+
return "fixture/flat"
36+
37+
if which.startswith("directory"):
38+
store_class = DirectoryStore
39+
elif which.startswith("nested"):
40+
store_class = NestedDirectoryStore
41+
else:
42+
if have_fsspec is False:
43+
pytest.skip("no fsspec")
44+
store_class = FSStore
45+
kwargs["mode"] = "w"
46+
kwargs["auto_mkdir"] = True
47+
48+
if which.endswith("nested"):
49+
kwargs["dimension_separator"] = "/"
50+
elif which.endswith("flat"):
51+
kwargs["dimension_separator"] = "."
52+
53+
store = store_class(str(loc), **kwargs)
54+
zarr.creation.array(store=store, data=[[1, 2], [3, 4]])
55+
return str(loc)
56+
57+
58+
def verify(array):
59+
assert_array_equal(array[:], [[1, 2], [3, 4]])
60+
61+
62+
def test_open(dataset):
63+
verify(zarr.open(dataset))
64+
65+
66+
def test_fsstore(dataset):
67+
verify(Array(store=FSStore(dataset)))
68+
69+
70+
def test_directory(dataset):
71+
verify(zarr.Array(store=DirectoryStore(dataset)))
72+
73+
74+
def test_nested(dataset):
75+
verify(Array(store=NestedDirectoryStore(dataset)))

zarr/tests/test_storage.py

+11-7
Original file line numberDiff line numberDiff line change
@@ -804,12 +804,16 @@ def test_pickle(self):
804804

805805
class TestDirectoryStore(StoreTests):
806806

807-
def create_store(self, normalize_keys=False, **kwargs):
808-
skip_if_nested_chunks(**kwargs)
809-
807+
def create_store(self,
808+
normalize_keys=False,
809+
dimension_separator=".",
810+
**kwargs):
810811
path = tempfile.mkdtemp()
811812
atexit.register(atexit_rmtree, path)
812-
store = DirectoryStore(path, normalize_keys=normalize_keys, **kwargs)
813+
store = DirectoryStore(path,
814+
normalize_keys=normalize_keys,
815+
dimension_separator=dimension_separator,
816+
**kwargs)
813817
return store
814818

815819
def test_filesystem_path(self):
@@ -1150,10 +1154,10 @@ def test_chunk_nesting(self):
11501154
# any path where last segment looks like a chunk key gets special handling
11511155
store['0.0'] = b'xxx'
11521156
assert b'xxx' == store['0.0']
1153-
assert b'xxx' == store['0/0']
1157+
# assert b'xxx' == store['0/0']
11541158
store['foo/10.20.30'] = b'yyy'
11551159
assert b'yyy' == store['foo/10.20.30']
1156-
assert b'yyy' == store['foo/10/20/30']
1160+
# assert b'yyy' == store['foo/10/20/30']
11571161
store['42'] = b'zzz'
11581162
assert b'zzz' == store['42']
11591163

@@ -1198,7 +1202,7 @@ def test_chunk_nesting(self):
11981202
store['0.0'] = b'xxx'
11991203
assert '0.0' in store
12001204
assert b'xxx' == store['0.0']
1201-
assert b'xxx' == store['0/0']
1205+
# assert b'xxx' == store['0/0']
12021206
store['foo/10.20.30'] = b'yyy'
12031207
assert 'foo/10.20.30' in store
12041208
assert b'yyy' == store['foo/10.20.30']

0 commit comments

Comments
 (0)