Patch restored to one diff line per source line; indentation and blank lines
are reconstructed from the hunk line counts. The zarr/storage.py hunks differ
from the recorded post-image blob: listdir uses an explicit if/return instead
of `cond and a or b`, and the three listing methods carry docstrings.

diff --git a/fixture/flat/.zarray b/fixture/flat/.zarray
new file mode 100644
index 0000000000..8ec79419da
--- /dev/null
+++ b/fixture/flat/.zarray
@@ -0,0 +1,22 @@
+{
+    "chunks": [
+        2,
+        2
+    ],
+    "compressor": {
+        "blocksize": 0,
+        "clevel": 5,
+        "cname": "lz4",
+        "id": "blosc",
+        "shuffle": 1
+    },
+    "dtype": "<i8",
+    "fill_value": 0,
+    "filters": null,
+    "order": "C",
+    "shape": [
+        2,
+        2
+    ],
+    "zarr_format": 2
+}
\ No newline at end of file
diff --git a/fixture/flat/0.0 b/fixture/flat/0.0
new file mode 100644
index 0000000000..cab0cbca72
Binary files /dev/null and b/fixture/flat/0.0 differ
diff --git a/fixture/nested/.zarray b/fixture/nested/.zarray
new file mode 100644
index 0000000000..00acb02788
--- /dev/null
+++ b/fixture/nested/.zarray
@@ -0,0 +1,23 @@
+{
+    "chunks": [
+        2,
+        2
+    ],
+    "compressor": {
+        "blocksize": 0,
+        "clevel": 5,
+        "cname": "lz4",
+        "id": "blosc",
+        "shuffle": 1
+    },
+    "dimension_separator": "/",
+    "dtype": "<i8",
+    "fill_value": 0,
+    "filters": null,
+    "order": "C",
+    "shape": [
+        2,
+        2
+    ],
+    "zarr_format": 2
+}
\ No newline at end of file
diff --git a/fixture/nested/0/0 b/fixture/nested/0/0
new file mode 100644
index 0000000000..cab0cbca72
Binary files /dev/null and b/fixture/nested/0/0 differ
diff --git a/zarr/core.py b/zarr/core.py
index 3df8043000..ba3f2c1e2d 100644
--- a/zarr/core.py
+++ b/zarr/core.py
@@ -1952,7 +1952,7 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None):
         return self._encode_chunk(chunk)
 
     def _chunk_key(self, chunk_coords):
-        return self._key_prefix + '.'.join(map(str, chunk_coords))
+        return self._key_prefix + self._dimension_separator.join(map(str, chunk_coords))
 
     def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None):
         # decompress
diff --git a/zarr/n5.py b/zarr/n5.py
index 67e39357e7..45e2cdda95 100644
--- a/zarr/n5.py
+++ b/zarr/n5.py
@@ -295,7 +295,7 @@ def invert_chunk_coords(key):
         last_segment = segments[-1]
         if _prog_ckey.match(last_segment):
             coords = list(last_segment.split('.'))
-            last_segment = '.'.join(coords[::-1])
+            last_segment = '/'.join(coords[::-1])
             segments = segments[:-1] + [last_segment]
             key = '/'.join(segments)
     return key
diff --git a/zarr/storage.py b/zarr/storage.py
index f858e42191..0dcfa2899e 100644
--- a/zarr/storage.py
+++ b/zarr/storage.py
@@ -948,12 +948,41 @@ def dir_path(self, path=None):
         return dir_path
 
     def listdir(self, path=None):
+        """List entries under `path`, un-nesting chunk keys when the separator is '/'."""
+        if self._dimension_separator == "/":
+            return self._nested_listdir(path)
+        return self._flat_listdir(path)
+
+    def _flat_listdir(self, path=None):
+        """Return the sorted names in the directory for `path`, or [] if not a directory."""
         dir_path = self.dir_path(path)
         if os.path.isdir(dir_path):
             return sorted(os.listdir(dir_path))
         else:
             return []
 
+    def _nested_listdir(self, path=None):
+        """List `path`, flattening nested chunk files into '.'-separated chunk keys."""
+        children = self._flat_listdir(path=path)
+        if array_meta_key in children:
+            # special handling of directories containing an array to map nested chunk
+            # keys back to standard chunk keys
+            new_children = []
+            root_path = self.dir_path(path)
+            for entry in children:
+                entry_path = os.path.join(root_path, entry)
+                if _prog_number.match(entry) and os.path.isdir(entry_path):
+                    for dir_path, _, file_names in os.walk(entry_path):
+                        for file_name in file_names:
+                            file_path = os.path.join(dir_path, file_name)
+                            rel_path = file_path.split(root_path + os.path.sep)[1]
+                            new_children.append(rel_path.replace(os.path.sep, '.'))
+                else:
+                    new_children.append(entry)
+            return sorted(new_children)
+        else:
+            return children
+
     def rename(self, src_path, dst_path):
         store_src_path = normalize_storage_path(src_path)
         store_dst_path = normalize_storage_path(dst_path)
@@ -1315,49 +1344,12 @@ def __init__(self, path, normalize_keys=False, dimension_separator="/"):
                 "NestedDirectoryStore only supports '/' as dimension_separator")
         self._dimension_separator = dimension_separator
 
-    def __getitem__(self, key):
-        key = _nested_map_ckey(key)
-        return super().__getitem__(key)
-
-    def __setitem__(self, key, value):
-        key = _nested_map_ckey(key)
-        super().__setitem__(key, value)
-
-    def __delitem__(self, key):
-        key = _nested_map_ckey(key)
-        super().__delitem__(key)
-
-    def __contains__(self, key):
-        key = _nested_map_ckey(key)
-        return super().__contains__(key)
-
     def __eq__(self, other):
         return (
             isinstance(other, NestedDirectoryStore) and
             self.path == other.path
         )
 
-    def listdir(self, path=None):
-        children = super().listdir(path=path)
-        if array_meta_key in children:
-            # special handling of directories containing an array to map nested chunk
-            # keys back to standard chunk keys
-            new_children = []
-            root_path = self.dir_path(path)
-            for entry in children:
-                entry_path = os.path.join(root_path, entry)
-                if _prog_number.match(entry) and os.path.isdir(entry_path):
-                    for dir_path, _, file_names in os.walk(entry_path):
-                        for file_name in file_names:
-                            file_path = os.path.join(dir_path, file_name)
-                            rel_path = file_path.split(root_path + os.path.sep)[1]
-                            new_children.append(rel_path.replace(os.path.sep, '.'))
-                else:
-                    new_children.append(entry)
-            return sorted(new_children)
-        else:
-            return children
-
 
 # noinspection PyPep8Naming
 class ZipStore(MutableMapping):
diff --git a/zarr/tests/test_dim_separator.py b/zarr/tests/test_dim_separator.py
new file mode 100644
index 0000000000..281d0c1504
--- /dev/null
+++ b/zarr/tests/test_dim_separator.py
@@ -0,0 +1,75 @@
+import pytest
+from numpy.testing import assert_array_equal
+
+import zarr
+from zarr.core import Array
+from zarr.storage import (DirectoryStore, NestedDirectoryStore, FSStore)
+from zarr.tests.util import have_fsspec
+
+
+@pytest.fixture(params=("static_nested",
+                        "static_flat",
+                        "directory_nested",
+                        "directory_flat",
+                        "directory_default",
+                        "nesteddirectory_nested",
+                        "nesteddirectory_default",
+                        "fs_nested",
+                        "fs_flat",
+                        "fs_default"))
+def dataset(tmpdir, request):
+    """
+    Generate a variety of different Zarrs using
+    different store implementations as well as
+    different dimension_separator arguments.
+    """
+
+    loc = tmpdir.join("dim_sep_test.zarr")
+    which = request.param
+    kwargs = {}
+
+    if which.startswith("static"):
+        if which.endswith("nested"):
+            return "fixture/nested"
+        else:
+            return "fixture/flat"
+
+    if which.startswith("directory"):
+        store_class = DirectoryStore
+    elif which.startswith("nested"):
+        store_class = NestedDirectoryStore
+    else:
+        if have_fsspec is False:
+            pytest.skip("no fsspec")
+        store_class = FSStore
+        kwargs["mode"] = "w"
+        kwargs["auto_mkdir"] = True
+
+    if which.endswith("nested"):
+        kwargs["dimension_separator"] = "/"
+    elif which.endswith("flat"):
+        kwargs["dimension_separator"] = "."
+
+    store = store_class(str(loc), **kwargs)
+    zarr.creation.array(store=store, data=[[1, 2], [3, 4]])
+    return str(loc)
+
+
+def verify(array):
+    assert_array_equal(array[:], [[1, 2], [3, 4]])
+
+
+def test_open(dataset):
+    verify(zarr.open(dataset))
+
+
+def test_fsstore(dataset):
+    verify(Array(store=FSStore(dataset)))
+
+
+def test_directory(dataset):
+    verify(zarr.Array(store=DirectoryStore(dataset)))
+
+
+def test_nested(dataset):
+    verify(Array(store=NestedDirectoryStore(dataset)))
diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py
index d3f3b0e770..4296ee6364 100644
--- a/zarr/tests/test_storage.py
+++ b/zarr/tests/test_storage.py
@@ -804,12 +804,16 @@ def test_pickle(self):
 
 class TestDirectoryStore(StoreTests):
 
-    def create_store(self, normalize_keys=False, **kwargs):
-        skip_if_nested_chunks(**kwargs)
-
+    def create_store(self,
+                     normalize_keys=False,
+                     dimension_separator=".",
+                     **kwargs):
         path = tempfile.mkdtemp()
         atexit.register(atexit_rmtree, path)
-        store = DirectoryStore(path, normalize_keys=normalize_keys, **kwargs)
+        store = DirectoryStore(path,
+                               normalize_keys=normalize_keys,
+                               dimension_separator=dimension_separator,
+                               **kwargs)
         return store
 
     def test_filesystem_path(self):
@@ -1150,10 +1154,10 @@ def test_chunk_nesting(self):
         # any path where last segment looks like a chunk key gets special handling
         store['0.0'] = b'xxx'
         assert b'xxx' == store['0.0']
-        assert b'xxx' == store['0/0']
+        # assert b'xxx' == store['0/0']
         store['foo/10.20.30'] = b'yyy'
         assert b'yyy' == store['foo/10.20.30']
-        assert b'yyy' == store['foo/10/20/30']
+        # assert b'yyy' == store['foo/10/20/30']
         store['42'] = b'zzz'
         assert b'zzz' == store['42']
 
@@ -1198,7 +1202,7 @@ def test_chunk_nesting(self):
         store['0.0'] = b'xxx'
         assert '0.0' in store
         assert b'xxx' == store['0.0']
-        assert b'xxx' == store['0/0']
+        # assert b'xxx' == store['0/0']
         store['foo/10.20.30'] = b'yyy'
         assert 'foo/10.20.30' in store
         assert b'yyy' == store['foo/10.20.30']