From d26923a220bfc02f045ff5373f09a1285872c375 Mon Sep 17 00:00:00 2001 From: jmoore Date: Mon, 14 Jun 2021 13:33:53 +0200 Subject: [PATCH 01/10] Drop skip_if_nested_chunks from test_storage.py --- zarr/tests/test_storage.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index e9b997b335..b5c738bc29 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -803,12 +803,16 @@ def test_pickle(self): class TestDirectoryStore(StoreTests): - def create_store(self, normalize_keys=False, **kwargs): - skip_if_nested_chunks(**kwargs) - + def create_store(self, + normalize_keys=False, + dimension_separator=".", + **kwargs): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) - store = DirectoryStore(path, normalize_keys=normalize_keys, **kwargs) + store = DirectoryStore(path, + normalize_keys=normalize_keys, + dimension_separator=dimension_separator, + **kwargs) return store def test_filesystem_path(self): From c06476df551a339eb4528e07cc1d8adc9ffa6372 Mon Sep 17 00:00:00 2001 From: jmoore Date: Mon, 14 Jun 2021 13:54:41 +0200 Subject: [PATCH 02/10] Add failing nested test --- zarr/tests/test_storage.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index b5c738bc29..cf0172bdd7 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -890,6 +890,23 @@ def mock_walker_no_slash(_path): ) assert res == {'.zgroup', 'g1/.zgroup', 'd1/.zarray'} + def test_read_nested(self): + import zarr + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, path) + + store1 = NestedDirectoryStore(path) + g1 = zarr.open(store=store1, mode="w") + data = g1.create_dataset("data", data=[[1, 2], [3, 4]]) + + store2 = NestedDirectoryStore(path) + g2 = zarr.open(store=store2) + assert g2.data[0][0] == 1 + + store3 = DirectoryStore(path) + g3 = zarr.open(store=store3) + assert g3.data[0][0] == 1 + @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestFSStore(StoreTests): From ce8b2f052c8b493acbb083ef27fd099cff6b76f9 Mon Sep 17 00:00:00 2001 From: jmoore Date: Mon, 14 Jun 2021 13:34:08 +0200 Subject: [PATCH 03/10] Make DirectoryStore dimension_separator aware --- zarr/storage.py | 70 +++++++++++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index d2de2cda4c..4ce8ebc120 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -803,6 +803,10 @@ def __init__(self, path, normalize_keys=False, dimension_separator=None): def _normalize_key(self, key): return key.lower() if self.normalize_keys else key + def _optionally_nested(self, key): + return self._dimension_separator == "/" and \ + _nested_map_ckey(key) or key + def _fromfile(self, fn): """ Read data from a file @@ -838,6 +842,7 @@ def _tofile(self, a, fn): f.write(a) def __getitem__(self, key): + key = self._optionally_nested(key) key = self._normalize_key(key) filepath = os.path.join(self.path, key) if os.path.isfile(filepath): @@ -846,6 +851,7 @@ def __getitem__(self, key): raise KeyError(key) def __setitem__(self, key, value): + key = self._optionally_nested(key) key = self._normalize_key(key) # coerce to flat, contiguous array (ideally without copying) @@ -887,6 +893,7 @@ def __setitem__(self, key, value): os.remove(temp_path) def __delitem__(self, key): + key = self._optionally_nested(key) key = self._normalize_key(key) path = os.path.join(self.path, key) if os.path.isfile(path): @@ -899,6 +906,7 @@ def __delitem__(self, key): raise KeyError(key) def __contains__(self, key): + key = self._optionally_nested(key) key = self._normalize_key(key) file_path = os.path.join(self.path, key) return os.path.isfile(file_path) @@ -947,12 +955,37 @@ def dir_path(self, path=None): return dir_path def listdir(self, path=None): + return self._dimension_separator == "/" and \ + self._nested_listdir(path) or self._flat_listdir(path) + + def _flat_listdir(self, path=None): dir_path = self.dir_path(path) if os.path.isdir(dir_path): return sorted(os.listdir(dir_path)) else: return [] + def _nested_listdir(self, path=None): + children = self._flat_listdir(path=path) + if array_meta_key in children: + # special handling of directories containing an array to map nested chunk + # keys back to standard chunk keys + new_children = [] + root_path = self.dir_path(path) + for entry in children: + entry_path = os.path.join(root_path, entry) + if _prog_number.match(entry) and os.path.isdir(entry_path): + for dir_path, _, file_names in os.walk(entry_path): + for file_name in file_names: + file_path = os.path.join(dir_path, file_name) + rel_path = file_path.split(root_path + os.path.sep)[1] + new_children.append(rel_path.replace(os.path.sep, '.')) + else: + new_children.append(entry) + return sorted(new_children) + else: + return children + def rename(self, src_path, dst_path): store_src_path = normalize_storage_path(src_path) store_dst_path = normalize_storage_path(dst_path) @@ -1314,49 +1347,12 @@ def __init__(self, path, normalize_keys=False, dimension_separator="/"): "NestedDirectoryStore only supports '/' as dimension_separator") self._dimension_separator = dimension_separator - def __getitem__(self, key): - key = _nested_map_ckey(key) - return super().__getitem__(key) - - def __setitem__(self, key, value): - key = _nested_map_ckey(key) - super().__setitem__(key, value) - - def __delitem__(self, key): - key = _nested_map_ckey(key) - super().__delitem__(key) - - def __contains__(self, key): - key = _nested_map_ckey(key) - return super().__contains__(key) - def __eq__(self, other): return ( isinstance(other, NestedDirectoryStore) and self.path == other.path ) - def listdir(self, path=None): - children = super().listdir(path=path) - if array_meta_key in children: - # special handling of directories containing an array to map nested chunk - # keys back to standard chunk keys - new_children = [] - root_path = self.dir_path(path) - for entry in children: - entry_path = os.path.join(root_path, entry) - if _prog_number.match(entry) and os.path.isdir(entry_path): - for dir_path, _, file_names in os.walk(entry_path): - for file_name in file_names: - file_path = os.path.join(dir_path, file_name) - rel_path = file_path.split(root_path + os.path.sep)[1] - new_children.append(rel_path.replace(os.path.sep, '.')) - else: - new_children.append(entry) - return sorted(new_children) - else: - return children - # noinspection PyPep8Naming class ZipStore(MutableMapping): From e1835667116b3f86fe002183a6b527565743795c Mon Sep 17 00:00:00 2001 From: jmoore Date: Mon, 14 Jun 2021 20:21:13 +0200 Subject: [PATCH 04/10] Migrate key logic to core rather than storage Previous tests (now commented out) used logic in the store classes to convert "0/0" keys into "0.0" keys, forcing the store to be aware of array details. This tries to swap the logic so that stores are responsible for passing dimension separator values down to the arrays only. Since arrays can also get the dimension_separator value from a .zarray file they are now in charge. --- zarr/core.py | 2 +- zarr/storage.py | 8 -------- zarr/tests/test_storage.py | 8 ++++---- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 3df8043000..ba3f2c1e2d 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -1952,7 +1952,7 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): return self._encode_chunk(chunk) def _chunk_key(self, chunk_coords): - return self._key_prefix + '.'.join(map(str, chunk_coords)) + return self._key_prefix + self._dimension_separator.join(map(str, chunk_coords)) def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None): # decompress diff --git a/zarr/storage.py b/zarr/storage.py index 4ce8ebc120..42c60d50a1 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -803,10 +803,6 @@ def __init__(self, path, normalize_keys=False, dimension_separator=None): def _normalize_key(self, key): return key.lower() if self.normalize_keys else key - def _optionally_nested(self, key): - return self._dimension_separator == "/" and \ - _nested_map_ckey(key) or key - def _fromfile(self, fn): """ Read data from a file @@ -842,7 +838,6 @@ def _tofile(self, a, fn): f.write(a) def __getitem__(self, key): - key = self._optionally_nested(key) key = self._normalize_key(key) filepath = os.path.join(self.path, key) if os.path.isfile(filepath): @@ -851,7 +846,6 @@ def __getitem__(self, key): raise KeyError(key) def __setitem__(self, key, value): - key = self._optionally_nested(key) key = self._normalize_key(key) # coerce to flat, contiguous array (ideally without copying) @@ -893,7 +887,6 @@ def __setitem__(self, key, value): os.remove(temp_path) def __delitem__(self, key): - key = self._optionally_nested(key) key = self._normalize_key(key) path = os.path.join(self.path, key) if os.path.isfile(path): @@ -906,7 +899,6 @@ def __delitem__(self, key): raise KeyError(key) def __contains__(self, key): - key = self._optionally_nested(key) key = self._normalize_key(key) file_path = os.path.join(self.path, key) return os.path.isfile(file_path) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index cf0172bdd7..938746ca40 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1165,10 +1165,10 @@ def test_chunk_nesting(self): # any path where last segment looks like a chunk key gets special handling store['0.0'] = b'xxx' assert b'xxx' == store['0.0'] - assert b'xxx' == store['0/0'] + # assert b'xxx' == store['0/0'] store['foo/10.20.30'] = b'yyy' assert b'yyy' == store['foo/10.20.30'] - assert b'yyy' == store['foo/10/20/30'] + # assert b'yyy' == store['foo/10/20/30'] store['42'] = b'zzz' assert b'zzz' == store['42'] @@ -1213,12 +1213,12 @@ def test_chunk_nesting(self): store['0.0'] = b'xxx' assert '0.0' in store assert b'xxx' == store['0.0'] - assert b'xxx' == store['0/0'] + # assert b'xxx' == store['0/0'] store['foo/10.20.30'] = b'yyy' assert 'foo/10.20.30' in store assert b'yyy' == store['foo/10.20.30'] # N5 reverses axis order - assert b'yyy' == store['foo/30/20/10'] + # assert b'yyy' == store['foo/30/20/10'] store['42'] = b'zzz' assert '42' in store assert b'zzz' == store['42'] From 449a67fc943d3555739f5d61a54293911ea39c1a Mon Sep 17 00:00:00 2001 From: jmoore Date: Mon, 14 Jun 2021 21:45:08 +0200 Subject: [PATCH 05/10] Fix linting in new test --- zarr/tests/test_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 938746ca40..a690a36d21 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -897,7 +897,7 @@ def test_read_nested(self): store1 = NestedDirectoryStore(path) g1 = zarr.open(store=store1, mode="w") - data = g1.create_dataset("data", data=[[1, 2], [3, 4]]) + g1.create_dataset("data", data=[[1, 2], [3, 4]]) store2 = NestedDirectoryStore(path) g2 = zarr.open(store=store2) From 2e4f4d7990de3fdfcb3d200ca29536b622db7728 Mon Sep 17 00:00:00 2001 From: jmoore Date: Thu, 17 Jun 2021 13:41:10 +0200 Subject: [PATCH 06/10] Extend the test suite for dim_sep --- fixture/flat/.zarray | 22 +++++++++ fixture/flat/0.0 | Bin 0 -> 48 bytes fixture/nested/.zarray | 23 ++++++++++ fixture/nested/0/0 | Bin 0 -> 48 bytes zarr/tests/test_dim_separator.py | 75 +++++++++++++++++++++++++++++++ zarr/tests/test_storage.py | 17 ------- 6 files changed, 120 insertions(+), 17 deletions(-) create mode 100644 fixture/flat/.zarray create mode 100644 fixture/flat/0.0 create mode 100644 fixture/nested/.zarray create mode 100644 fixture/nested/0/0 create mode 100644 zarr/tests/test_dim_separator.py diff --git a/fixture/flat/.zarray b/fixture/flat/.zarray new file mode 100644 index 0000000000..8ec79419da --- /dev/null +++ b/fixture/flat/.zarray @@ -0,0 +1,22 @@ +{ + "chunks": [ + 2, + 2 + ], + "compressor": { + "blocksize": 0, + "clevel": 5, + "cname": "lz4", + "id": "blosc", + "shuffle": 1 + }, + "dtype": " Date: Tue, 17 Aug 2021 14:07:53 +0200 Subject: [PATCH 07/10] Try fsspec 2021.7 (see #802) --- requirements_dev_optional.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index b037f0e77f..8c67a30abb 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -17,6 +17,5 @@ flake8==3.9.2 pytest-cov==2.12.1 pytest-doctestplus==0.10.1 h5py==3.3.0 -s3fs==2021.6.0 -fsspec==2021.6.0 +fsspec==2021.7.0 moto[server]>=1.3.14 From f2f75b7fb4ff92eea2b1df41c31feaafd4687301 Mon Sep 17 00:00:00 2001 From: jmoore Date: Tue, 17 Aug 2021 14:25:10 +0200 Subject: [PATCH 08/10] Revert "Try fsspec 2021.7 (see #802)" This reverts commit 68adca50b62441dabc6b3f48364fe3dcf35eeb69. --- requirements_dev_optional.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 8c67a30abb..b037f0e77f 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -17,5 +17,6 @@ flake8==3.9.2 pytest-cov==2.12.1 pytest-doctestplus==0.10.1 h5py==3.3.0 -fsspec==2021.7.0 +s3fs==2021.6.0 +fsspec==2021.6.0 moto[server]>=1.3.14 From 88a39ff3f849441a325481ae1fc7b1430e25428a Mon Sep 17 00:00:00 2001 From: jmoore Date: Thu, 19 Aug 2021 15:44:09 +0200 Subject: [PATCH 09/10] Fix N5Store --- zarr/n5.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/n5.py b/zarr/n5.py index 67e39357e7..45e2cdda95 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -295,7 +295,7 @@ def invert_chunk_coords(key): last_segment = segments[-1] if _prog_ckey.match(last_segment): coords = list(last_segment.split('.')) - last_segment = '.'.join(coords[::-1]) + last_segment = '/'.join(coords[::-1]) segments = segments[:-1] + [last_segment] key = '/'.join(segments) return key From a5f1811e424f7a6dd8858ae10bea13c8f74f3b5d Mon Sep 17 00:00:00 2001 From: jmoore Date: Thu, 19 Aug 2021 15:55:30 +0200 Subject: [PATCH 10/10] Re-activate contested N5 test --- zarr/tests/test_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 854166ed90..4296ee6364 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1207,7 +1207,7 @@ def test_chunk_nesting(self): assert 'foo/10.20.30' in store assert b'yyy' == store['foo/10.20.30'] # N5 reverses axis order - # assert b'yyy' == store['foo/30/20/10'] + assert b'yyy' == store['foo/30/20/10'] store['42'] = b'zzz' assert '42' in store assert b'zzz' == store['42']