From d26923a220bfc02f045ff5373f09a1285872c375 Mon Sep 17 00:00:00 2001 From: jmoore Date: Mon, 14 Jun 2021 13:33:53 +0200 Subject: [PATCH 01/36] Drop skip_if_nested_chunks from test_storage.py --- zarr/tests/test_storage.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index e9b997b335..b5c738bc29 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -803,12 +803,16 @@ def test_pickle(self): class TestDirectoryStore(StoreTests): - def create_store(self, normalize_keys=False, **kwargs): - skip_if_nested_chunks(**kwargs) - + def create_store(self, + normalize_keys=False, + dimension_separator=".", + **kwargs): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) - store = DirectoryStore(path, normalize_keys=normalize_keys, **kwargs) + store = DirectoryStore(path, + normalize_keys=normalize_keys, + dimension_separator=dimension_separator, + **kwargs) return store def test_filesystem_path(self): From c06476df551a339eb4528e07cc1d8adc9ffa6372 Mon Sep 17 00:00:00 2001 From: jmoore Date: Mon, 14 Jun 2021 13:54:41 +0200 Subject: [PATCH 02/36] Add failing nested test --- zarr/tests/test_storage.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index b5c738bc29..cf0172bdd7 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -890,6 +890,23 @@ def mock_walker_no_slash(_path): ) assert res == {'.zgroup', 'g1/.zgroup', 'd1/.zarray'} + def test_read_nested(self): + import zarr + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, path) + + store1 = NestedDirectoryStore(path) + g1 = zarr.open(store=store1, mode="w") + data = g1.create_dataset("data", data=[[1, 2], [3, 4]]) + + store2 = NestedDirectoryStore(path) + g2 = zarr.open(store=store2) + assert g2.data[0][0] == 1 + + store3 = DirectoryStore(path) + g3 = zarr.open(store=store3) + assert g3.data[0][0] 
== 1 + @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestFSStore(StoreTests): From ce8b2f052c8b493acbb083ef27fd099cff6b76f9 Mon Sep 17 00:00:00 2001 From: jmoore Date: Mon, 14 Jun 2021 13:34:08 +0200 Subject: [PATCH 03/36] Make DirectoryStore dimension_separator aware --- zarr/storage.py | 70 +++++++++++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index d2de2cda4c..4ce8ebc120 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -803,6 +803,10 @@ def __init__(self, path, normalize_keys=False, dimension_separator=None): def _normalize_key(self, key): return key.lower() if self.normalize_keys else key + def _optionally_nested(self, key): + return self._dimension_separator == "/" and \ + _nested_map_ckey(key) or key + def _fromfile(self, fn): """ Read data from a file @@ -838,6 +842,7 @@ def _tofile(self, a, fn): f.write(a) def __getitem__(self, key): + key = self._optionally_nested(key) key = self._normalize_key(key) filepath = os.path.join(self.path, key) if os.path.isfile(filepath): @@ -846,6 +851,7 @@ def __getitem__(self, key): raise KeyError(key) def __setitem__(self, key, value): + key = self._optionally_nested(key) key = self._normalize_key(key) # coerce to flat, contiguous array (ideally without copying) @@ -887,6 +893,7 @@ def __setitem__(self, key, value): os.remove(temp_path) def __delitem__(self, key): + key = self._optionally_nested(key) key = self._normalize_key(key) path = os.path.join(self.path, key) if os.path.isfile(path): @@ -899,6 +906,7 @@ def __delitem__(self, key): raise KeyError(key) def __contains__(self, key): + key = self._optionally_nested(key) key = self._normalize_key(key) file_path = os.path.join(self.path, key) return os.path.isfile(file_path) @@ -947,12 +955,37 @@ def dir_path(self, path=None): return dir_path def listdir(self, path=None): + return self._dimension_separator == "/" and \ + self._nested_listdir(path) or 
self._flat_listdir(path) + + def _flat_listdir(self, path=None): dir_path = self.dir_path(path) if os.path.isdir(dir_path): return sorted(os.listdir(dir_path)) else: return [] + def _nested_listdir(self, path=None): + children = self._flat_listdir(path=path) + if array_meta_key in children: + # special handling of directories containing an array to map nested chunk + # keys back to standard chunk keys + new_children = [] + root_path = self.dir_path(path) + for entry in children: + entry_path = os.path.join(root_path, entry) + if _prog_number.match(entry) and os.path.isdir(entry_path): + for dir_path, _, file_names in os.walk(entry_path): + for file_name in file_names: + file_path = os.path.join(dir_path, file_name) + rel_path = file_path.split(root_path + os.path.sep)[1] + new_children.append(rel_path.replace(os.path.sep, '.')) + else: + new_children.append(entry) + return sorted(new_children) + else: + return children + def rename(self, src_path, dst_path): store_src_path = normalize_storage_path(src_path) store_dst_path = normalize_storage_path(dst_path) @@ -1314,49 +1347,12 @@ def __init__(self, path, normalize_keys=False, dimension_separator="/"): "NestedDirectoryStore only supports '/' as dimension_separator") self._dimension_separator = dimension_separator - def __getitem__(self, key): - key = _nested_map_ckey(key) - return super().__getitem__(key) - - def __setitem__(self, key, value): - key = _nested_map_ckey(key) - super().__setitem__(key, value) - - def __delitem__(self, key): - key = _nested_map_ckey(key) - super().__delitem__(key) - - def __contains__(self, key): - key = _nested_map_ckey(key) - return super().__contains__(key) - def __eq__(self, other): return ( isinstance(other, NestedDirectoryStore) and self.path == other.path ) - def listdir(self, path=None): - children = super().listdir(path=path) - if array_meta_key in children: - # special handling of directories containing an array to map nested chunk - # keys back to standard chunk keys - 
new_children = [] - root_path = self.dir_path(path) - for entry in children: - entry_path = os.path.join(root_path, entry) - if _prog_number.match(entry) and os.path.isdir(entry_path): - for dir_path, _, file_names in os.walk(entry_path): - for file_name in file_names: - file_path = os.path.join(dir_path, file_name) - rel_path = file_path.split(root_path + os.path.sep)[1] - new_children.append(rel_path.replace(os.path.sep, '.')) - else: - new_children.append(entry) - return sorted(new_children) - else: - return children - # noinspection PyPep8Naming class ZipStore(MutableMapping): From e1835667116b3f86fe002183a6b527565743795c Mon Sep 17 00:00:00 2001 From: jmoore Date: Mon, 14 Jun 2021 20:21:13 +0200 Subject: [PATCH 04/36] Migrate key logic to core rather than storage Previous tests (now commented out) used logic in the store classes to convert "0/0" keys into "0.0" keys, forcing the store to be aware of array details. This tries to swap the logic so that stores are responsible for passing dimension separator values down to the arrays only. Since arrays can also get the dimension_separator value from a .zarray file they are now in charge. 
--- zarr/core.py | 2 +- zarr/storage.py | 8 -------- zarr/tests/test_storage.py | 8 ++++---- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 3df8043000..ba3f2c1e2d 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -1952,7 +1952,7 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): return self._encode_chunk(chunk) def _chunk_key(self, chunk_coords): - return self._key_prefix + '.'.join(map(str, chunk_coords)) + return self._key_prefix + self._dimension_separator.join(map(str, chunk_coords)) def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None): # decompress diff --git a/zarr/storage.py b/zarr/storage.py index 4ce8ebc120..42c60d50a1 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -803,10 +803,6 @@ def __init__(self, path, normalize_keys=False, dimension_separator=None): def _normalize_key(self, key): return key.lower() if self.normalize_keys else key - def _optionally_nested(self, key): - return self._dimension_separator == "/" and \ - _nested_map_ckey(key) or key - def _fromfile(self, fn): """ Read data from a file @@ -842,7 +838,6 @@ def _tofile(self, a, fn): f.write(a) def __getitem__(self, key): - key = self._optionally_nested(key) key = self._normalize_key(key) filepath = os.path.join(self.path, key) if os.path.isfile(filepath): @@ -851,7 +846,6 @@ def __getitem__(self, key): raise KeyError(key) def __setitem__(self, key, value): - key = self._optionally_nested(key) key = self._normalize_key(key) # coerce to flat, contiguous array (ideally without copying) @@ -893,7 +887,6 @@ def __setitem__(self, key, value): os.remove(temp_path) def __delitem__(self, key): - key = self._optionally_nested(key) key = self._normalize_key(key) path = os.path.join(self.path, key) if os.path.isfile(path): @@ -906,7 +899,6 @@ def __delitem__(self, key): raise KeyError(key) def __contains__(self, key): - key = self._optionally_nested(key) key = self._normalize_key(key) file_path = 
os.path.join(self.path, key) return os.path.isfile(file_path) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index cf0172bdd7..938746ca40 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1165,10 +1165,10 @@ def test_chunk_nesting(self): # any path where last segment looks like a chunk key gets special handling store['0.0'] = b'xxx' assert b'xxx' == store['0.0'] - assert b'xxx' == store['0/0'] + # assert b'xxx' == store['0/0'] store['foo/10.20.30'] = b'yyy' assert b'yyy' == store['foo/10.20.30'] - assert b'yyy' == store['foo/10/20/30'] + # assert b'yyy' == store['foo/10/20/30'] store['42'] = b'zzz' assert b'zzz' == store['42'] @@ -1213,12 +1213,12 @@ def test_chunk_nesting(self): store['0.0'] = b'xxx' assert '0.0' in store assert b'xxx' == store['0.0'] - assert b'xxx' == store['0/0'] + # assert b'xxx' == store['0/0'] store['foo/10.20.30'] = b'yyy' assert 'foo/10.20.30' in store assert b'yyy' == store['foo/10.20.30'] # N5 reverses axis order - assert b'yyy' == store['foo/30/20/10'] + # assert b'yyy' == store['foo/30/20/10'] store['42'] = b'zzz' assert '42' in store assert b'zzz' == store['42'] From 449a67fc943d3555739f5d61a54293911ea39c1a Mon Sep 17 00:00:00 2001 From: jmoore Date: Mon, 14 Jun 2021 21:45:08 +0200 Subject: [PATCH 05/36] Fix linting in new test --- zarr/tests/test_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 938746ca40..a690a36d21 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -897,7 +897,7 @@ def test_read_nested(self): store1 = NestedDirectoryStore(path) g1 = zarr.open(store=store1, mode="w") - data = g1.create_dataset("data", data=[[1, 2], [3, 4]]) + g1.create_dataset("data", data=[[1, 2], [3, 4]]) store2 = NestedDirectoryStore(path) g2 = zarr.open(store=store2) From 2e4f4d7990de3fdfcb3d200ca29536b622db7728 Mon Sep 17 00:00:00 2001 From: jmoore Date: Thu, 17 Jun 2021 13:41:10 
+0200 Subject: [PATCH 06/36] Extend the test suite for dim_sep --- fixture/flat/.zarray | 22 +++++++++ fixture/flat/0.0 | Bin 0 -> 48 bytes fixture/nested/.zarray | 23 ++++++++++ fixture/nested/0/0 | Bin 0 -> 48 bytes zarr/tests/test_dim_separator.py | 75 +++++++++++++++++++++++++++++++ zarr/tests/test_storage.py | 17 ------- 6 files changed, 120 insertions(+), 17 deletions(-) create mode 100644 fixture/flat/.zarray create mode 100644 fixture/flat/0.0 create mode 100644 fixture/nested/.zarray create mode 100644 fixture/nested/0/0 create mode 100644 zarr/tests/test_dim_separator.py diff --git a/fixture/flat/.zarray b/fixture/flat/.zarray new file mode 100644 index 0000000000..8ec79419da --- /dev/null +++ b/fixture/flat/.zarray @@ -0,0 +1,22 @@ +{ + "chunks": [ + 2, + 2 + ], + "compressor": { + "blocksize": 0, + "clevel": 5, + "cname": "lz4", + "id": "blosc", + "shuffle": 1 + }, + "dtype": " Date: Mon, 5 Jul 2021 19:01:59 -0400 Subject: [PATCH 07/36] add n5fsstore and tests --- zarr/n5.py | 246 ++++++++++++++++++++++++++++++++++++- zarr/storage.py | 10 +- zarr/tests/test_storage.py | 82 ++++++++++++- 3 files changed, 332 insertions(+), 6 deletions(-) diff --git a/zarr/n5.py b/zarr/n5.py index fa01005302..4d53902ac8 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -11,7 +11,7 @@ from numcodecs.registry import get_codec, register_codec from .meta import ZARR_FORMAT, json_dumps, json_loads -from .storage import NestedDirectoryStore, _prog_ckey, _prog_number +from .storage import NestedDirectoryStore, _prog_ckey, _prog_number, FSStore, normalize_storage_path from .storage import array_meta_key as zarr_array_meta_key from .storage import attrs_key as zarr_attrs_key from .storage import group_meta_key as zarr_group_meta_key @@ -281,6 +281,250 @@ def _contains_attrs(self, path): return len(attrs) > 0 +class N5FSStore(FSStore): + """Implentation of the N5 format (https://github.com/saalfeldlab/n5) using `fsspec`, + which allows storage on a variety of filesystems. 
Based on `zarr.N5Store`. + Parameters + ---------- + path : string + Location of directory to use as the root of the storage hierarchy. + normalize_keys : bool, optional + If True, all store keys will be normalized to use lower case characters + (e.g. 'foo' and 'FOO' will be treated as equivalent). This can be + useful to avoid potential discrepancies between case-senstive and + case-insensitive file system. Default value is False. + Examples + -------- + Store a single array:: + >>> import zarr + >>> store = zarr.N5FSStore('data/array.n5') + >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) + >>> z[...] = 42 + Store a group:: + >>> store = zarr.N5FSStore('data/group.n5') + >>> root = zarr.group(store=store, overwrite=True) + >>> foo = root.create_group('foo') + >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) + >>> bar[...] = 42 + Notes + ----- + This is an experimental feature. + Safe to write in multiple threads or processes. + """ + + def __init__(self, *args, **kwargs): + kwargs["key_separator"] = "/" + kwargs["meta_keys"] = ("attributes.json",) + super().__init__(*args, **kwargs) + + def _normalize_key(self, key): + if is_chunk_key(key): + key = invert_chunk_coords(key) + + key = normalize_storage_path(key).lstrip("/") + if key: + *bits, end = key.split("/") + + if end not in self._META_KEYS: + end = end.replace(".", self.key_separator) + key = "/".join(bits + [end]) + return key.lower() if self.normalize_keys else key + + def __getitem__(self, key): + if key.endswith(zarr_group_meta_key): + + key = key.replace(zarr_group_meta_key, self._META_KEYS[0]) + value = group_metadata_to_zarr(self._load_n5_attrs(key)) + + return json_dumps(value) + + elif key.endswith(zarr_array_meta_key): + + key = key.replace(zarr_array_meta_key, self._META_KEYS[0]) + value = array_metadata_to_zarr(self._load_n5_attrs(key)) + + return json_dumps(value) + + elif key.endswith(zarr_attrs_key): + + key = key.replace(zarr_attrs_key, self._META_KEYS[0]) + 
value = attrs_to_zarr(self._load_n5_attrs(key)) + + if len(value) == 0: + raise KeyError(key) + else: + return json_dumps(value) + return super().__getitem__(key) + + def __setitem__(self, key, value): + if key.endswith(zarr_group_meta_key): + + key = key.replace(zarr_group_meta_key, self._META_KEYS[0]) + + n5_attrs = self._load_n5_attrs(key) + n5_attrs.update(**group_metadata_to_n5(json_loads(value))) + + value = json_dumps(n5_attrs) + + elif key.endswith(zarr_array_meta_key): + + key = key.replace(zarr_array_meta_key, self._META_KEYS[0]) + + n5_attrs = self._load_n5_attrs(key) + n5_attrs.update(**array_metadata_to_n5(json_loads(value))) + + value = json_dumps(n5_attrs) + + elif key.endswith(zarr_attrs_key): + + key = key.replace(zarr_attrs_key, self._META_KEYS[0]) + + n5_attrs = self._load_n5_attrs(key) + zarr_attrs = json_loads(value) + + for k in n5_keywords: + if k in zarr_attrs.keys(): + raise ValueError( + "Can not set attribute %s, this is a reserved N5 keyword" % k + ) + + # replace previous user attributes + for k in list(n5_attrs.keys()): + if k not in n5_keywords: + del n5_attrs[k] + + # add new user attributes + n5_attrs.update(**zarr_attrs) + + value = json_dumps(n5_attrs) + + super().__setitem__(key, value) + + def __delitem__(self, key): + + if key.endswith(zarr_group_meta_key): # pragma: no cover + key = key.replace(zarr_group_meta_key, self._META_KEYS[0]) + elif key.endswith(zarr_array_meta_key): # pragma: no cover + key = key.replace(zarr_array_meta_key, self._META_KEYS[0]) + elif key.endswith(zarr_attrs_key): # pragma: no cover + key = key.replace(zarr_attrs_key, self._META_KEYS[0]) + + super().__delitem__(key) + + def __contains__(self, key): + if key.endswith(zarr_group_meta_key): + + key = key.replace(zarr_group_meta_key, self._META_KEYS[0]) + if key not in self: + return False + # group if not a dataset (attributes do not contain 'dimensions') + return "dimensions" not in self._load_n5_attrs(key) + + elif key.endswith(zarr_array_meta_key): + 
+ key = key.replace(zarr_array_meta_key, self._META_KEYS[0]) + # array if attributes contain 'dimensions' + return "dimensions" in self._load_n5_attrs(key) + + elif key.endswith(zarr_attrs_key): + + key = key.replace(zarr_attrs_key, self._META_KEYS[0]) + return self._contains_attrs(key) + + return super().__contains__(key) + + def __eq__(self, other): + return isinstance(other, N5FSStore) and self.path == other.path + + def listdir(self, path=None): + + if path is not None: + path = invert_chunk_coords(path) + + # We can't use NestedDirectoryStore's listdir, as it requires + # array_meta_key to be present in array directories, which this store + # doesn't provide. + children = super().listdir(path=path) + if self._is_array(path): + + # replace n5 attribute file with respective zarr attribute files + children.remove(self._META_KEYS[0]) + children.append(zarr_array_meta_key) + if self._contains_attrs(path): + children.append(zarr_attrs_key) + + # special handling of directories containing an array to map + # inverted nested chunk keys back to standard chunk keys + new_children = [] + root_path = self.dir_path(path) + for entry in children: + entry_path = os.path.join(root_path, entry) + if _prog_number.match(entry) and self.fs.isdir(entry_path): + for dir_path, _, file_names in self.fs.walk(entry_path): + for file_name in file_names: + file_path = os.path.join(dir_path, file_name) + rel_path = file_path.split(root_path + os.path.sep)[1] + new_child = rel_path.replace(os.path.sep, ".") + new_children.append(invert_chunk_coords(new_child)) + else: + new_children.append(entry) + + return sorted(new_children) + + elif self._is_group(path): + + # replace n5 attribute file with respective zarr attribute files + children.remove(self._META_KEYS[0]) + children.append(zarr_group_meta_key) + if self._contains_attrs(path): # pragma: no cover + children.append(zarr_attrs_key) + + return sorted(children) + + else: + + return children + + def _load_n5_attrs(self, path): + try: + s 
= super().__getitem__(path) + return json_loads(s) + except KeyError: + return {} + + def _is_group(self, path): + + if path is None: + attrs_key = self._META_KEYS[0] + else: + attrs_key = os.path.join(path, self._META_KEYS[0]) + + n5_attrs = self._load_n5_attrs(attrs_key) + return len(n5_attrs) > 0 and "dimensions" not in n5_attrs + + def _is_array(self, path): + + if path is None: + attrs_key = self._META_KEYS[0] + else: + attrs_key = os.path.join(path, self._META_KEYS[0]) + + return "dimensions" in self._load_n5_attrs(attrs_key) + + def _contains_attrs(self, path): + + if path is None: + attrs_key = self._META_KEYS[0] + else: + if not path.endswith(self._META_KEYS[0]): + attrs_key = os.path.join(path, self._META_KEYS[0]) + else: # pragma: no cover + attrs_key = path + + attrs = attrs_to_zarr(self._load_n5_attrs(attrs_key)) + return len(attrs) > 0 + + def is_chunk_key(key): segments = list(key.split('/')) if segments: diff --git a/zarr/storage.py b/zarr/storage.py index c332ee02f5..c6a09c2475 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1019,14 +1019,15 @@ class FSStore(MutableMapping): exceptions : list of Exception subclasses When accessing data, any of these exceptions will be treated as a missing key + meta_keys : list or tuple of str + Defaults to the zarr meta keys, i.e. (".zarray", ".zgroup", ".zattrs"). 
storage_options : passed to the fsspec implementation """ - _META_KEYS = (attrs_key, group_meta_key, array_meta_key) - def __init__(self, url, normalize_keys=True, key_separator='.', mode='w', exceptions=(KeyError, PermissionError, IOError), + meta_keys=(array_meta_key, group_meta_key, attrs_key), **storage_options): import fsspec self.normalize_keys = normalize_keys @@ -1036,6 +1037,7 @@ def __init__(self, url, normalize_keys=True, key_separator='.', self.path = self.fs._strip_protocol(url) self.mode = mode self.exceptions = exceptions + self._META_KEYS = meta_keys if self.fs.exists(self.path) and not self.fs.isdir(self.path): raise FSPathExistNotDir(url) @@ -1044,7 +1046,7 @@ def _normalize_key(self, key): if key: *bits, end = key.split('/') - if end not in FSStore._META_KEYS: + if end not in self._META_KEYS: end = end.replace('.', self.key_separator) key = '/'.join(bits + [end]) @@ -1052,7 +1054,7 @@ def _normalize_key(self, key): def getitems(self, keys, **kwargs): keys = [self._normalize_key(key) for key in keys] - return self.map.getitems(keys, on_error="omit") + return self.map.getitems(keys, **kwargs) def __getitem__(self, key): key = self._normalize_key(key) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index a6598f2781..f0b58e6352 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -22,7 +22,7 @@ from zarr.meta import (ZARR_FORMAT, decode_array_metadata, decode_group_metadata, encode_array_metadata, encode_group_metadata) -from zarr.n5 import N5Store +from zarr.n5 import N5Store, N5FSStore from zarr.storage import (ABSStore, ConsolidatedMetadataStore, DBMStore, DictStore, DirectoryStore, LMDBStore, LRUStoreCache, MemoryStore, MongoDBStore, NestedDirectoryStore, @@ -1197,6 +1197,86 @@ def test_filters(self): init_array(store, shape=1000, chunks=100, filters=filters) +@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") +class TestN5FSStore(TestFSStore, unittest.TestCase): + def create_store(self, 
normalize_keys=False): + path = tempfile.mkdtemp(suffix='.n5') + atexit.register(atexit_rmtree, path) + store = N5FSStore(path, normalize_keys=normalize_keys) + return store + + def test_equal(self): + store_a = self.create_store() + store_b = N5FSStore(store_a.path) + assert store_a == store_b + + def test_init_array(self): + store = self.create_store() + init_array(store, shape=1000, chunks=100) + + # check metadata + assert array_meta_key in store + meta = decode_array_metadata(store[array_meta_key]) + assert ZARR_FORMAT == meta['zarr_format'] + assert (1000,) == meta['shape'] + assert (100,) == meta['chunks'] + assert np.dtype(None) == meta['dtype'] + # N5Store wraps the actual compressor + compressor_config = meta['compressor']['compressor_config'] + assert default_compressor.get_config() == compressor_config + # N5Store always has a fill value of 0 + assert meta['fill_value'] == 0 + + def test_init_array_path(self): + path = 'foo/bar' + store = self.create_store() + init_array(store, shape=1000, chunks=100, path=path) + + # check metadata + key = path + '/' + array_meta_key + assert key in store + meta = decode_array_metadata(store[key]) + assert ZARR_FORMAT == meta['zarr_format'] + assert (1000,) == meta['shape'] + assert (100,) == meta['chunks'] + assert np.dtype(None) == meta['dtype'] + # N5Store wraps the actual compressor + compressor_config = meta['compressor']['compressor_config'] + assert default_compressor.get_config() == compressor_config + # N5Store always has a fill value of 0 + assert meta['fill_value'] == 0 + + def test_init_array_compat(self): + store = self.create_store() + init_array(store, shape=1000, chunks=100, compressor='none') + meta = decode_array_metadata(store[array_meta_key]) + # N5Store wraps the actual compressor + compressor_config = meta['compressor']['compressor_config'] + assert compressor_config is None + + def test_init_array_overwrite(self): + self._test_init_array_overwrite('C') + + def 
test_init_array_overwrite_path(self): + self._test_init_array_overwrite_path('C') + + def test_init_array_overwrite_chunk_store(self): + self._test_init_array_overwrite_chunk_store('C') + + def test_init_group_overwrite(self): + self._test_init_group_overwrite('C') + + def test_init_group_overwrite_path(self): + self._test_init_group_overwrite_path('C') + + def test_init_group_overwrite_chunk_store(self): + self._test_init_group_overwrite_chunk_store('C') + + def test_key_separator(self): + with pytest.raises(TypeError): + self.create_store(key_separator='.') + + @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestNestedFSStore(TestNestedDirectoryStore): From bb1121c721aad0a13afabc773c81defd341a9530 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 5 Jul 2021 20:22:53 -0400 Subject: [PATCH 08/36] slightly smarter kwarg interception --- zarr/n5.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/zarr/n5.py b/zarr/n5.py index 3f98f850bf..cfbe450343 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -313,9 +313,19 @@ class N5FSStore(FSStore): """ def __init__(self, *args, **kwargs): - kwargs["key_separator"] = "/" - kwargs["meta_keys"] = ("attributes.json",) - super().__init__(*args, **kwargs) + if 'dimension_separator' in kwargs: + kwargs.pop('dimension_separator') + warnings.warn('Keyword argument `dimension_separator` will be ignored') + dimension_separator = "/" + + if 'meta_keys' in kwargs: + kwargs.pop('meta_keys') + warnings.warn('Keyword argument `meta_keys` will be ignored') + meta_keys = ("attributes.json",) + super().__init__(*args, + dimension_separator=dimension_separator, + meta_keys=meta_keys, + **kwargs) def _normalize_key(self, key): if is_chunk_key(key): From be8f37fae8d154aa40843f407bf61e1c0ae73f59 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 5 Jul 2021 20:23:36 -0400 Subject: [PATCH 09/36] remove outdated unittest ref and fix the name of a test func --- 
zarr/tests/test_storage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 35c36a2611..afd4333c32 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1288,7 +1288,7 @@ def test_filters(self): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -class TestN5FSStore(TestFSStore, unittest.TestCase): +class TestN5FSStore(TestFSStore): def create_store(self, normalize_keys=False): path = tempfile.mkdtemp(suffix='.n5') atexit.register(atexit_rmtree, path) @@ -1362,7 +1362,7 @@ def test_init_group_overwrite_path(self): def test_init_group_overwrite_chunk_store(self): self._test_init_group_overwrite_chunk_store('C') - def test_key_separator(self): + def test_dimension_separator(self): with pytest.raises(TypeError): self.create_store(key_separator='.') From 95b257366b5752adee0f6f57d4491500ba1ebf6b Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 5 Jul 2021 20:24:16 -0400 Subject: [PATCH 10/36] fix massive string block and fix default key_separator kwarg for FSStore --- zarr/storage.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 23819064aa..8833b82dbb 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1036,11 +1036,13 @@ class FSStore(MutableMapping): When accessing data, any of these exceptions will be treated as a missing key meta_keys : list or tuple of str - Defaults to the zarr meta keys, i.e. (".zarray", ".zgroup", ".zattrs"). + Reserved keys for metadata. + Defaults to the zarr metatadata keys, i.e. (".zarray", ".zgroup", ".zattrs"). dimension_separator : {'.', '/'}, optional Separator placed between the dimensions of a chunk. 
storage_options : passed to the fsspec implementation - def __init__(self, url, normalize_keys=True, key_separator='.', + """ + def __init__(self, url, normalize_keys=True, key_separator=None, mode='w', exceptions=(KeyError, PermissionError, IOError), meta_keys=(array_meta_key, group_meta_key, attrs_key), @@ -1065,7 +1067,6 @@ def __init__(self, url, normalize_keys=True, key_separator='.', # Pass attributes to array creation self._dimension_separator = dimension_separator - if self.fs.exists(self.path) and not self.fs.isdir(self.path): raise FSPathExistNotDir(url) From ceba78d0a5dec53e81032012353346722dd415e8 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 6 Jul 2021 09:21:46 -0400 Subject: [PATCH 11/36] flake8 --- zarr/n5.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/zarr/n5.py b/zarr/n5.py index cfbe450343..b654c7dd31 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -317,15 +317,15 @@ def __init__(self, *args, **kwargs): kwargs.pop('dimension_separator') warnings.warn('Keyword argument `dimension_separator` will be ignored') dimension_separator = "/" - + if 'meta_keys' in kwargs: kwargs.pop('meta_keys') warnings.warn('Keyword argument `meta_keys` will be ignored') meta_keys = ("attributes.json",) - super().__init__(*args, - dimension_separator=dimension_separator, - meta_keys=meta_keys, - **kwargs) + super().__init__(*args, + dimension_separator=dimension_separator, + meta_keys=meta_keys, + **kwargs) def _normalize_key(self, key): if is_chunk_key(key): From 02ea91c949514d9fddbcd82416cfa98f7ee0b9f1 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 6 Jul 2021 10:47:45 -0400 Subject: [PATCH 12/36] promote n5store to toplevel import and fix examples in docstring --- zarr/__init__.py | 2 +- zarr/n5.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/zarr/__init__.py b/zarr/__init__.py index 8079bab071..7558ce77de 100644 --- a/zarr/__init__.py +++ b/zarr/__init__.py @@ -9,7 +9,7 @@ 
zeros_like) from zarr.errors import CopyError, MetadataError from zarr.hierarchy import Group, group, open_group -from zarr.n5 import N5Store +from zarr.n5 import N5Store, N5FSStore from zarr.storage import (ABSStore, DBMStore, DictStore, DirectoryStore, LMDBStore, LRUStoreCache, MemoryStore, MongoDBStore, NestedDirectoryStore, RedisStore, SQLiteStore, diff --git a/zarr/n5.py b/zarr/n5.py index b654c7dd31..7f7650bd89 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -293,19 +293,24 @@ class N5FSStore(FSStore): (e.g. 'foo' and 'FOO' will be treated as equivalent). This can be useful to avoid potential discrepancies between case-senstive and case-insensitive file system. Default value is False. + Examples -------- Store a single array:: + >>> import zarr - >>> store = zarr.N5FSStore('data/array.n5') + >>> store = zarr.N5FSStore('data/array.n5', auto_mkdir=True) >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) >>> z[...] = 42 + Store a group:: - >>> store = zarr.N5FSStore('data/group.n5') + + >>> store = zarr.N5FSStore('data/group.n5', auto_mkdir=True) >>> root = zarr.group(store=store, overwrite=True) >>> foo = root.create_group('foo') >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) >>> bar[...] = 42 + Notes ----- This is an experimental feature. 
From 68adca50b62441dabc6b3f48364fe3dcf35eeb69 Mon Sep 17 00:00:00 2001 From: jmoore Date: Tue, 17 Aug 2021 14:07:53 +0200 Subject: [PATCH 13/36] Try fsspec 2021.7 (see #802) --- requirements_dev_optional.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index b037f0e77f..8c67a30abb 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -17,6 +17,5 @@ flake8==3.9.2 pytest-cov==2.12.1 pytest-doctestplus==0.10.1 h5py==3.3.0 -s3fs==2021.6.0 -fsspec==2021.6.0 +fsspec==2021.7.0 moto[server]>=1.3.14 From f2f75b7fb4ff92eea2b1df41c31feaafd4687301 Mon Sep 17 00:00:00 2001 From: jmoore Date: Tue, 17 Aug 2021 14:25:10 +0200 Subject: [PATCH 14/36] Revert "Try fsspec 2021.7 (see #802)" This reverts commit 68adca50b62441dabc6b3f48364fe3dcf35eeb69. --- requirements_dev_optional.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt index 8c67a30abb..b037f0e77f 100644 --- a/requirements_dev_optional.txt +++ b/requirements_dev_optional.txt @@ -17,5 +17,6 @@ flake8==3.9.2 pytest-cov==2.12.1 pytest-doctestplus==0.10.1 h5py==3.3.0 -fsspec==2021.7.0 +s3fs==2021.6.0 +fsspec==2021.6.0 moto[server]>=1.3.14 From a57b3bc8930b67418e21306b87169b8d27b00805 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 17 Aug 2021 14:01:03 -0400 Subject: [PATCH 15/36] Add missing core tests for N5FSStore, and rchanges required for making them pass --- zarr/n5.py | 559 ++++++++++++++++++------------------- zarr/storage.py | 20 +- zarr/tests/test_core.py | 18 +- zarr/tests/test_storage.py | 4 +- 4 files changed, 308 insertions(+), 293 deletions(-) diff --git a/zarr/n5.py b/zarr/n5.py index 7f7650bd89..99ec60f299 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -11,7 +11,7 @@ from numcodecs.registry import get_codec, register_codec from .meta import ZARR_FORMAT, json_dumps, json_loads -from .storage import 
NestedDirectoryStore, _prog_ckey, _prog_number, FSStore, normalize_storage_path +from .storage import NestedDirectoryStore, _prog_ckey, _prog_number, normalize_storage_path from .storage import array_meta_key as zarr_array_meta_key from .storage import attrs_key as zarr_attrs_key from .storage import group_meta_key as zarr_group_meta_key @@ -281,343 +281,338 @@ def _contains_attrs(self, path): return len(attrs) > 0 -class N5FSStore(FSStore): - """Implentation of the N5 format (https://github.com/saalfeldlab/n5) using `fsspec`, - which allows storage on a variety of filesystems. Based on `zarr.N5Store`. - Parameters - ---------- - path : string - Location of directory to use as the root of the storage hierarchy. - normalize_keys : bool, optional - If True, all store keys will be normalized to use lower case characters - (e.g. 'foo' and 'FOO' will be treated as equivalent). This can be - useful to avoid potential discrepancies between case-senstive and - case-insensitive file system. Default value is False. - - Examples - -------- - Store a single array:: - - >>> import zarr - >>> store = zarr.N5FSStore('data/array.n5', auto_mkdir=True) - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) - >>> z[...] = 42 - - Store a group:: - - >>> store = zarr.N5FSStore('data/group.n5', auto_mkdir=True) - >>> root = zarr.group(store=store, overwrite=True) - >>> foo = root.create_group('foo') - >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) - >>> bar[...] = 42 - - Notes - ----- - This is an experimental feature. - Safe to write in multiple threads or processes. 
- """ - - def __init__(self, *args, **kwargs): - if 'dimension_separator' in kwargs: - kwargs.pop('dimension_separator') - warnings.warn('Keyword argument `dimension_separator` will be ignored') - dimension_separator = "/" - - if 'meta_keys' in kwargs: - kwargs.pop('meta_keys') - warnings.warn('Keyword argument `meta_keys` will be ignored') - meta_keys = ("attributes.json",) - super().__init__(*args, - dimension_separator=dimension_separator, - meta_keys=meta_keys, - **kwargs) - - def _normalize_key(self, key): - if is_chunk_key(key): - key = invert_chunk_coords(key) - - key = normalize_storage_path(key).lstrip("/") - if key: - *bits, end = key.split("/") - - if end not in self._META_KEYS: - end = end.replace(".", self.key_separator) - key = "/".join(bits + [end]) - return key.lower() if self.normalize_keys else key - - def __getitem__(self, key): - if key.endswith(zarr_group_meta_key): - - key = key.replace(zarr_group_meta_key, self._META_KEYS[0]) - value = group_metadata_to_zarr(self._load_n5_attrs(key)) - - return json_dumps(value) - - elif key.endswith(zarr_array_meta_key): +try: + from .storage import FSStore + + class N5FSStore(FSStore): + """Implentation of the N5 format (https://github.com/saalfeldlab/n5) using `fsspec`, + which allows storage on a variety of filesystems. Based on `zarr.N5Store`. + Parameters + ---------- + path : string + Location of directory to use as the root of the storage hierarchy. + normalize_keys : bool, optional + If True, all store keys will be normalized to use lower case characters + (e.g. 'foo' and 'FOO' will be treated as equivalent). This can be + useful to avoid potential discrepancies between case-senstive and + case-insensitive file system. Default value is False. + + Examples + -------- + Store a single array:: + + >>> import zarr + >>> store = zarr.N5FSStore('data/array.n5', auto_mkdir=True) + >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) + >>> z[...] 
= 42 + + Store a group:: + + >>> store = zarr.N5FSStore('data/group.n5', auto_mkdir=True) + >>> root = zarr.group(store=store, overwrite=True) + >>> foo = root.create_group('foo') + >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) + >>> bar[...] = 42 + + Notes + ----- + This is an experimental feature. + Safe to write in multiple threads or processes. + """ + array_meta_key = 'attributes.json' + group_meta_key = 'attributes.json' + attrs_key = 'attributes.json' + + def __init__(self, *args, **kwargs): + if 'dimension_separator' in kwargs: + kwargs.pop('dimension_separator') + warnings.warn('Keyword argument `dimension_separator` will be ignored') + dimension_separator = "/" + super().__init__(*args, dimension_separator=dimension_separator, **kwargs) + + def _normalize_key(self, key): + if is_chunk_key(key): + key = invert_chunk_coords(key) + + key = normalize_storage_path(key).lstrip("/") + if key: + *bits, end = key.split("/") + + if end not in (self.array_meta_key, self.group_meta_key, self.attrs_key): + end = end.replace(".", self.key_separator) + key = "/".join(bits + [end]) + return key.lower() if self.normalize_keys else key + + def __getitem__(self, key): + if key.endswith(zarr_group_meta_key): + + key = key.replace(zarr_group_meta_key, self.group_meta_key) + value = group_metadata_to_zarr(self._load_n5_attrs(key)) - key = key.replace(zarr_array_meta_key, self._META_KEYS[0]) - value = array_metadata_to_zarr(self._load_n5_attrs(key)) - - return json_dumps(value) - - elif key.endswith(zarr_attrs_key): - - key = key.replace(zarr_attrs_key, self._META_KEYS[0]) - value = attrs_to_zarr(self._load_n5_attrs(key)) - - if len(value) == 0: - raise KeyError(key) - else: return json_dumps(value) - return super().__getitem__(key) - - def __setitem__(self, key, value): - if key.endswith(zarr_group_meta_key): - - key = key.replace(zarr_group_meta_key, self._META_KEYS[0]) - - n5_attrs = self._load_n5_attrs(key) - 
n5_attrs.update(**group_metadata_to_n5(json_loads(value))) - - value = json_dumps(n5_attrs) - elif key.endswith(zarr_array_meta_key): - - key = key.replace(zarr_array_meta_key, self._META_KEYS[0]) - - n5_attrs = self._load_n5_attrs(key) - n5_attrs.update(**array_metadata_to_n5(json_loads(value))) + elif key.endswith(zarr_array_meta_key): - value = json_dumps(n5_attrs) + key = key.replace(zarr_array_meta_key, self.array_meta_key) + value = array_metadata_to_zarr(self._load_n5_attrs(key)) - elif key.endswith(zarr_attrs_key): - - key = key.replace(zarr_attrs_key, self._META_KEYS[0]) - - n5_attrs = self._load_n5_attrs(key) - zarr_attrs = json_loads(value) - - for k in n5_keywords: - if k in zarr_attrs.keys(): - raise ValueError( - "Can not set attribute %s, this is a reserved N5 keyword" % k - ) - - # replace previous user attributes - for k in list(n5_attrs.keys()): - if k not in n5_keywords: - del n5_attrs[k] - - # add new user attributes - n5_attrs.update(**zarr_attrs) - - value = json_dumps(n5_attrs) + return json_dumps(value) - super().__setitem__(key, value) + elif key.endswith(zarr_attrs_key): - def __delitem__(self, key): + key = key.replace(zarr_attrs_key, self.attrs_key) + value = attrs_to_zarr(self._load_n5_attrs(key)) - if key.endswith(zarr_group_meta_key): # pragma: no cover - key = key.replace(zarr_group_meta_key, self._META_KEYS[0]) - elif key.endswith(zarr_array_meta_key): # pragma: no cover - key = key.replace(zarr_array_meta_key, self._META_KEYS[0]) - elif key.endswith(zarr_attrs_key): # pragma: no cover - key = key.replace(zarr_attrs_key, self._META_KEYS[0]) - - super().__delitem__(key) + if len(value) == 0: + raise KeyError(key) + else: + return json_dumps(value) + return super().__getitem__(key) - def __contains__(self, key): - if key.endswith(zarr_group_meta_key): + def __setitem__(self, key, value): + if key.endswith(zarr_group_meta_key): - key = key.replace(zarr_group_meta_key, self._META_KEYS[0]) - if key not in self: - return False - # group 
if not a dataset (attributes do not contain 'dimensions') - return "dimensions" not in self._load_n5_attrs(key) + key = key.replace(zarr_group_meta_key, self.group_meta_key) - elif key.endswith(zarr_array_meta_key): + n5_attrs = self._load_n5_attrs(key) + n5_attrs.update(**group_metadata_to_n5(json_loads(value))) - key = key.replace(zarr_array_meta_key, self._META_KEYS[0]) - # array if attributes contain 'dimensions' - return "dimensions" in self._load_n5_attrs(key) + value = json_dumps(n5_attrs) - elif key.endswith(zarr_attrs_key): + elif key.endswith(zarr_array_meta_key): - key = key.replace(zarr_attrs_key, self._META_KEYS[0]) - return self._contains_attrs(key) + key = key.replace(zarr_array_meta_key, self.array_meta_key) - return super().__contains__(key) + n5_attrs = self._load_n5_attrs(key) + n5_attrs.update(**array_metadata_to_n5(json_loads(value))) - def __eq__(self, other): - return isinstance(other, N5FSStore) and self.path == other.path + value = json_dumps(n5_attrs) - def listdir(self, path=None): + elif key.endswith(zarr_attrs_key): - if path is not None: - path = invert_chunk_coords(path) + key = key.replace(zarr_attrs_key, self.attrs_key) - # We can't use NestedDirectoryStore's listdir, as it requires - # array_meta_key to be present in array directories, which this store - # doesn't provide. 
- children = super().listdir(path=path) - if self._is_array(path): + n5_attrs = self._load_n5_attrs(key) + zarr_attrs = json_loads(value) - # replace n5 attribute file with respective zarr attribute files - children.remove(self._META_KEYS[0]) - children.append(zarr_array_meta_key) - if self._contains_attrs(path): - children.append(zarr_attrs_key) + for k in n5_keywords: + if k in zarr_attrs.keys(): + raise ValueError( + "Can not set attribute %s, this is a reserved N5 keyword" % k + ) - # special handling of directories containing an array to map - # inverted nested chunk keys back to standard chunk keys - new_children = [] - root_path = self.dir_path(path) - for entry in children: - entry_path = os.path.join(root_path, entry) - if _prog_number.match(entry) and self.fs.isdir(entry_path): - for dir_path, _, file_names in self.fs.walk(entry_path): - for file_name in file_names: - file_path = os.path.join(dir_path, file_name) - rel_path = file_path.split(root_path + os.path.sep)[1] - new_child = rel_path.replace(os.path.sep, ".") - new_children.append(invert_chunk_coords(new_child)) - else: - new_children.append(entry) + # replace previous user attributes + for k in list(n5_attrs.keys()): + if k not in n5_keywords: + del n5_attrs[k] - return sorted(new_children) + # add new user attributes + n5_attrs.update(**zarr_attrs) - elif self._is_group(path): + value = json_dumps(n5_attrs) - # replace n5 attribute file with respective zarr attribute files - children.remove(self._META_KEYS[0]) - children.append(zarr_group_meta_key) - if self._contains_attrs(path): # pragma: no cover - children.append(zarr_attrs_key) + super().__setitem__(key, value) - return sorted(children) + def __delitem__(self, key): - else: + if key.endswith(zarr_group_meta_key): # pragma: no cover + key = key.replace(zarr_group_meta_key, self.group_meta_key) + elif key.endswith(zarr_array_meta_key): # pragma: no cover + key = key.replace(zarr_array_meta_key, self.array_meta_key) + elif 
key.endswith(zarr_attrs_key): # pragma: no cover + key = key.replace(zarr_attrs_key, self.attrs_key) - return children + super().__delitem__(key) - def _load_n5_attrs(self, path): - try: - s = super().__getitem__(path) - return json_loads(s) - except KeyError: - return {} + def __contains__(self, key): + if key.endswith(zarr_group_meta_key): - def _is_group(self, path): + key = key.replace(zarr_group_meta_key, self.group_meta_key) + if key not in self: + return False + # group if not a dataset (attributes do not contain 'dimensions') + return "dimensions" not in self._load_n5_attrs(key) - if path is None: - attrs_key = self._META_KEYS[0] - else: - attrs_key = os.path.join(path, self._META_KEYS[0]) + elif key.endswith(zarr_array_meta_key): - n5_attrs = self._load_n5_attrs(attrs_key) - return len(n5_attrs) > 0 and "dimensions" not in n5_attrs + key = key.replace(zarr_array_meta_key, self.array_meta_key) + # array if attributes contain 'dimensions' + return "dimensions" in self._load_n5_attrs(key) - def _is_array(self, path): + elif key.endswith(zarr_attrs_key): - if path is None: - attrs_key = self._META_KEYS[0] - else: - attrs_key = os.path.join(path, self._META_KEYS[0]) + key = key.replace(zarr_attrs_key, self.attrs_key) + return self._contains_attrs(key) - return "dimensions" in self._load_n5_attrs(attrs_key) + return super().__contains__(key) - def _contains_attrs(self, path): + def __eq__(self, other): + return isinstance(other, N5FSStore) and self.path == other.path - if path is None: - attrs_key = self._META_KEYS[0] - else: - if not path.endswith(self._META_KEYS[0]): - attrs_key = os.path.join(path, self._META_KEYS[0]) - else: # pragma: no cover - attrs_key = path + def listdir(self, path=None): - attrs = attrs_to_zarr(self._load_n5_attrs(attrs_key)) - return len(attrs) > 0 + if path is not None: + path = invert_chunk_coords(path) + # We can't use NestedDirectoryStore's listdir, as it requires + # array_meta_key to be present in array directories, which this 
store + # doesn't provide. + children = super().listdir(path=path) + if self._is_array(path): -def is_chunk_key(key): - segments = list(key.split('/')) - if segments: - last_segment = segments[-1] - return _prog_ckey.match(last_segment) - return False # pragma: no cover + # replace n5 attribute file with respective zarr attribute files + children.remove(self.array_meta_key) + children.append(zarr_array_meta_key) + if self._contains_attrs(path): + children.append(zarr_attrs_key) + # special handling of directories containing an array to map + # inverted nested chunk keys back to standard chunk keys + new_children = [] + root_path = self.dir_path(path) + for entry in children: + entry_path = os.path.join(root_path, entry) + if _prog_number.match(entry) and self.fs.isdir(entry_path): + for dir_path, _, file_names in self.fs.walk(entry_path): + for file_name in file_names: + file_path = os.path.join(dir_path, file_name) + rel_path = file_path.split(root_path + os.path.sep)[1] + new_child = rel_path.replace(os.path.sep, ".") + new_children.append(invert_chunk_coords(new_child)) + else: + new_children.append(entry) -def invert_chunk_coords(key): - segments = list(key.split('/')) - if segments: - last_segment = segments[-1] - if _prog_ckey.match(last_segment): - coords = list(last_segment.split('.')) - last_segment = '.'.join(coords[::-1]) - segments = segments[:-1] + [last_segment] - key = '/'.join(segments) - return key + return sorted(new_children) + elif self._is_group(path): -def group_metadata_to_n5(group_metadata): - '''Convert group metadata from zarr to N5 format.''' - del group_metadata['zarr_format'] - # TODO: This should only exist at the top-level - group_metadata['n5'] = '2.0.0' - return group_metadata + # replace n5 attribute file with respective zarr attribute files + children.remove(self.group_meta_key) + children.append(zarr_group_meta_key) + if self._contains_attrs(path): # pragma: no cover + children.append(zarr_attrs_key) + return sorted(children) 
-def group_metadata_to_zarr(group_metadata): - '''Convert group metadata from N5 to zarr format.''' - # This only exists at the top level - group_metadata.pop('n5', None) - group_metadata['zarr_format'] = ZARR_FORMAT - return group_metadata + else: + return children -def array_metadata_to_n5(array_metadata): - '''Convert array metadata from zarr to N5 format.''' + def _load_n5_attrs(self, path): + try: + s = super().__getitem__(path) + return json_loads(s) + except KeyError: + return {} - for f, t in zarr_to_n5_keys: - array_metadata[t] = array_metadata[f] - del array_metadata[f] - del array_metadata['zarr_format'] + def _is_group(self, path): - try: - dtype = np.dtype(array_metadata['dataType']) - except TypeError: # pragma: no cover - raise TypeError( - "data type %s not supported by N5" % array_metadata['dataType']) + if path is None: + attrs_key = self.attrs_key + else: + attrs_key = os.path.join(path, self.attrs_key) - array_metadata['dataType'] = dtype.name - array_metadata['dimensions'] = array_metadata['dimensions'][::-1] - array_metadata['blockSize'] = array_metadata['blockSize'][::-1] + n5_attrs = self._load_n5_attrs(attrs_key) + return len(n5_attrs) > 0 and "dimensions" not in n5_attrs - if 'fill_value' in array_metadata: - if array_metadata['fill_value'] != 0 and array_metadata['fill_value'] is not None: - raise ValueError("N5 only supports fill_value == 0 (for now)") - del array_metadata['fill_value'] + def _is_array(self, path): - if 'order' in array_metadata: - if array_metadata['order'] != 'C': - raise ValueError("zarr N5 storage only stores arrays in C order (for now)") - del array_metadata['order'] + if path is None: + attrs_key = self.attrs_key + else: + attrs_key = os.path.join(path, self.attrs_key) - if 'filters' in array_metadata: - if array_metadata['filters'] != [] and array_metadata['filters'] is not None: - raise ValueError("N5 storage does not support zarr filters") - del array_metadata['filters'] + return "dimensions" in 
self._load_n5_attrs(attrs_key) - assert 'compression' in array_metadata - compressor_config = array_metadata['compression'] - compressor_config = compressor_config_to_n5(compressor_config) - array_metadata['compression'] = compressor_config + def _contains_attrs(self, path): - if 'dimension_separator' in array_metadata: - del array_metadata['dimension_separator'] + if path is None: + attrs_key = self.attrs_key + else: + if not path.endswith(self.attrs_key): + attrs_key = os.path.join(path, self.attrs_key) + else: # pragma: no cover + attrs_key = path + + attrs = attrs_to_zarr(self._load_n5_attrs(attrs_key)) + return len(attrs) > 0 + + def is_chunk_key(key): + segments = list(key.split('/')) + if segments: + last_segment = segments[-1] + return _prog_ckey.match(last_segment) + return False # pragma: no cover + + def invert_chunk_coords(key): + segments = list(key.split('/')) + if segments: + last_segment = segments[-1] + if _prog_ckey.match(last_segment): + coords = list(last_segment.split('.')) + last_segment = '.'.join(coords[::-1]) + segments = segments[:-1] + [last_segment] + key = '/'.join(segments) + return key + + def group_metadata_to_n5(group_metadata): + '''Convert group metadata from zarr to N5 format.''' + del group_metadata['zarr_format'] + # TODO: This should only exist at the top-level + group_metadata['n5'] = '2.0.0' + return group_metadata + + def group_metadata_to_zarr(group_metadata): + '''Convert group metadata from N5 to zarr format.''' + # This only exists at the top level + group_metadata.pop('n5', None) + group_metadata['zarr_format'] = ZARR_FORMAT + return group_metadata + + def array_metadata_to_n5(array_metadata): + '''Convert array metadata from zarr to N5 format.''' + + for f, t in zarr_to_n5_keys: + array_metadata[t] = array_metadata[f] + del array_metadata[f] + del array_metadata['zarr_format'] - return array_metadata + try: + dtype = np.dtype(array_metadata['dataType']) + except TypeError: # pragma: no cover + raise TypeError( + "data 
type %s not supported by N5" % array_metadata['dataType']) + + array_metadata['dataType'] = dtype.name + array_metadata['dimensions'] = array_metadata['dimensions'][::-1] + array_metadata['blockSize'] = array_metadata['blockSize'][::-1] + + if 'fill_value' in array_metadata: + if array_metadata['fill_value'] != 0 and array_metadata['fill_value'] is not None: + raise ValueError("N5 only supports fill_value == 0 (for now)") + del array_metadata['fill_value'] + + if 'order' in array_metadata: + if array_metadata['order'] != 'C': + raise ValueError("zarr N5 storage only stores arrays in C order (for now)") + del array_metadata['order'] + + if 'filters' in array_metadata: + if array_metadata['filters'] != [] and array_metadata['filters'] is not None: + raise ValueError("N5 storage does not support zarr filters") + del array_metadata['filters'] + + assert 'compression' in array_metadata + compressor_config = array_metadata['compression'] + compressor_config = compressor_config_to_n5(compressor_config) + array_metadata['compression'] = compressor_config + + if 'dimension_separator' in array_metadata: + del array_metadata['dimension_separator'] + + return array_metadata +except ImportError: + pass def array_metadata_to_zarr(array_metadata): diff --git a/zarr/storage.py b/zarr/storage.py index f0f5ea3392..2142b459d5 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1036,28 +1036,32 @@ class FSStore(MutableMapping): exceptions : list of Exception subclasses When accessing data, any of these exceptions will be treated as a missing key - meta_keys : list or tuple of str - Reserved keys for metadata. - Defaults to the zarr metatadata keys, i.e. (".zarray", ".zgroup", ".zattrs"). dimension_separator : {'.', '/'}, optional Separator placed between the dimensions of a chunk. 
storage_options : passed to the fsspec implementation """ + array_meta_key = array_meta_key + group_meta_key = group_meta_key + attrs_key = attrs_key + def __init__(self, url, normalize_keys=True, key_separator=None, mode='w', exceptions=(KeyError, PermissionError, IOError), - meta_keys=(array_meta_key, group_meta_key, attrs_key), dimension_separator=None, **storage_options): import fsspec self.normalize_keys = normalize_keys + + protocol, _ = fsspec.core.split_protocol(url) + # set auto_mkdir to True for local file system + if protocol in (None, "file") and not storage_options.get("auto_mkdir"): + storage_options["auto_mkdir"] = True + self.map = fsspec.get_mapper(url, **storage_options) self.fs = self.map.fs # for direct operations self.path = self.fs._strip_protocol(url) self.mode = mode self.exceptions = exceptions - self._META_KEYS = meta_keys - # For backwards compatibility. Guaranteed to be non-None if key_separator is not None: dimension_separator = key_separator @@ -1076,7 +1080,7 @@ def _normalize_key(self, key): if key: *bits, end = key.split('/') - if end not in self._META_KEYS: + if end not in (self.array_meta_key, self.group_meta_key, self.attrs_key): end = end.replace('.', self.key_separator) key = '/'.join(bits + [end]) @@ -1154,7 +1158,7 @@ def listdir(self, path=None): if self.key_separator != "/": return children else: - if array_meta_key in children: + if self.array_meta_key in children: # special handling of directories containing an array to map nested chunk # keys back to standard chunk keys new_children = [] diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index b1346d760e..d329f4e58f 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -18,7 +18,7 @@ from zarr.core import Array from zarr.meta import json_loads -from zarr.n5 import N5Store, n5_keywords +from zarr.n5 import N5Store, N5FSStore, n5_keywords from zarr.storage import ( ABSStore, DBMStore, @@ -1963,6 +1963,22 @@ def test_hexdigest(self): assert 
self.expected() == found +@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") +class TestArrayWithN5FSStore(TestArrayWithN5Store): + + @staticmethod + def create_array(read_only=False, **kwargs): + path = mkdtemp() + atexit.register(shutil.rmtree, path) + store = N5FSStore(path) + cache_metadata = kwargs.pop('cache_metadata', True) + cache_attrs = kwargs.pop('cache_attrs', True) + kwargs.setdefault('compressor', Zlib(1)) + init_array(store, **kwargs) + return Array(store, read_only=read_only, cache_metadata=cache_metadata, + cache_attrs=cache_attrs) + + class TestArrayWithDBMStore(TestArray): @staticmethod diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 4730393782..9e2f9baf1b 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1183,7 +1183,7 @@ def test_value_error(self): class TestN5Store(TestNestedDirectoryStore): def create_store(self, normalize_keys=False): - path = tempfile.mkdtemp(suffix='.n5') + path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) store = N5Store(path, normalize_keys=normalize_keys) return store @@ -1296,7 +1296,7 @@ def test_filters(self): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestN5FSStore(TestFSStore): def create_store(self, normalize_keys=False): - path = tempfile.mkdtemp(suffix='.n5') + path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) store = N5FSStore(path, normalize_keys=normalize_keys) return store From ee9cdbc3f18626005e39f36c4e28a8f11d3ed3e9 Mon Sep 17 00:00:00 2001 From: jmoore Date: Wed, 18 Aug 2021 09:51:31 +0200 Subject: [PATCH 16/36] tmp: debug --- zarr/core.py | 4 +++- zarr/storage.py | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index ba3f2c1e2d..9d8b45554a 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -1952,7 +1952,9 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): return self._encode_chunk(chunk) def _chunk_key(self, 
chunk_coords): - return self._key_prefix + self._dimension_separator.join(map(str, chunk_coords)) + rv = self._key_prefix + self._dimension_separator.join(map(str, chunk_coords)) + print(f"CHUNK: {chunk_coords} --> {rv} ({self})") + return rv def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None): # decompress diff --git a/zarr/storage.py b/zarr/storage.py index 0dcfa2899e..d2c8368f58 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1096,6 +1096,7 @@ def __init__(self, url, normalize_keys=False, key_separator=None, raise FSPathExistNotDir(url) def _normalize_key(self, key): + orig = key key = normalize_storage_path(key).lstrip('/') if key: *bits, end = key.split('/') @@ -1104,7 +1105,9 @@ def _normalize_key(self, key): end = end.replace('.', self.key_separator) key = '/'.join(bits + [end]) - return key.lower() if self.normalize_keys else key + key = key.lower() if self.normalize_keys else key + print(f"Store: {orig} --> {key} ({self})") + return key def getitems(self, keys, **kwargs): keys_transformed = [self._normalize_key(key) for key in keys] From a853a29960a010e54a8954c7e70342d4d872daf5 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 18 Aug 2021 14:24:44 -0400 Subject: [PATCH 17/36] uncomment N5 chunk ordering test --- zarr/tests/test_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 38743ee35c..0b6a598198 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1207,7 +1207,7 @@ def test_chunk_nesting(self): assert 'foo/10.20.30' in store assert b'yyy' == store['foo/10.20.30'] # N5 reverses axis order - # assert b'yyy' == store['foo/30/20/10'] + assert b'yyy' == store['foo/30/20/10'] store['42'] = b'zzz' assert '42' in store assert b'zzz' == store['42'] From 7d3c879309aa92ba5928df2b8ea2e2891d6fe091 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 18 Aug 2021 14:43:55 -0400 Subject: [PATCH 18/36] more 
commented tests get uncommented --- zarr/tests/test_storage.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 0b6a598198..70198dc5df 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1154,10 +1154,10 @@ def test_chunk_nesting(self): # any path where last segment looks like a chunk key gets special handling store['0.0'] = b'xxx' assert b'xxx' == store['0.0'] - # assert b'xxx' == store['0/0'] + assert b'xxx' == store['0/0'] store['foo/10.20.30'] = b'yyy' assert b'yyy' == store['foo/10.20.30'] - # assert b'yyy' == store['foo/10/20/30'] + assert b'yyy' == store['foo/10/20/30'] store['42'] = b'zzz' assert b'zzz' == store['42'] @@ -1202,7 +1202,7 @@ def test_chunk_nesting(self): store['0.0'] = b'xxx' assert '0.0' in store assert b'xxx' == store['0.0'] - # assert b'xxx' == store['0/0'] + assert b'xxx' == store['0/0'] store['foo/10.20.30'] = b'yyy' assert 'foo/10.20.30' in store assert b'yyy' == store['foo/10.20.30'] From f3ecd7954f6c4ceec559c66678e70d2bfe377822 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 18 Aug 2021 15:16:54 -0400 Subject: [PATCH 19/36] add dimension_separator to array metadata adaptor --- zarr/n5.py | 1 + 1 file changed, 1 insertion(+) diff --git a/zarr/n5.py b/zarr/n5.py index 99ec60f299..1ee520c016 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -627,6 +627,7 @@ def array_metadata_to_zarr(array_metadata): array_metadata['fill_value'] = 0 # also if None was requested array_metadata['order'] = 'C' array_metadata['filters'] = [] + array_metadata['dimension_separator'] = '/' compressor_config = array_metadata['compressor'] compressor_config = compressor_config_to_zarr(compressor_config) From 5a105eb29852950be20e5e5d0429d371e4b862f0 Mon Sep 17 00:00:00 2001 From: jmoore Date: Thu, 19 Aug 2021 09:11:58 +0200 Subject: [PATCH 20/36] Revert "tmp: debug" This reverts commit ee9cdbc3f18626005e39f36c4e28a8f11d3ed3e9. 
--- zarr/core.py | 4 +--- zarr/storage.py | 5 +---- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 9d8b45554a..ba3f2c1e2d 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -1952,9 +1952,7 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): return self._encode_chunk(chunk) def _chunk_key(self, chunk_coords): - rv = self._key_prefix + self._dimension_separator.join(map(str, chunk_coords)) - print(f"CHUNK: {chunk_coords} --> {rv} ({self})") - return rv + return self._key_prefix + self._dimension_separator.join(map(str, chunk_coords)) def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None): # decompress diff --git a/zarr/storage.py b/zarr/storage.py index 6c75112df7..ebe512cd4b 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1101,7 +1101,6 @@ def __init__(self, url, normalize_keys=True, key_separator=None, raise FSPathExistNotDir(url) def _normalize_key(self, key): - orig = key key = normalize_storage_path(key).lstrip('/') if key: *bits, end = key.split('/') @@ -1110,9 +1109,7 @@ def _normalize_key(self, key): end = end.replace('.', self.key_separator) key = '/'.join(bits + [end]) - key = key.lower() if self.normalize_keys else key - print(f"Store: {orig} --> {key} ({self})") - return key + return key.lower() if self.normalize_keys else key def getitems(self, keys, **kwargs): keys_transformed = [self._normalize_key(key) for key in keys] From 51b31094d7d31519d5db894153ad96b8944746f1 Mon Sep 17 00:00:00 2001 From: jmoore Date: Thu, 19 Aug 2021 14:05:09 +0200 Subject: [PATCH 21/36] Attempt failed: keeping '.' 
and switching --- zarr/n5.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/zarr/n5.py b/zarr/n5.py index 1ee520c016..466d2e7289 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -327,7 +327,7 @@ def __init__(self, *args, **kwargs): if 'dimension_separator' in kwargs: kwargs.pop('dimension_separator') warnings.warn('Keyword argument `dimension_separator` will be ignored') - dimension_separator = "/" + dimension_separator = "." super().__init__(*args, dimension_separator=dimension_separator, **kwargs) def _normalize_key(self, key): @@ -339,7 +339,7 @@ def _normalize_key(self, key): *bits, end = key.split("/") if end not in (self.array_meta_key, self.group_meta_key, self.attrs_key): - end = end.replace(".", self.key_separator) + end = end.replace(".", "/") # Hard-code self.key_separator) key = "/".join(bits + [end]) return key.lower() if self.normalize_keys else key @@ -627,7 +627,7 @@ def array_metadata_to_zarr(array_metadata): array_metadata['fill_value'] = 0 # also if None was requested array_metadata['order'] = 'C' array_metadata['filters'] = [] - array_metadata['dimension_separator'] = '/' + array_metadata['dimension_separator'] = '.' compressor_config = array_metadata['compressor'] compressor_config = compressor_config_to_zarr(compressor_config) From aa75c98fa8b00367c0edcd3cf4a415c443368f3c Mon Sep 17 00:00:00 2001 From: jmoore Date: Thu, 19 Aug 2021 14:05:12 +0200 Subject: [PATCH 22/36] Revert "Attempt failed: keeping '.' and switching" This reverts commit 51b31094d7d31519d5db894153ad96b8944746f1. --- zarr/n5.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/zarr/n5.py b/zarr/n5.py index 466d2e7289..1ee520c016 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -327,7 +327,7 @@ def __init__(self, *args, **kwargs): if 'dimension_separator' in kwargs: kwargs.pop('dimension_separator') warnings.warn('Keyword argument `dimension_separator` will be ignored') - dimension_separator = "." 
+ dimension_separator = "/" super().__init__(*args, dimension_separator=dimension_separator, **kwargs) def _normalize_key(self, key): @@ -339,7 +339,7 @@ def _normalize_key(self, key): *bits, end = key.split("/") if end not in (self.array_meta_key, self.group_meta_key, self.attrs_key): - end = end.replace(".", "/") # Hard-code self.key_separator) + end = end.replace(".", self.key_separator) key = "/".join(bits + [end]) return key.lower() if self.normalize_keys else key @@ -627,7 +627,7 @@ def array_metadata_to_zarr(array_metadata): array_metadata['fill_value'] = 0 # also if None was requested array_metadata['order'] = 'C' array_metadata['filters'] = [] - array_metadata['dimension_separator'] = '.' + array_metadata['dimension_separator'] = '/' compressor_config = array_metadata['compressor'] compressor_config = compressor_config_to_zarr(compressor_config) From 3daea7c534cc599830a60db7e14f2610fce793e9 Mon Sep 17 00:00:00 2001 From: jmoore Date: Thu, 19 Aug 2021 14:42:04 +0200 Subject: [PATCH 23/36] regex: attempt failed due to slight diff in files --- zarr/n5.py | 30 +++++++++++++++--------------- zarr/storage.py | 2 +- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/zarr/n5.py b/zarr/n5.py index 1ee520c016..043e9b0c6d 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -1,6 +1,7 @@ """This module contains a storage class and codec to support the N5 format. 
""" import os +import re import struct import sys import warnings @@ -11,7 +12,7 @@ from numcodecs.registry import get_codec, register_codec from .meta import ZARR_FORMAT, json_dumps, json_loads -from .storage import NestedDirectoryStore, _prog_ckey, _prog_number, normalize_storage_path +from .storage import NestedDirectoryStore, _prog_number, normalize_storage_path from .storage import array_meta_key as zarr_array_meta_key from .storage import attrs_key as zarr_attrs_key from .storage import group_meta_key as zarr_group_meta_key @@ -25,6 +26,9 @@ n5_attrs_key = 'attributes.json' n5_keywords = ['n5', 'dataType', 'dimensions', 'blockSize', 'compression'] +potential_key = re.compile(r'(.*?)/((\d+)(\/\d+)+)$') + + class N5Store(NestedDirectoryStore): """Storage class using directories and files on a standard file system, @@ -143,6 +147,7 @@ def __setitem__(self, key, value): key = invert_chunk_coords(key) + print(key, value) super().__setitem__(key, value) def __delitem__(self, key): @@ -538,22 +543,17 @@ def _contains_attrs(self, path): return len(attrs) > 0 def is_chunk_key(key): - segments = list(key.split('/')) - if segments: - last_segment = segments[-1] - return _prog_ckey.match(last_segment) - return False # pragma: no cover + return potential_key.match(key) def invert_chunk_coords(key): - segments = list(key.split('/')) - if segments: - last_segment = segments[-1] - if _prog_ckey.match(last_segment): - coords = list(last_segment.split('.')) - last_segment = '.'.join(coords[::-1]) - segments = segments[:-1] + [last_segment] - key = '/'.join(segments) - return key + + m = is_chunk_key(key) + if m is None: + return key + + first = m.group(1) + last = "/".join(m.group(2).split("/"))[::-1] # Reverse + return f"{first}/{last}" def group_metadata_to_n5(group_metadata): '''Convert group metadata from zarr to N5 format.''' diff --git a/zarr/storage.py b/zarr/storage.py index ebe512cd4b..3056470842 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1248,7 +1248,7 @@ 
def __init__(self, suffix='', prefix='zarr', dir=None, normalize_keys=False, super().__init__(path, normalize_keys=normalize_keys) -_prog_ckey = re.compile(r'^(\d+)(\.\d+)+$') +_prog_ckey = re.compile(r'.*?/((\d+)(\/\d+)+)$') _prog_number = re.compile(r'^\d+$') From ce8a79eb53f295fedbf5a51dc42d761d729c6155 Mon Sep 17 00:00:00 2001 From: jmoore Date: Thu, 19 Aug 2021 14:42:18 +0200 Subject: [PATCH 24/36] Revert "regex: attempt failed due to slight diff in files" This reverts commit 3daea7c534cc599830a60db7e14f2610fce793e9. --- zarr/n5.py | 30 +++++++++++++++--------------- zarr/storage.py | 2 +- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/zarr/n5.py b/zarr/n5.py index 043e9b0c6d..1ee520c016 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -1,7 +1,6 @@ """This module contains a storage class and codec to support the N5 format. """ import os -import re import struct import sys import warnings @@ -12,7 +11,7 @@ from numcodecs.registry import get_codec, register_codec from .meta import ZARR_FORMAT, json_dumps, json_loads -from .storage import NestedDirectoryStore, _prog_number, normalize_storage_path +from .storage import NestedDirectoryStore, _prog_ckey, _prog_number, normalize_storage_path from .storage import array_meta_key as zarr_array_meta_key from .storage import attrs_key as zarr_attrs_key from .storage import group_meta_key as zarr_group_meta_key @@ -26,9 +25,6 @@ n5_attrs_key = 'attributes.json' n5_keywords = ['n5', 'dataType', 'dimensions', 'blockSize', 'compression'] -potential_key = re.compile(r'(.*?)/((\d+)(\/\d+)+)$') - - class N5Store(NestedDirectoryStore): """Storage class using directories and files on a standard file system, @@ -147,7 +143,6 @@ def __setitem__(self, key, value): key = invert_chunk_coords(key) - print(key, value) super().__setitem__(key, value) def __delitem__(self, key): @@ -543,17 +538,22 @@ def _contains_attrs(self, path): return len(attrs) > 0 def is_chunk_key(key): - return potential_key.match(key) + segments = 
list(key.split('/')) + if segments: + last_segment = segments[-1] + return _prog_ckey.match(last_segment) + return False # pragma: no cover def invert_chunk_coords(key): - - m = is_chunk_key(key) - if m is None: - return key - - first = m.group(1) - last = "/".join(m.group(2).split("/"))[::-1] # Reverse - return f"{first}/{last}" + segments = list(key.split('/')) + if segments: + last_segment = segments[-1] + if _prog_ckey.match(last_segment): + coords = list(last_segment.split('.')) + last_segment = '.'.join(coords[::-1]) + segments = segments[:-1] + [last_segment] + key = '/'.join(segments) + return key def group_metadata_to_n5(group_metadata): '''Convert group metadata from zarr to N5 format.''' diff --git a/zarr/storage.py b/zarr/storage.py index 3056470842..ebe512cd4b 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1248,7 +1248,7 @@ def __init__(self, suffix='', prefix='zarr', dir=None, normalize_keys=False, super().__init__(path, normalize_keys=normalize_keys) -_prog_ckey = re.compile(r'.*?/((\d+)(\/\d+)+)$') +_prog_ckey = re.compile(r'^(\d+)(\.\d+)+$') _prog_number = re.compile(r'^\d+$') From 985c2a4b99dc92a11318154e82c12d5b492ebdfb Mon Sep 17 00:00:00 2001 From: jmoore Date: Thu, 19 Aug 2021 14:54:28 +0200 Subject: [PATCH 25/36] N5: use "." internally for dimension separation This allows N5 to detect the split between key and chunks and pre-process them (re-ordering and changing the separator). see: #773 #793 --- zarr/n5.py | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/zarr/n5.py b/zarr/n5.py index 1ee520c016..8cb7c28265 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -327,9 +327,21 @@ def __init__(self, *args, **kwargs): if 'dimension_separator' in kwargs: kwargs.pop('dimension_separator') warnings.warn('Keyword argument `dimension_separator` will be ignored') - dimension_separator = "/" + dimension_separator = "." 
super().__init__(*args, dimension_separator=dimension_separator, **kwargs) + def swap_separator(self, key): + old = key + segments = list(key.split('/')) + if segments: + last_segment = segments[-1] + if _prog_ckey.match(last_segment): + coords = list(last_segment.split('.')) + last_segment = '/'.join(coords[::-1]) + segments = segments[:-1] + [last_segment] + key = '/'.join(segments) + return key + def _normalize_key(self, key): if is_chunk_key(key): key = invert_chunk_coords(key) @@ -339,7 +351,7 @@ def _normalize_key(self, key): *bits, end = key.split("/") if end not in (self.array_meta_key, self.group_meta_key, self.attrs_key): - end = end.replace(".", self.key_separator) + end = end.replace(".", "/") key = "/".join(bits + [end]) return key.lower() if self.normalize_keys else key @@ -367,6 +379,10 @@ def __getitem__(self, key): raise KeyError(key) else: return json_dumps(value) + + elif is_chunk_key(key): + key = self.swap_separator(key) + return super().__getitem__(key) def __setitem__(self, key, value): @@ -411,6 +427,9 @@ def __setitem__(self, key, value): value = json_dumps(n5_attrs) + elif is_chunk_key(key): + key = self.swap_separator(key) + super().__setitem__(key, value) def __delitem__(self, key): @@ -421,6 +440,8 @@ def __delitem__(self, key): key = key.replace(zarr_array_meta_key, self.array_meta_key) elif key.endswith(zarr_attrs_key): # pragma: no cover key = key.replace(zarr_attrs_key, self.attrs_key) + elif is_chunk_key(key): + key = self.swap_separator(key) super().__delitem__(key) @@ -444,6 +465,9 @@ def __contains__(self, key): key = key.replace(zarr_attrs_key, self.attrs_key) return self._contains_attrs(key) + elif is_chunk_key(key): + key = self.swap_separator(key) + return super().__contains__(key) def __eq__(self, other): @@ -538,11 +562,12 @@ def _contains_attrs(self, path): return len(attrs) > 0 def is_chunk_key(key): + rv = False segments = list(key.split('/')) if segments: last_segment = segments[-1] - return 
_prog_ckey.match(last_segment) - return False # pragma: no cover + rv = _prog_ckey.match(last_segment) + return rv def invert_chunk_coords(key): segments = list(key.split('/')) @@ -550,7 +575,7 @@ def invert_chunk_coords(key): last_segment = segments[-1] if _prog_ckey.match(last_segment): coords = list(last_segment.split('.')) - last_segment = '.'.join(coords[::-1]) + last_segment = '/'.join(coords[::-1]) segments = segments[:-1] + [last_segment] key = '/'.join(segments) return key @@ -627,7 +652,7 @@ def array_metadata_to_zarr(array_metadata): array_metadata['fill_value'] = 0 # also if None was requested array_metadata['order'] = 'C' array_metadata['filters'] = [] - array_metadata['dimension_separator'] = '/' + array_metadata['dimension_separator'] = '.' compressor_config = array_metadata['compressor'] compressor_config = compressor_config_to_zarr(compressor_config) From 51836df5560f7325c74c279d3a0cd49f6497026c Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 19 Aug 2021 11:11:50 -0400 Subject: [PATCH 26/36] move FSSpec import guard --- zarr/n5.py | 169 +++++++++++++++++++++++++++-------------------------- 1 file changed, 86 insertions(+), 83 deletions(-) diff --git a/zarr/n5.py b/zarr/n5.py index 8cb7c28265..ada5ffff71 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -330,8 +330,7 @@ def __init__(self, *args, **kwargs): dimension_separator = "." 
super().__init__(*args, dimension_separator=dimension_separator, **kwargs) - def swap_separator(self, key): - old = key + def _swap_separator(self, key): segments = list(key.split('/')) if segments: last_segment = segments[-1] @@ -381,7 +380,7 @@ def __getitem__(self, key): return json_dumps(value) elif is_chunk_key(key): - key = self.swap_separator(key) + key = self._swap_separator(key) return super().__getitem__(key) @@ -428,7 +427,7 @@ def __setitem__(self, key, value): value = json_dumps(n5_attrs) elif is_chunk_key(key): - key = self.swap_separator(key) + key = self._swap_separator(key) super().__setitem__(key, value) @@ -441,7 +440,7 @@ def __delitem__(self, key): elif key.endswith(zarr_attrs_key): # pragma: no cover key = key.replace(zarr_attrs_key, self.attrs_key) elif is_chunk_key(key): - key = self.swap_separator(key) + key = self._swap_separator(key) super().__delitem__(key) @@ -466,7 +465,7 @@ def __contains__(self, key): return self._contains_attrs(key) elif is_chunk_key(key): - key = self.swap_separator(key) + key = self._swap_separator(key) return super().__contains__(key) @@ -474,7 +473,6 @@ def __eq__(self, other): return isinstance(other, N5FSStore) and self.path == other.path def listdir(self, path=None): - if path is not None: path = invert_chunk_coords(path) @@ -560,86 +558,91 @@ def _contains_attrs(self, path): attrs = attrs_to_zarr(self._load_n5_attrs(attrs_key)) return len(attrs) > 0 - - def is_chunk_key(key): - rv = False - segments = list(key.split('/')) - if segments: - last_segment = segments[-1] - rv = _prog_ckey.match(last_segment) - return rv - - def invert_chunk_coords(key): - segments = list(key.split('/')) - if segments: - last_segment = segments[-1] - if _prog_ckey.match(last_segment): - coords = list(last_segment.split('.')) - last_segment = '/'.join(coords[::-1]) - segments = segments[:-1] + [last_segment] - key = '/'.join(segments) - return key - - def group_metadata_to_n5(group_metadata): - '''Convert group metadata from zarr 
to N5 format.''' - del group_metadata['zarr_format'] - # TODO: This should only exist at the top-level - group_metadata['n5'] = '2.0.0' - return group_metadata - - def group_metadata_to_zarr(group_metadata): - '''Convert group metadata from N5 to zarr format.''' - # This only exists at the top level - group_metadata.pop('n5', None) - group_metadata['zarr_format'] = ZARR_FORMAT - return group_metadata - - def array_metadata_to_n5(array_metadata): - '''Convert array metadata from zarr to N5 format.''' - - for f, t in zarr_to_n5_keys: - array_metadata[t] = array_metadata[f] - del array_metadata[f] - del array_metadata['zarr_format'] - - try: - dtype = np.dtype(array_metadata['dataType']) - except TypeError: # pragma: no cover - raise TypeError( - "data type %s not supported by N5" % array_metadata['dataType']) - - array_metadata['dataType'] = dtype.name - array_metadata['dimensions'] = array_metadata['dimensions'][::-1] - array_metadata['blockSize'] = array_metadata['blockSize'][::-1] - - if 'fill_value' in array_metadata: - if array_metadata['fill_value'] != 0 and array_metadata['fill_value'] is not None: - raise ValueError("N5 only supports fill_value == 0 (for now)") - del array_metadata['fill_value'] - - if 'order' in array_metadata: - if array_metadata['order'] != 'C': - raise ValueError("zarr N5 storage only stores arrays in C order (for now)") - del array_metadata['order'] - - if 'filters' in array_metadata: - if array_metadata['filters'] != [] and array_metadata['filters'] is not None: - raise ValueError("N5 storage does not support zarr filters") - del array_metadata['filters'] - - assert 'compression' in array_metadata - compressor_config = array_metadata['compression'] - compressor_config = compressor_config_to_n5(compressor_config) - array_metadata['compression'] = compressor_config - - if 'dimension_separator' in array_metadata: - del array_metadata['dimension_separator'] - - return array_metadata except ImportError: pass +def is_chunk_key(key): + rv = 
False + segments = list(key.split('/')) + if segments: + last_segment = segments[-1] + rv = _prog_ckey.match(last_segment) + return rv + + +def invert_chunk_coords(key): + segments = list(key.split('/')) + if segments: + last_segment = segments[-1] + if _prog_ckey.match(last_segment): + coords = list(last_segment.split('.')) + last_segment = '/'.join(coords[::-1]) + segments = segments[:-1] + [last_segment] + key = '/'.join(segments) + return key + + +def group_metadata_to_n5(group_metadata): + '''Convert group metadata from zarr to N5 format.''' + del group_metadata['zarr_format'] + # TODO: This should only exist at the top-level + group_metadata['n5'] = '2.0.0' + return group_metadata + + +def group_metadata_to_zarr(group_metadata): + '''Convert group metadata from N5 to zarr format.''' + # This only exists at the top level + group_metadata.pop('n5', None) + group_metadata['zarr_format'] = ZARR_FORMAT + return group_metadata + + +def array_metadata_to_n5(array_metadata): + '''Convert array metadata from zarr to N5 format.''' + + for f, t in zarr_to_n5_keys: + array_metadata[t] = array_metadata[f] + del array_metadata[f] + del array_metadata['zarr_format'] + + try: + dtype = np.dtype(array_metadata['dataType']) + except TypeError: # pragma: no cover + raise TypeError( + "data type %s not supported by N5" % array_metadata['dataType']) + + array_metadata['dataType'] = dtype.name + array_metadata['dimensions'] = array_metadata['dimensions'][::-1] + array_metadata['blockSize'] = array_metadata['blockSize'][::-1] + + if 'fill_value' in array_metadata: + if array_metadata['fill_value'] != 0 and array_metadata['fill_value'] is not None: + raise ValueError("N5 only supports fill_value == 0 (for now)") + del array_metadata['fill_value'] + + if 'order' in array_metadata: + if array_metadata['order'] != 'C': + raise ValueError("zarr N5 storage only stores arrays in C order (for now)") + del array_metadata['order'] + + if 'filters' in array_metadata: + if 
array_metadata['filters'] != [] and array_metadata['filters'] is not None: + raise ValueError("N5 storage does not support zarr filters") + del array_metadata['filters'] + + assert 'compression' in array_metadata + compressor_config = array_metadata['compression'] + compressor_config = compressor_config_to_n5(compressor_config) + array_metadata['compression'] = compressor_config + + if 'dimension_separator' in array_metadata: + del array_metadata['dimension_separator'] + + return array_metadata + + def array_metadata_to_zarr(array_metadata): '''Convert array metadata from N5 to zarr format.''' for t, f in zarr_to_n5_keys: From 3c5da2f69b0d4e5724b6fa08fa3bb8bf16339a6e Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 19 Aug 2021 15:41:37 -0400 Subject: [PATCH 27/36] remove os.path.sep concatenation in listdir that was erroring a test, and add a mea culpa docstring about the dimension_separator for n5 stores --- zarr/n5.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/zarr/n5.py b/zarr/n5.py index ada5ffff71..e8e36615de 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -318,6 +318,20 @@ class N5FSStore(FSStore): ----- This is an experimental feature. Safe to write in multiple threads or processes. + + Be advised that the `_dimension_separator` property of this store + (and arrays it creates) is ".", but chunks saved by this store will + in fact be "/" separated, as proscribed by the N5 format. + + This is counter-intuitive (to say the least), but not arbitrary. + Chunks in N5 format are stored with reversed dimension order + relative to Zarr chunks: a chunk of a 3D Zarr array would be stored + on a file system as `/0/1/2`, but in N5 the same chunk would be + stored as `/2/1/0`. Therefore, stores targeting N5 must intercept + chunk keys and flip the order of the dimensions before writing to + storage, and this procedure requires chunk keys with "." 
separated + dimensions, hence the Zarr arrays targeting N5 have the deceptive + "." dimension separator. """ array_meta_key = 'attributes.json' group_meta_key = 'attributes.json' @@ -498,7 +512,7 @@ def listdir(self, path=None): for dir_path, _, file_names in self.fs.walk(entry_path): for file_name in file_names: file_path = os.path.join(dir_path, file_name) - rel_path = file_path.split(root_path + os.path.sep)[1] + rel_path = file_path.split(root_path)[1] new_child = rel_path.replace(os.path.sep, ".") new_children.append(invert_chunk_coords(new_child)) else: From b8fe803d1c4c067d436914c4d73ba9df4be2c5b0 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 13 Sep 2021 19:54:11 -0400 Subject: [PATCH 28/36] resolve merge conflicts in favor of upstream --- zarr/tests/test_dim_separator.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/zarr/tests/test_dim_separator.py b/zarr/tests/test_dim_separator.py index 518dd226fb..566745e665 100644 --- a/zarr/tests/test_dim_separator.py +++ b/zarr/tests/test_dim_separator.py @@ -1,8 +1,5 @@ -<<<<<<< HEAD -======= import pathlib ->>>>>>> upstream/master import pytest from numpy.testing import assert_array_equal @@ -12,12 +9,9 @@ from zarr.tests.util import have_fsspec -<<<<<<< HEAD -======= needs_fsspec = pytest.mark.skipif(not have_fsspec, reason="needs fsspec") ->>>>>>> upstream/master @pytest.fixture(params=("static_nested", "static_flat", "directory_nested", @@ -25,15 +19,9 @@ "directory_default", "nesteddirectory_nested", "nesteddirectory_default", -<<<<<<< HEAD - "fs_nested", - "fs_flat", - "fs_default")) -======= pytest.param("fs_nested", marks=needs_fsspec), pytest.param("fs_flat", marks=needs_fsspec), pytest.param("fs_default", marks=needs_fsspec))) ->>>>>>> upstream/master def dataset(tmpdir, request): """ Generate a variety of different Zarrs using @@ -46,12 +34,6 @@ def dataset(tmpdir, request): kwargs = {} if which.startswith("static"): -<<<<<<< HEAD - if 
which.endswith("nested"): - return "fixture/nested" - else: - return "fixture/flat" -======= project_root = pathlib.Path(zarr.__file__).resolve().parent.parent if which.endswith("nested"): static = project_root / "fixture/nested" @@ -67,18 +49,12 @@ def dataset(tmpdir, request): a[:] = [[1, 2], [3, 4]] return str(static) ->>>>>>> upstream/master if which.startswith("directory"): store_class = DirectoryStore elif which.startswith("nested"): store_class = NestedDirectoryStore else: -<<<<<<< HEAD - if have_fsspec is False: - pytest.skip("no fsspec") -======= ->>>>>>> upstream/master store_class = FSStore kwargs["mode"] = "w" kwargs["auto_mkdir"] = True @@ -98,16 +74,10 @@ def verify(array): def test_open(dataset): -<<<<<<< HEAD - verify(zarr.open(dataset)) - - -======= verify(zarr.open(dataset, "r")) @needs_fsspec ->>>>>>> upstream/master def test_fsstore(dataset): verify(Array(store=FSStore(dataset))) From 8fec1d6904a70dc735c11e55c6ecf9c1bed5d720 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 13 Sep 2021 21:30:14 -0400 Subject: [PATCH 29/36] make listdir implementation for n5fsstore look more like fsstore's listdir, and add crucial lstrip --- zarr/n5.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/zarr/n5.py b/zarr/n5.py index e8e36615de..92d328de8a 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -509,15 +509,13 @@ def listdir(self, path=None): for entry in children: entry_path = os.path.join(root_path, entry) if _prog_number.match(entry) and self.fs.isdir(entry_path): - for dir_path, _, file_names in self.fs.walk(entry_path): - for file_name in file_names: - file_path = os.path.join(dir_path, file_name) - rel_path = file_path.split(root_path)[1] - new_child = rel_path.replace(os.path.sep, ".") - new_children.append(invert_chunk_coords(new_child)) + for file_name in self.fs.find(entry_path): + file_path = os.path.join(root_path, file_name) + rel_path = file_path.split(root_path)[1] + new_child = 
rel_path.lstrip('/').replace('/', ".") + new_children.append(invert_chunk_coords(new_child)) else: new_children.append(entry) - return sorted(new_children) elif self._is_group(path): @@ -527,11 +525,8 @@ def listdir(self, path=None): children.append(zarr_group_meta_key) if self._contains_attrs(path): # pragma: no cover children.append(zarr_attrs_key) - return sorted(children) - else: - return children def _load_n5_attrs(self, path): From 46ebb44d023758b42704ae49470fded957fc3c0e Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 15 Sep 2021 18:25:03 -0400 Subject: [PATCH 30/36] Update hexdigest tests for N5Stores to account for the presence of the dimension_separator keyword now present in metadata --- zarr/tests/test_core.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 326c1d8c8c..be2feffe8a 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -1984,12 +1984,12 @@ def test_compressors(self): def expected(self): return [ - 'c6b83adfad999fbd865057531d749d87cf138f58', - 'a3d6d187536ecc3a9dd6897df55d258e2f52f9c5', - 'ec2e008525ae09616dbc1d2408cbdb42532005c8', - 'b63f031031dcd5248785616edcb2d6fe68203c28', - '0cfc673215a8292a87f3c505e2402ce75243c601', - ] + '4e9cf910000506455f82a70938a272a3fce932e5', + 'f9d4cbf1402901f63dea7acf764d2546e4b6aa38', + '1d8199f5f7b70d61aa0d29cc375212c3df07d50a', + '874880f91aa6736825584509144afe6b06b0c05c', + 'e2258fedc74752196a8c8383db49e27193c995e2', + ] def test_hexdigest(self): found = [] From 864773d89c297eb9fc7ddd49f87eeb8e74510964 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 15 Sep 2021 18:30:07 -0400 Subject: [PATCH 31/36] Add tests for dimension_separator in array meta for N5Stores --- zarr/tests/test_storage.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 0b6a598198..d0d1e22b14 100644 --- a/zarr/tests/test_storage.py +++ 
b/zarr/tests/test_storage.py @@ -1228,6 +1228,7 @@ def test_init_array(self): assert default_compressor.get_config() == compressor_config # N5Store always has a fill value of 0 assert meta['fill_value'] == 0 + assert meta['dimension_separator'] == '.' def test_init_array_path(self): path = 'foo/bar' @@ -1326,6 +1327,7 @@ def test_init_array(self): assert default_compressor.get_config() == compressor_config # N5Store always has a fill value of 0 assert meta['fill_value'] == 0 + assert meta['dimension_separator'] == '.' def test_init_array_path(self): path = 'foo/bar' From 3b56155e851ccf34a3a1996b880c2536dd3c71aa Mon Sep 17 00:00:00 2001 From: jmoore Date: Fri, 17 Sep 2021 08:47:06 +0200 Subject: [PATCH 32/36] N5FSStore: try to increase code coverage * Adds a test for the dimension_separator warning * uses the parent test_complex for listdir * "nocover" the import error since fsspec is ever present --- zarr/n5.py | 2 +- zarr/tests/test_storage.py | 60 +++++++++++++++++++++++++++----------- 2 files changed, 44 insertions(+), 18 deletions(-) diff --git a/zarr/n5.py b/zarr/n5.py index 92d328de8a..130bb8803e 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -567,7 +567,7 @@ def _contains_attrs(self, path): attrs = attrs_to_zarr(self._load_n5_attrs(attrs_key)) return len(attrs) > 0 -except ImportError: +except ImportError: # pragma: no cover pass diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index d0d1e22b14..6fc9cca477 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -900,13 +900,20 @@ def mock_walker_no_slash(_path): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestFSStore(StoreTests): - def create_store(self, normalize_keys=False, dimension_separator="."): - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) + def create_store(self, normalize_keys=False, + dimension_separator=".", + path=None, + **kwargs): + + if path is None: + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, 
path) + store = FSStore( path, normalize_keys=normalize_keys, - dimension_separator=dimension_separator) + dimension_separator=dimension_separator, + **kwargs) return store def test_init_array(self): @@ -937,8 +944,9 @@ def test_dimension_separator(self): def test_complex(self): path1 = tempfile.mkdtemp() path2 = tempfile.mkdtemp() - store = FSStore("simplecache::file://" + path1, - simplecache={"same_names": True, "cache_storage": path2}) + store = self.create_store(path="simplecache::file://" + path1, + simplecache={"same_names": True, + "cache_storage": path2}) assert not store assert not os.listdir(path1) assert not os.listdir(path2) @@ -949,6 +957,20 @@ def test_complex(self): assert store["foo"] == b"hello" assert 'foo' in os.listdir(path2) + def test_deep_ndim(self): + import zarr + + store = self.create_store() + foo = zarr.open_group(store=store) + bar = foo.create_group("bar") + baz = bar.create_dataset("baz", + shape=(4, 4, 4), + chunks=(2, 2, 2), + dtype="i8") + baz[:] = 1 + assert set(store.listdir()) == set([".zgroup", "bar"]) + assert foo["bar"]["baz"][(0, 0, 0)] == 1 + def test_not_fsspec(self): import zarr path = tempfile.mkdtemp() @@ -979,10 +1001,10 @@ def test_create(self): def test_read_only(self): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) - store = FSStore(path) + store = self.create_store(path=path) store['foo'] = b"bar" - store = FSStore(path, mode='r') + store = self.create_store(path=path, mode='r') with pytest.raises(PermissionError): store['foo'] = b"hex" @@ -1000,11 +1022,11 @@ def test_read_only(self): filepath = os.path.join(path, "foo") with pytest.raises(ValueError): - FSStore(filepath, mode='r') + self.create_store(path=filepath, mode='r') def test_eq(self): - store1 = FSStore("anypath") - store2 = FSStore("anypath") + store1 = self.create_store(path="anypath") + store2 = self.create_store(path="anypath") assert store1 == store2 @pytest.mark.usefixtures("s3") @@ -1300,10 +1322,13 @@ def test_filters(self): 
@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestN5FSStore(TestFSStore): - def create_store(self, normalize_keys=False): - path = tempfile.mkdtemp() - atexit.register(atexit_rmtree, path) - store = N5FSStore(path, normalize_keys=normalize_keys) + def create_store(self, normalize_keys=False, path=None, **kwargs): + + if path is None: + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, path) + + store = N5FSStore(path, normalize_keys=normalize_keys, **kwargs) return store def test_equal(self): @@ -1375,8 +1400,9 @@ def test_init_group_overwrite_chunk_store(self): self._test_init_group_overwrite_chunk_store('C') def test_dimension_separator(self): - with pytest.raises(TypeError): - self.create_store(key_separator='.') + + with pytest.warns(UserWarning, match='dimension_separator'): + self.create_store(dimension_separator='/') @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") From b0f6d335971048382916285d04499e184b4b86b6 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 17 Sep 2021 12:31:41 -0400 Subject: [PATCH 33/36] flake8 --- zarr/n5.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/n5.py b/zarr/n5.py index 130bb8803e..49bdf40ec4 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -567,7 +567,7 @@ def _contains_attrs(self, path): attrs = attrs_to_zarr(self._load_n5_attrs(attrs_key)) return len(attrs) > 0 -except ImportError: # pragma: no cover +except ImportError: # pragma: no cover pass From 82ce89f4f544fab14543d09cfd8e9e20546906ba Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 17 Sep 2021 12:32:11 -0400 Subject: [PATCH 34/36] add chunk nesting test to N5FSStore test suite --- zarr/tests/test_storage.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 6fc9cca477..1412ec2099 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1336,6 +1336,24 @@ def test_equal(self): 
store_b = N5FSStore(store_a.path) assert store_a == store_b + # This is copied wholesale from the N5Store tests. The same test could + # be run by making TestN5FSStore inherit from both TestFSStore and + # TestN5Store, but a direct copy is arguably more explicit. + def test_chunk_nesting(self): + store = self.create_store() + store['0.0'] = b'xxx' + assert '0.0' in store + assert b'xxx' == store['0.0'] + # assert b'xxx' == store['0/0'] + store['foo/10.20.30'] = b'yyy' + assert 'foo/10.20.30' in store + assert b'yyy' == store['foo/10.20.30'] + # N5 reverses axis order + assert b'yyy' == store['foo/30/20/10'] + store['42'] = b'zzz' + assert '42' in store + assert b'zzz' == store['42'] + def test_init_array(self): store = self.create_store() init_array(store, shape=1000, chunks=100) From 2b854108288bef96d10b965efede20e8f8da1c69 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 17 Sep 2021 19:06:21 -0400 Subject: [PATCH 35/36] make array_meta_key, group_meta_key, attrs_key private --- zarr/n5.py | 50 ++++++++++++++++++++++++------------------------- zarr/storage.py | 10 +++++----- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/zarr/n5.py b/zarr/n5.py index 49bdf40ec4..745939a040 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -333,9 +333,9 @@ class N5FSStore(FSStore): dimensions, hence the Zarr arrays targeting N5 have the deceptive "." dimension separator. 
""" - array_meta_key = 'attributes.json' - group_meta_key = 'attributes.json' - attrs_key = 'attributes.json' + _array_meta_key = 'attributes.json' + _group_meta_key = 'attributes.json' + _attrs_key = 'attributes.json' def __init__(self, *args, **kwargs): if 'dimension_separator' in kwargs: @@ -363,7 +363,7 @@ def _normalize_key(self, key): if key: *bits, end = key.split("/") - if end not in (self.array_meta_key, self.group_meta_key, self.attrs_key): + if end not in (self._array_meta_key, self._group_meta_key, self._attrs_key): end = end.replace(".", "/") key = "/".join(bits + [end]) return key.lower() if self.normalize_keys else key @@ -371,21 +371,21 @@ def _normalize_key(self, key): def __getitem__(self, key): if key.endswith(zarr_group_meta_key): - key = key.replace(zarr_group_meta_key, self.group_meta_key) + key = key.replace(zarr_group_meta_key, self._group_meta_key) value = group_metadata_to_zarr(self._load_n5_attrs(key)) return json_dumps(value) elif key.endswith(zarr_array_meta_key): - key = key.replace(zarr_array_meta_key, self.array_meta_key) + key = key.replace(zarr_array_meta_key, self._array_meta_key) value = array_metadata_to_zarr(self._load_n5_attrs(key)) return json_dumps(value) elif key.endswith(zarr_attrs_key): - key = key.replace(zarr_attrs_key, self.attrs_key) + key = key.replace(zarr_attrs_key, self._attrs_key) value = attrs_to_zarr(self._load_n5_attrs(key)) if len(value) == 0: @@ -401,7 +401,7 @@ def __getitem__(self, key): def __setitem__(self, key, value): if key.endswith(zarr_group_meta_key): - key = key.replace(zarr_group_meta_key, self.group_meta_key) + key = key.replace(zarr_group_meta_key, self._group_meta_key) n5_attrs = self._load_n5_attrs(key) n5_attrs.update(**group_metadata_to_n5(json_loads(value))) @@ -410,7 +410,7 @@ def __setitem__(self, key, value): elif key.endswith(zarr_array_meta_key): - key = key.replace(zarr_array_meta_key, self.array_meta_key) + key = key.replace(zarr_array_meta_key, self._array_meta_key) n5_attrs = 
self._load_n5_attrs(key) n5_attrs.update(**array_metadata_to_n5(json_loads(value))) @@ -419,7 +419,7 @@ def __setitem__(self, key, value): elif key.endswith(zarr_attrs_key): - key = key.replace(zarr_attrs_key, self.attrs_key) + key = key.replace(zarr_attrs_key, self._attrs_key) n5_attrs = self._load_n5_attrs(key) zarr_attrs = json_loads(value) @@ -448,11 +448,11 @@ def __setitem__(self, key, value): def __delitem__(self, key): if key.endswith(zarr_group_meta_key): # pragma: no cover - key = key.replace(zarr_group_meta_key, self.group_meta_key) + key = key.replace(zarr_group_meta_key, self._group_meta_key) elif key.endswith(zarr_array_meta_key): # pragma: no cover - key = key.replace(zarr_array_meta_key, self.array_meta_key) + key = key.replace(zarr_array_meta_key, self._array_meta_key) elif key.endswith(zarr_attrs_key): # pragma: no cover - key = key.replace(zarr_attrs_key, self.attrs_key) + key = key.replace(zarr_attrs_key, self._attrs_key) elif is_chunk_key(key): key = self._swap_separator(key) @@ -461,7 +461,7 @@ def __delitem__(self, key): def __contains__(self, key): if key.endswith(zarr_group_meta_key): - key = key.replace(zarr_group_meta_key, self.group_meta_key) + key = key.replace(zarr_group_meta_key, self._group_meta_key) if key not in self: return False # group if not a dataset (attributes do not contain 'dimensions') @@ -469,13 +469,13 @@ def __contains__(self, key): elif key.endswith(zarr_array_meta_key): - key = key.replace(zarr_array_meta_key, self.array_meta_key) + key = key.replace(zarr_array_meta_key, self._array_meta_key) # array if attributes contain 'dimensions' return "dimensions" in self._load_n5_attrs(key) elif key.endswith(zarr_attrs_key): - key = key.replace(zarr_attrs_key, self.attrs_key) + key = key.replace(zarr_attrs_key, self._attrs_key) return self._contains_attrs(key) elif is_chunk_key(key): @@ -497,7 +497,7 @@ def listdir(self, path=None): if self._is_array(path): # replace n5 attribute file with respective zarr attribute files - 
children.remove(self.array_meta_key) + children.remove(self._array_meta_key) children.append(zarr_array_meta_key) if self._contains_attrs(path): children.append(zarr_attrs_key) @@ -521,7 +521,7 @@ def listdir(self, path=None): elif self._is_group(path): # replace n5 attribute file with respective zarr attribute files - children.remove(self.group_meta_key) + children.remove(self._group_meta_key) children.append(zarr_group_meta_key) if self._contains_attrs(path): # pragma: no cover children.append(zarr_attrs_key) @@ -539,9 +539,9 @@ def _load_n5_attrs(self, path): def _is_group(self, path): if path is None: - attrs_key = self.attrs_key + attrs_key = self._attrs_key else: - attrs_key = os.path.join(path, self.attrs_key) + attrs_key = os.path.join(path, self._attrs_key) n5_attrs = self._load_n5_attrs(attrs_key) return len(n5_attrs) > 0 and "dimensions" not in n5_attrs @@ -549,19 +549,19 @@ def _is_group(self, path): def _is_array(self, path): if path is None: - attrs_key = self.attrs_key + attrs_key = self._attrs_key else: - attrs_key = os.path.join(path, self.attrs_key) + attrs_key = os.path.join(path, self._attrs_key) return "dimensions" in self._load_n5_attrs(attrs_key) def _contains_attrs(self, path): if path is None: - attrs_key = self.attrs_key + attrs_key = self._attrs_key else: - if not path.endswith(self.attrs_key): - attrs_key = os.path.join(path, self.attrs_key) + if not path.endswith(self._attrs_key): + attrs_key = os.path.join(path, self._attrs_key) else: # pragma: no cover attrs_key = path diff --git a/zarr/storage.py b/zarr/storage.py index afa09e261a..395551687f 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1065,9 +1065,9 @@ class FSStore(MutableMapping): Separator placed between the dimensions of a chunk. 
storage_options : passed to the fsspec implementation """ - array_meta_key = array_meta_key - group_meta_key = group_meta_key - attrs_key = attrs_key + _array_meta_key = array_meta_key + _group_meta_key = group_meta_key + _attrs_key = attrs_key def __init__(self, url, normalize_keys=True, key_separator=None, mode='w', @@ -1105,7 +1105,7 @@ def _normalize_key(self, key): if key: *bits, end = key.split('/') - if end not in (self.array_meta_key, self.group_meta_key, self.attrs_key): + if end not in (self._array_meta_key, self._group_meta_key, self._attrs_key): end = end.replace('.', self.key_separator) key = '/'.join(bits + [end]) @@ -1183,7 +1183,7 @@ def listdir(self, path=None): if self.key_separator != "/": return children else: - if self.array_meta_key in children: + if self._array_meta_key in children: # special handling of directories containing an array to map nested chunk # keys back to standard chunk keys new_children = [] From aa4a723ecb9c776314f03f0a1abf14bacc10c215 Mon Sep 17 00:00:00 2001 From: jmoore Date: Sun, 19 Sep 2021 15:58:54 +0200 Subject: [PATCH 36/36] N5FSStore: Remove ImportError test FSStore only throws ModuleNotFoundError on initialization rather than on import. Therefore N5FSStore does the same. If this *weren't* the case, then the import in zarr/init would need to test the import as well, which isn't the case. 
--- zarr/n5.py | 468 ++++++++++++++++++++++++++--------------------------- 1 file changed, 232 insertions(+), 236 deletions(-) diff --git a/zarr/n5.py b/zarr/n5.py index 745939a040..797558fa2d 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -11,6 +11,7 @@ from numcodecs.registry import get_codec, register_codec from .meta import ZARR_FORMAT, json_dumps, json_loads +from .storage import FSStore from .storage import NestedDirectoryStore, _prog_ckey, _prog_number, normalize_storage_path from .storage import array_meta_key as zarr_array_meta_key from .storage import attrs_key as zarr_attrs_key @@ -281,294 +282,289 @@ def _contains_attrs(self, path): return len(attrs) > 0 -try: - from .storage import FSStore - - class N5FSStore(FSStore): - """Implentation of the N5 format (https://github.com/saalfeldlab/n5) using `fsspec`, - which allows storage on a variety of filesystems. Based on `zarr.N5Store`. - Parameters - ---------- - path : string - Location of directory to use as the root of the storage hierarchy. - normalize_keys : bool, optional - If True, all store keys will be normalized to use lower case characters - (e.g. 'foo' and 'FOO' will be treated as equivalent). This can be - useful to avoid potential discrepancies between case-senstive and - case-insensitive file system. Default value is False. - - Examples - -------- - Store a single array:: - - >>> import zarr - >>> store = zarr.N5FSStore('data/array.n5', auto_mkdir=True) - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) - >>> z[...] = 42 - - Store a group:: - - >>> store = zarr.N5FSStore('data/group.n5', auto_mkdir=True) - >>> root = zarr.group(store=store, overwrite=True) - >>> foo = root.create_group('foo') - >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) - >>> bar[...] = 42 - - Notes - ----- - This is an experimental feature. - Safe to write in multiple threads or processes. 
- - Be advised that the `_dimension_separator` property of this store - (and arrays it creates) is ".", but chunks saved by this store will - in fact be "/" separated, as proscribed by the N5 format. - - This is counter-intuitive (to say the least), but not arbitrary. - Chunks in N5 format are stored with reversed dimension order - relative to Zarr chunks: a chunk of a 3D Zarr array would be stored - on a file system as `/0/1/2`, but in N5 the same chunk would be - stored as `/2/1/0`. Therefore, stores targeting N5 must intercept - chunk keys and flip the order of the dimensions before writing to - storage, and this procedure requires chunk keys with "." separated - dimensions, hence the Zarr arrays targeting N5 have the deceptive - "." dimension separator. - """ - _array_meta_key = 'attributes.json' - _group_meta_key = 'attributes.json' - _attrs_key = 'attributes.json' - - def __init__(self, *args, **kwargs): - if 'dimension_separator' in kwargs: - kwargs.pop('dimension_separator') - warnings.warn('Keyword argument `dimension_separator` will be ignored') - dimension_separator = "." 
- super().__init__(*args, dimension_separator=dimension_separator, **kwargs) - - def _swap_separator(self, key): - segments = list(key.split('/')) - if segments: - last_segment = segments[-1] - if _prog_ckey.match(last_segment): - coords = list(last_segment.split('.')) - last_segment = '/'.join(coords[::-1]) - segments = segments[:-1] + [last_segment] - key = '/'.join(segments) - return key - - def _normalize_key(self, key): - if is_chunk_key(key): - key = invert_chunk_coords(key) - - key = normalize_storage_path(key).lstrip("/") - if key: - *bits, end = key.split("/") - - if end not in (self._array_meta_key, self._group_meta_key, self._attrs_key): - end = end.replace(".", "/") - key = "/".join(bits + [end]) - return key.lower() if self.normalize_keys else key - - def __getitem__(self, key): - if key.endswith(zarr_group_meta_key): - - key = key.replace(zarr_group_meta_key, self._group_meta_key) - value = group_metadata_to_zarr(self._load_n5_attrs(key)) +class N5FSStore(FSStore): + """Implementation of the N5 format (https://github.com/saalfeldlab/n5) using `fsspec`, + which allows storage on a variety of filesystems. Based on `zarr.N5Store`. + Parameters + ---------- + path : string + Location of directory to use as the root of the storage hierarchy. + normalize_keys : bool, optional + If True, all store keys will be normalized to use lower case characters + (e.g. 'foo' and 'FOO' will be treated as equivalent). This can be + useful to avoid potential discrepancies between case-sensitive and + case-insensitive file system. Default value is False. - return json_dumps(value) + Examples + -------- + Store a single array:: + + >>> import zarr + >>> store = zarr.N5FSStore('data/array.n5', auto_mkdir=True) + >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) + >>> z[...]
= 42 - elif key.endswith(zarr_array_meta_key): + Store a group: - key = key.replace(zarr_array_meta_key, self._array_meta_key) - value = array_metadata_to_zarr(self._load_n5_attrs(key)) + >>> store = zarr.N5FSStore('data/group.n5', auto_mkdir=True) + >>> root = zarr.group(store=store, overwrite=True) + >>> foo = root.create_group('foo') + >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) + >>> bar[...] = 42 + + Notes + ----- + This is an experimental feature. + Safe to write in multiple threads or processes. + + Be advised that the `_dimension_separator` property of this store + (and arrays it creates) is ".", but chunks saved by this store will + in fact be "/" separated, as prescribed by the N5 format. + + This is counter-intuitive (to say the least), but not arbitrary. + Chunks in N5 format are stored with reversed dimension order + relative to Zarr chunks: a chunk of a 3D Zarr array would be stored + on a file system as `/0/1/2`, but in N5 the same chunk would be + stored as `/2/1/0`. Therefore, stores targeting N5 must intercept + chunk keys and flip the order of the dimensions before writing to + storage, and this procedure requires chunk keys with "." separated + dimensions, hence the Zarr arrays targeting N5 have the deceptive + "." dimension separator. + """ + _array_meta_key = 'attributes.json' + _group_meta_key = 'attributes.json' + _attrs_key = 'attributes.json' + + def __init__(self, *args, **kwargs): + if 'dimension_separator' in kwargs: + kwargs.pop('dimension_separator') + warnings.warn('Keyword argument `dimension_separator` will be ignored') + dimension_separator = "." 
+ super().__init__(*args, dimension_separator=dimension_separator, **kwargs) + + def _swap_separator(self, key): + segments = list(key.split('/')) + if segments: + last_segment = segments[-1] + if _prog_ckey.match(last_segment): + coords = list(last_segment.split('.')) + last_segment = '/'.join(coords[::-1]) + segments = segments[:-1] + [last_segment] + key = '/'.join(segments) + return key + + def _normalize_key(self, key): + if is_chunk_key(key): + key = invert_chunk_coords(key) + + key = normalize_storage_path(key).lstrip("/") + if key: + *bits, end = key.split("/") + + if end not in (self._array_meta_key, self._group_meta_key, self._attrs_key): + end = end.replace(".", "/") + key = "/".join(bits + [end]) + return key.lower() if self.normalize_keys else key + + def __getitem__(self, key): + if key.endswith(zarr_group_meta_key): + + key = key.replace(zarr_group_meta_key, self._group_meta_key) + value = group_metadata_to_zarr(self._load_n5_attrs(key)) + + return json_dumps(value) + elif key.endswith(zarr_array_meta_key): + + key = key.replace(zarr_array_meta_key, self._array_meta_key) + value = array_metadata_to_zarr(self._load_n5_attrs(key)) + + return json_dumps(value) + + elif key.endswith(zarr_attrs_key): + + key = key.replace(zarr_attrs_key, self._attrs_key) + value = attrs_to_zarr(self._load_n5_attrs(key)) + + if len(value) == 0: + raise KeyError(key) + else: return json_dumps(value) - elif key.endswith(zarr_attrs_key): + elif is_chunk_key(key): + key = self._swap_separator(key) - key = key.replace(zarr_attrs_key, self._attrs_key) - value = attrs_to_zarr(self._load_n5_attrs(key)) + return super().__getitem__(key) - if len(value) == 0: - raise KeyError(key) - else: - return json_dumps(value) + def __setitem__(self, key, value): + if key.endswith(zarr_group_meta_key): - elif is_chunk_key(key): - key = self._swap_separator(key) + key = key.replace(zarr_group_meta_key, self._group_meta_key) - return super().__getitem__(key) + n5_attrs = self._load_n5_attrs(key) 
+ n5_attrs.update(**group_metadata_to_n5(json_loads(value))) - def __setitem__(self, key, value): - if key.endswith(zarr_group_meta_key): + value = json_dumps(n5_attrs) - key = key.replace(zarr_group_meta_key, self._group_meta_key) + elif key.endswith(zarr_array_meta_key): - n5_attrs = self._load_n5_attrs(key) - n5_attrs.update(**group_metadata_to_n5(json_loads(value))) + key = key.replace(zarr_array_meta_key, self._array_meta_key) - value = json_dumps(n5_attrs) + n5_attrs = self._load_n5_attrs(key) + n5_attrs.update(**array_metadata_to_n5(json_loads(value))) - elif key.endswith(zarr_array_meta_key): + value = json_dumps(n5_attrs) - key = key.replace(zarr_array_meta_key, self._array_meta_key) + elif key.endswith(zarr_attrs_key): - n5_attrs = self._load_n5_attrs(key) - n5_attrs.update(**array_metadata_to_n5(json_loads(value))) + key = key.replace(zarr_attrs_key, self._attrs_key) - value = json_dumps(n5_attrs) + n5_attrs = self._load_n5_attrs(key) + zarr_attrs = json_loads(value) - elif key.endswith(zarr_attrs_key): + for k in n5_keywords: + if k in zarr_attrs.keys(): + raise ValueError( + "Can not set attribute %s, this is a reserved N5 keyword" % k + ) - key = key.replace(zarr_attrs_key, self._attrs_key) + # replace previous user attributes + for k in list(n5_attrs.keys()): + if k not in n5_keywords: + del n5_attrs[k] - n5_attrs = self._load_n5_attrs(key) - zarr_attrs = json_loads(value) + # add new user attributes + n5_attrs.update(**zarr_attrs) - for k in n5_keywords: - if k in zarr_attrs.keys(): - raise ValueError( - "Can not set attribute %s, this is a reserved N5 keyword" % k - ) + value = json_dumps(n5_attrs) - # replace previous user attributes - for k in list(n5_attrs.keys()): - if k not in n5_keywords: - del n5_attrs[k] + elif is_chunk_key(key): + key = self._swap_separator(key) - # add new user attributes - n5_attrs.update(**zarr_attrs) + super().__setitem__(key, value) - value = json_dumps(n5_attrs) + def __delitem__(self, key): - elif is_chunk_key(key): 
- key = self._swap_separator(key) + if key.endswith(zarr_group_meta_key): # pragma: no cover + key = key.replace(zarr_group_meta_key, self._group_meta_key) + elif key.endswith(zarr_array_meta_key): # pragma: no cover + key = key.replace(zarr_array_meta_key, self._array_meta_key) + elif key.endswith(zarr_attrs_key): # pragma: no cover + key = key.replace(zarr_attrs_key, self._attrs_key) + elif is_chunk_key(key): + key = self._swap_separator(key) - super().__setitem__(key, value) + super().__delitem__(key) - def __delitem__(self, key): + def __contains__(self, key): + if key.endswith(zarr_group_meta_key): - if key.endswith(zarr_group_meta_key): # pragma: no cover - key = key.replace(zarr_group_meta_key, self._group_meta_key) - elif key.endswith(zarr_array_meta_key): # pragma: no cover - key = key.replace(zarr_array_meta_key, self._array_meta_key) - elif key.endswith(zarr_attrs_key): # pragma: no cover - key = key.replace(zarr_attrs_key, self._attrs_key) - elif is_chunk_key(key): - key = self._swap_separator(key) + key = key.replace(zarr_group_meta_key, self._group_meta_key) + if key not in self: + return False + # group if not a dataset (attributes do not contain 'dimensions') + return "dimensions" not in self._load_n5_attrs(key) - super().__delitem__(key) + elif key.endswith(zarr_array_meta_key): - def __contains__(self, key): - if key.endswith(zarr_group_meta_key): + key = key.replace(zarr_array_meta_key, self._array_meta_key) + # array if attributes contain 'dimensions' + return "dimensions" in self._load_n5_attrs(key) - key = key.replace(zarr_group_meta_key, self._group_meta_key) - if key not in self: - return False - # group if not a dataset (attributes do not contain 'dimensions') - return "dimensions" not in self._load_n5_attrs(key) + elif key.endswith(zarr_attrs_key): - elif key.endswith(zarr_array_meta_key): + key = key.replace(zarr_attrs_key, self._attrs_key) + return self._contains_attrs(key) - key = key.replace(zarr_array_meta_key, self._array_meta_key) - 
# array if attributes contain 'dimensions' - return "dimensions" in self._load_n5_attrs(key) + elif is_chunk_key(key): + key = self._swap_separator(key) - elif key.endswith(zarr_attrs_key): + return super().__contains__(key) - key = key.replace(zarr_attrs_key, self._attrs_key) - return self._contains_attrs(key) + def __eq__(self, other): + return isinstance(other, N5FSStore) and self.path == other.path - elif is_chunk_key(key): - key = self._swap_separator(key) + def listdir(self, path=None): + if path is not None: + path = invert_chunk_coords(path) - return super().__contains__(key) + # We can't use NestedDirectoryStore's listdir, as it requires + # array_meta_key to be present in array directories, which this store + # doesn't provide. + children = super().listdir(path=path) + if self._is_array(path): - def __eq__(self, other): - return isinstance(other, N5FSStore) and self.path == other.path + # replace n5 attribute file with respective zarr attribute files + children.remove(self._array_meta_key) + children.append(zarr_array_meta_key) + if self._contains_attrs(path): + children.append(zarr_attrs_key) - def listdir(self, path=None): - if path is not None: - path = invert_chunk_coords(path) + # special handling of directories containing an array to map + # inverted nested chunk keys back to standard chunk keys + new_children = [] + root_path = self.dir_path(path) + for entry in children: + entry_path = os.path.join(root_path, entry) + if _prog_number.match(entry) and self.fs.isdir(entry_path): + for file_name in self.fs.find(entry_path): + file_path = os.path.join(root_path, file_name) + rel_path = file_path.split(root_path)[1] + new_child = rel_path.lstrip('/').replace('/', ".") + new_children.append(invert_chunk_coords(new_child)) + else: + new_children.append(entry) + return sorted(new_children) - # We can't use NestedDirectoryStore's listdir, as it requires - # array_meta_key to be present in array directories, which this store - # doesn't provide. 
- children = super().listdir(path=path) - if self._is_array(path): + elif self._is_group(path): - # replace n5 attribute file with respective zarr attribute files - children.remove(self._array_meta_key) - children.append(zarr_array_meta_key) - if self._contains_attrs(path): - children.append(zarr_attrs_key) + # replace n5 attribute file with respective zarr attribute files + children.remove(self._group_meta_key) + children.append(zarr_group_meta_key) + if self._contains_attrs(path): # pragma: no cover + children.append(zarr_attrs_key) + return sorted(children) + else: + return children - # special handling of directories containing an array to map - # inverted nested chunk keys back to standard chunk keys - new_children = [] - root_path = self.dir_path(path) - for entry in children: - entry_path = os.path.join(root_path, entry) - if _prog_number.match(entry) and self.fs.isdir(entry_path): - for file_name in self.fs.find(entry_path): - file_path = os.path.join(root_path, file_name) - rel_path = file_path.split(root_path)[1] - new_child = rel_path.lstrip('/').replace('/', ".") - new_children.append(invert_chunk_coords(new_child)) - else: - new_children.append(entry) - return sorted(new_children) - - elif self._is_group(path): - - # replace n5 attribute file with respective zarr attribute files - children.remove(self._group_meta_key) - children.append(zarr_group_meta_key) - if self._contains_attrs(path): # pragma: no cover - children.append(zarr_attrs_key) - return sorted(children) - else: - return children + def _load_n5_attrs(self, path): + try: + s = super().__getitem__(path) + return json_loads(s) + except KeyError: + return {} - def _load_n5_attrs(self, path): - try: - s = super().__getitem__(path) - return json_loads(s) - except KeyError: - return {} + def _is_group(self, path): - def _is_group(self, path): + if path is None: + attrs_key = self._attrs_key + else: + attrs_key = os.path.join(path, self._attrs_key) - if path is None: - attrs_key = self._attrs_key - 
else: - attrs_key = os.path.join(path, self._attrs_key) + n5_attrs = self._load_n5_attrs(attrs_key) + return len(n5_attrs) > 0 and "dimensions" not in n5_attrs - n5_attrs = self._load_n5_attrs(attrs_key) - return len(n5_attrs) > 0 and "dimensions" not in n5_attrs + def _is_array(self, path): - def _is_array(self, path): + if path is None: + attrs_key = self._attrs_key + else: + attrs_key = os.path.join(path, self._attrs_key) - if path is None: - attrs_key = self._attrs_key - else: - attrs_key = os.path.join(path, self._attrs_key) + return "dimensions" in self._load_n5_attrs(attrs_key) - return "dimensions" in self._load_n5_attrs(attrs_key) + def _contains_attrs(self, path): - def _contains_attrs(self, path): + if path is None: + attrs_key = self._attrs_key + else: + if not path.endswith(self._attrs_key): + attrs_key = os.path.join(path, self._attrs_key) + else: # pragma: no cover + attrs_key = path - if path is None: - attrs_key = self._attrs_key - else: - if not path.endswith(self._attrs_key): - attrs_key = os.path.join(path, self._attrs_key) - else: # pragma: no cover - attrs_key = path - - attrs = attrs_to_zarr(self._load_n5_attrs(attrs_key)) - return len(attrs) > 0 -except ImportError: # pragma: no cover - pass + attrs = attrs_to_zarr(self._load_n5_attrs(attrs_key)) + return len(attrs) > 0 def is_chunk_key(key):