Skip to content

Commit f4d9f5b

Browse files
maxrjonesTomNicholaskylebarronpre-commit-ci[bot]
authored
Add ManifestStore for loading data from ManifestArrays (#490)
* Draft ManifestStore implementation --------- Co-authored-by: Tom Nicholas <[email protected]> Co-authored-by: Kyle Barron <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 206277f commit f4d9f5b

File tree

10 files changed

+637
-5
lines changed

10 files changed

+637
-5
lines changed

docs/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
"numpy": ("https://numpy.org/doc/stable/", None),
4141
"zarr": ("https://zarr.readthedocs.io/en/stable/", None),
4242
"xarray": ("https://docs.xarray.dev/en/stable/", None),
43+
"obstore": ("https://developmentseed.org/obstore/latest/", None),
4344
}
4445

4546
# Add any paths that contain templates here, relative to this directory.

docs/releases.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ v1.3.3 (unreleased)
99
New Features
1010
~~~~~~~~~~~~
1111

12+
- Added experimental ManifestStore (:pull:`490`).
13+
1214
Breaking changes
1315
~~~~~~~~~~~~~~~~
1416

pyproject.toml

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,9 @@ remote = [
3838
"aiohttp",
3939
"s3fs",
4040
]
41-
41+
obstore = [
42+
"obstore>=0.5.1",
43+
]
4244
# non-kerchunk-based readers
4345
hdf = [
4446
"virtualizarr[remote]",
@@ -172,11 +174,11 @@ run-tests-html-cov = { cmd = "pytest -n auto --run-network-tests --verbose --cov
172174
[tool.pixi.environments]
173175
min-deps = ["dev", "hdf", "hdf5", "hdf5-lib"] # VirtualiZarr/conftest.py using h5py, so the minimum set of dependencies for testing still includes hdf libs
174176
# Inherit from min-deps to get all the test commands, along with optional dependencies
175-
test = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib"]
176-
test-py311 = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "py311"] # test against python 3.11
177-
test-py312 = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "py312"] # test against python 3.12
177+
test = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore"]
178+
test-py311 = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "py311"] # test against python 3.11
179+
test-py312 = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "py312"] # test against python 3.12
178180
upstream = ["dev", "hdf", "hdf5", "hdf5-lib", "netcdf3", "upstream", "icechunk-dev"]
179-
all = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "all_readers", "all_writers"]
181+
all = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "all_readers", "all_writers"]
180182
docs = ["docs"]
181183

182184

virtualizarr/manifests/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,6 @@
22
# This is just to avoid conflicting with some type of file called manifest that .gitignore recommends ignoring.
33

44
from .array import ManifestArray # type: ignore # noqa
5+
from .group import ManifestGroup # type: ignore # noqa
56
from .manifest import ChunkEntry, ChunkManifest # type: ignore # noqa
7+
from .store import ManifestStore # type: ignore # noqa

virtualizarr/manifests/group.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from typing import Mapping, TypeAlias
2+
3+
from zarr.core.group import GroupMetadata
4+
5+
from virtualizarr.manifests import ManifestArray
6+
7+
ManifestArrayVariableMapping: TypeAlias = dict[str, ManifestArray]
8+
9+
10+
class ManifestGroup:
    """
    Virtualized representation of multiple ManifestArrays as a Zarr Group.

    Holds a mapping of names to ManifestArrays together with the
    group-level metadata built from the supplied attributes.
    """

    # TODO: Consider refactoring according to https://github.com/zarr-developers/VirtualiZarr/pull/490#discussion_r2007805272
    # Arrays that make up this group, keyed by name.
    _manifest_arrays: Mapping[str, ManifestArray]
    # Group-level metadata constructed from the attributes given to __init__.
    _metadata: GroupMetadata

    def __init__(
        self,
        manifest_arrays: ManifestArrayVariableMapping,
        attributes: dict,
    ) -> None:
        """
        Create a ManifestGroup from the dictionary of ManifestArrays and the group / dataset level metadata

        Parameters
        ----------
        manifest_arrays : ManifestArrayVariableMapping
            Mapping of names to the ManifestArrays contained in this group.
            The mapping is stored as given (it is not copied).
        attributes : dict
            Attributes to include in the Group metadata.
        """

        self._metadata = GroupMetadata(attributes=attributes)
        self._manifest_arrays = manifest_arrays

    def __str__(self) -> str:
        """Return a string showing the contained arrays and the group metadata."""
        return f"ManifestGroup(manifest_arrays={self._manifest_arrays}, metadata={self._metadata})"

0 commit comments

Comments
 (0)