
WIP: Refactor TIFF backend to use async_tiff and obstore #488

Closed
wants to merge 42 commits
Changes from all commits
Commits
42 commits
0ec7f19
Start work on VirtualObjectStore
maxrjones Mar 9, 2025
08aba9c
Start refactoring the TIFF backend to use async-tiff
maxrjones Mar 13, 2025
1820025
Update VirtualObjectStore
maxrjones Mar 13, 2025
d2e670b
Handle group zarr.json
maxrjones Mar 13, 2025
8308983
Fix dim names and chunkmanifest shape
maxrjones Mar 14, 2025
577b882
Load some chunks
maxrjones Mar 14, 2025
33a0399
Start generalizing listing
maxrjones Mar 14, 2025
644bc58
Handle running event loops
maxrjones Mar 15, 2025
9b1b63a
Handle other request types
maxrjones Mar 15, 2025
d53e426
Draft VirtualObjectStore implementation
maxrjones Mar 16, 2025
5ad7d37
Add test
maxrjones Mar 16, 2025
ac64c16
Handle dataset metadata
maxrjones Mar 16, 2025
e9f213f
Fix chunk length
maxrjones Mar 16, 2025
1079607
Pass kwargs to open_dataset
maxrjones Mar 16, 2025
b27dec9
Add variable attrs to Zarr metadata
maxrjones Mar 16, 2025
266fbed
Remove extra env
maxrjones Mar 16, 2025
480c175
Raise NotImplementErrors on get_partial_values and exists
maxrjones Mar 16, 2025
04a2e60
Add docstring for
maxrjones Mar 16, 2025
81b13c8
Make all VirtualObjectStore instances read_only
maxrjones Mar 16, 2025
540b0a8
Remove unused get_partial_values functionality
maxrjones Mar 16, 2025
29111c2
Add more docstrings
maxrjones Mar 16, 2025
147682b
Fix some typos
maxrjones Mar 16, 2025
2475e6d
Fix typing
maxrjones Mar 16, 2025
cc1b2f8
Add release notes
maxrjones Mar 16, 2025
5dee76d
Merge branch 'develop' into virtual-obstore-store
maxrjones Mar 16, 2025
5996c3f
Merge branch 'develop' into tiff_with_virtualobjectstore
maxrjones Mar 16, 2025
e3195fd
Add obstore to test and typing envs
maxrjones Mar 16, 2025
e4bfdbd
Separate out byte range transformation
maxrjones Mar 16, 2025
ac37f15
Revise based on code review
maxrjones Mar 17, 2025
58fc240
Simplify typing
maxrjones Mar 17, 2025
f15a52a
Move store selection outside try/except block
maxrjones Mar 17, 2025
52272d0
Remove accessor method
maxrjones Mar 17, 2025
756c3c4
Rename to ManifestStore
maxrjones Mar 17, 2025
c14e67a
Don't include ManifestStore in test_integration
maxrjones Mar 17, 2025
e8b995c
Merge branch 'develop' into virtual-obstore-store
maxrjones Mar 18, 2025
52287e9
Add basic test for ManifestStore
maxrjones Mar 18, 2025
59b45a0
Merge branch 'virtual-obstore-store' into tiff_with_virtualobjectstore
maxrjones Mar 19, 2025
bc52aa0
Refactor around Zarr model
maxrjones Mar 19, 2025
f33b614
Fix import of optional deps
maxrjones Mar 19, 2025
09dacc1
Fix typo
maxrjones Mar 19, 2025
83a5622
Fix import
maxrjones Mar 19, 2025
71543cd
Rename static method
maxrjones Mar 19, 2025
2 changes: 2 additions & 0 deletions docs/releases.rst
@@ -9,6 +9,8 @@ v1.3.3 (unreleased)
New Features
~~~~~~~~~~~~

- Added experimental VirtualObjectStore for loading data directly from virtual datasets.

Breaking changes
~~~~~~~~~~~~~~~~

20 changes: 15 additions & 5 deletions pyproject.toml
@@ -38,7 +38,9 @@ remote = [
"aiohttp",
"s3fs",
]

obstore = [
"obstore @ git+https://github.com/developmentseed/obstore@main#subdirectory=obstore",
]
# non-kerchunk-based readers
hdf = [
"virtualizarr[remote]",
@@ -64,11 +66,16 @@ fits = [
"kerchunk>=0.2.8",
"astropy",
]
tif = [
"obstore @ git+https://github.com/developmentseed/obstore@main#subdirectory=obstore",
"async-tiff @ git+https://github.com/developmentseed/async-tiff#subdirectory=python",
]
all_readers = [
"virtualizarr[hdf]",
"virtualizarr[hdf5]",
"virtualizarr[netcdf3]",
"virtualizarr[fits]",
"virtualizarr[tif]",
]

# writers
@@ -157,6 +164,9 @@ h5netcdf = ">=1.5.0,<2"
[tool.pixi.feature.icechunk-dev.dependencies]
rust = "*"

[tool.pixi.feature.rio.dependencies]
rioxarray = "*"

# Define commands to run within the test environments
[tool.pixi.feature.dev.tasks]
run-mypy = { cmd = "mypy virtualizarr" }
@@ -170,11 +180,11 @@ run-tests-html-cov = { cmd = "pytest --run-network-tests --verbose --cov=virtual
[tool.pixi.environments]
min-deps = ["dev", "hdf", "hdf5", "hdf5-lib"] # VirtualiZarr/conftest.py uses h5py, so the minimum set of dependencies for testing still includes hdf libs
# Inherit from min-deps to get all the test commands, along with optional dependencies
test = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib"]
test-py311 = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "py311"] # test against python 3.11
test-py312 = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "py312"] # test against python 3.12
test = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "tif", "rio"]
test-py311 = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "tif", "rio", "py311"] # test against python 3.11
test-py312 = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "tif", "rio", "py312"] # test against python 3.12
upstream = ["dev", "hdf", "hdf5", "hdf5-lib", "netcdf3", "upstream", "icechunk-dev"]
all = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "all_readers", "all_writers"]
all = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "tif", "all_readers", "all_writers"]
docs = ["docs"]


1 change: 1 addition & 0 deletions virtualizarr/__init__.py
@@ -1,5 +1,6 @@
from virtualizarr.manifests import ChunkManifest, ManifestArray # type: ignore # noqa
from virtualizarr.accessor import VirtualiZarrDatasetAccessor # type: ignore # noqa

from virtualizarr.backend import open_virtual_dataset # noqa: F401

from importlib.metadata import version as _version
7 changes: 6 additions & 1 deletion virtualizarr/accessor.py
@@ -1,8 +1,13 @@
from __future__ import annotations

from datetime import datetime
from pathlib import Path
from typing import TYPE_CHECKING, Callable, Literal, overload

from xarray import Dataset, register_dataset_accessor
from xarray import (
Dataset,
register_dataset_accessor,
)

from virtualizarr.manifests import ManifestArray
from virtualizarr.types.kerchunk import KerchunkStoreRefs
1 change: 1 addition & 0 deletions virtualizarr/manifests/__init__.py
@@ -2,4 +2,5 @@
# This is just to avoid conflicting with some type of file called manifest that .gitignore recommends ignoring.

from .array import ManifestArray # type: ignore # noqa
from .group import ManifestGroup # type: ignore # noqa
from .manifest import ChunkEntry, ChunkManifest # type: ignore # noqa
36 changes: 36 additions & 0 deletions virtualizarr/manifests/group.py
@@ -0,0 +1,36 @@
from typing import TypeAlias

from zarr.core.group import GroupMetadata

from virtualizarr.manifests import ManifestArray

ManifestDict: TypeAlias = dict[str, ManifestArray]


class ManifestGroup:
"""
Virtualized representation of multiple ManifestArrays as a Zarr Group.
"""

_manifest_dict: ManifestDict
_metadata: GroupMetadata

def __init__(
self,
manifest_dict: ManifestDict,
attributes: dict,
) -> None:
"""
Create a ManifestGroup from the dictionary of ManifestArrays and the group / dataset level metadata

Parameters
----------
manifest_dict : ManifestDict
    Dictionary of ManifestArrays keyed by array name
attributes : dict
    Attributes to include in the Group metadata
"""

self._metadata = GroupMetadata(attributes=attributes)
self._manifest_dict = manifest_dict

def __str__(self) -> str:
return f"ManifestGroup({self._manifest_dict}, {self._metadata})"

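The new `ManifestGroup` pairs a dict of `ManifestArray`s with zarr's `GroupMetadata`. A stdlib-only sketch of the group-level `zarr.json` document that metadata corresponds to (the helper name here is illustrative, not part of the PR; field names follow the Zarr v3 spec):

```python
import json

# Hedged sketch: Zarr v3 group metadata is a JSON document carrying
# zarr_format, node_type, and user attributes.
def group_zarr_json(attributes: dict) -> str:
    return json.dumps(
        {"zarr_format": 3, "node_type": "group", "attributes": attributes}
    )

doc = json.loads(group_zarr_json({"title": "virtual TIFF dataset"}))
```

This is the shape of the `zarr.json` the "Handle group zarr.json" commit above refers to; the real implementation delegates serialization to `zarr.core.group.GroupMetadata`.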
5 changes: 4 additions & 1 deletion virtualizarr/manifests/utils.py
@@ -17,6 +17,7 @@ def create_v3_array_metadata(
fill_value: Any = None,
codecs: Optional[list[Dict[str, Any]]] = None,
attributes: Optional[Dict[str, Any]] = None,
dimension_names: Optional[tuple[str, ...]] = None,
) -> ArrayV3Metadata:
"""
Create an ArrayV3Metadata instance with standard configuration.
@@ -36,6 +37,8 @@
List of codec configurations
attributes : Dict[str, Any], optional
Additional attributes for the array
dimension_names : tuple[str], optional
Names of the dimensions

Returns
-------
@@ -56,7 +59,7 @@
dtype=data_type,
),
attributes=attributes or {},
dimension_names=None,
dimension_names=dimension_names,
storage_transformers=None,
)

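The change above threads `dimension_names` through to `ArrayV3Metadata` instead of hard-coding `None`. A stdlib-only sketch of the resulting Zarr v3 array metadata fields (field names follow the v3 spec; the helper itself is illustrative, not the PR's `create_v3_array_metadata`):

```python
# Hedged sketch of the Zarr v3 array metadata that now includes
# dimension_names, so downstream readers can recover xarray dims.
def v3_array_metadata(shape, data_type, chunk_shape, dimension_names=None):
    return {
        "zarr_format": 3,
        "node_type": "array",
        "shape": list(shape),
        "data_type": data_type,
        "chunk_grid": {
            "name": "regular",
            "configuration": {"chunk_shape": list(chunk_shape)},
        },
        "dimension_names": list(dimension_names) if dimension_names else None,
    }

meta = v3_array_metadata(
    (720, 1440), "float32", (180, 360), dimension_names=("lat", "lon")
)
```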
43 changes: 43 additions & 0 deletions virtualizarr/readers/common.py
@@ -1,14 +1,19 @@
import dataclasses
from abc import ABC
from collections.abc import Iterable, Mapping, MutableMapping
from typing import (
Any,
Hashable,
Optional,
TypedDict,
)

import numpy as np
import xarray # noqa
from numcodecs.abc import Codec
from xarray import (
Coordinates,
DataArray,
Dataset,
DataTree,
Index,
@@ -21,6 +26,26 @@
from virtualizarr.utils import _FsspecFSFromFilepath


@dataclasses.dataclass
class ZstdProperties:
level: int


@dataclasses.dataclass
class ShuffleProperties:
elementsize: int


@dataclasses.dataclass
class ZlibProperties:
level: int


class CFCodec(TypedDict):
target_dtype: np.dtype
codec: Codec


def maybe_open_loadable_vars_and_indexes(
filepath: str,
loadable_variables,
@@ -86,6 +111,24 @@
return loadable_vars, indexes


def construct_virtual_dataarray(
virtual_var,
coord_vars: Optional[Variable] = None,
name: Optional[str] = None,
dims: Optional[Hashable] = None,
attrs: Optional[dict] = None,
) -> DataArray:
"""Construct a virtual DataArray from consistuent parts."""
vda = DataArray(

data=virtual_var,
coords=coord_vars,
attrs=attrs,
dims=dims,
name=name,
)
return vda



def construct_virtual_dataset(
virtual_vars,
loadable_vars,
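`construct_virtual_dataarray` simply forwards its constituent parts to `xarray.DataArray`. A hedged usage sketch, assuming xarray and numpy are installed; a plain numpy array stands in here for the ManifestArray-backed variable the PR would actually pass:

```python
import numpy as np
from xarray import DataArray

# Mirrors the helper added in this PR: bundle the parts into a DataArray.
def construct_virtual_dataarray(
    virtual_var, coord_vars=None, name=None, dims=None, attrs=None
) -> DataArray:
    return DataArray(
        data=virtual_var, coords=coord_vars, attrs=attrs, dims=dims, name=name
    )

vda = construct_virtual_dataarray(
    np.zeros((2, 3)), dims=("y", "x"), name="tas", attrs={"units": "K"}
)
```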
28 changes: 7 additions & 21 deletions virtualizarr/readers/hdf/filters.py
@@ -1,12 +1,18 @@
import dataclasses
from typing import TYPE_CHECKING, List, Tuple, TypedDict, Union
from typing import TYPE_CHECKING, List, Tuple, Union

import numcodecs.registry as registry
import numpy as np
from numcodecs.abc import Codec
from numcodecs.fixedscaleoffset import FixedScaleOffset
from xarray.coding.variables import _choose_float_dtype

from virtualizarr.readers.common import (
CFCodec,
ShuffleProperties,
ZlibProperties,
ZstdProperties,
)
from virtualizarr.utils import soft_import

if TYPE_CHECKING:
@@ -52,26 +58,6 @@ def __post_init__(self):
self.cname = blosc_compressor_codes[self.cname]


@dataclasses.dataclass
class ZstdProperties:
level: int


@dataclasses.dataclass
class ShuffleProperties:
elementsize: int


@dataclasses.dataclass
class ZlibProperties:
level: int


class CFCodec(TypedDict):
target_dtype: np.dtype
codec: Codec


def _filter_to_codec(
filter_id: str, filter_properties: Union[int, None, Tuple] = None
) -> Codec:
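This hunk moves `ZstdProperties`, `ShuffleProperties`, `ZlibProperties`, and `CFCodec` out of the HDF-specific filters module into `virtualizarr.readers.common`, so the new TIFF path can share them. The dataclasses are reproduced below verbatim from the diff, followed by an illustrative lookup in the spirit of `_filter_to_codec` (the `parse_filter_properties` helper is hypothetical; the real function returns numcodecs `Codec` instances):

```python
import dataclasses

@dataclasses.dataclass
class ZstdProperties:
    level: int

@dataclasses.dataclass
class ShuffleProperties:
    elementsize: int

@dataclasses.dataclass
class ZlibProperties:
    level: int

_FILTERS = {"zstd": ZstdProperties, "shuffle": ShuffleProperties, "zlib": ZlibProperties}

def parse_filter_properties(filter_id: str, value: int):
    # Hypothetical helper: map a filter id and its raw property value
    # onto the matching typed record.
    return _FILTERS[filter_id](value)

props = parse_filter_properties("zlib", 4)
```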