Roundtrip kerchunk tests without requiring kerchunk #405

Closed · wants to merge 2 commits
14 changes: 14 additions & 0 deletions virtualizarr/tests/__init__.py
@@ -1,8 +1,10 @@
import importlib
import itertools

import fsspec
import numpy as np
import pytest
import xarray as xr
from packaging.version import Version

from virtualizarr.manifests import ChunkManifest, ManifestArray
@@ -105,3 +107,15 @@ def offset_from_chunk_key(ind: tuple[int, ...]) -> int:

def length_from_chunk_key(ind: tuple[int, ...]) -> int:
return sum(ind) + 5


def open_dataset_kerchunk(
filename_or_obj: str | dict, *, storage_options=None, **kwargs
) -> xr.Dataset:
"""Equivalent to ``xr.open_dataset(..., engine="kerchunk")`` but without depending
on the kerchunk library.
"""
m = fsspec.filesystem(
"reference", fo=filename_or_obj, **(storage_options or {})
).get_mapper()
return xr.open_dataset(m, engine="zarr", consolidated=False, **kwargs)
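
For orientation, a minimal usage sketch of this helper (`vds` here is an assumed pre-existing virtual dataset, not part of this diff; the last two lines spell out what the helper does internally):

# Sketch: open kerchunk references without importing kerchunk.
refs = vds.virtualize.to_kerchunk(format="dict")
ds = open_dataset_kerchunk(refs, decode_times=False)

# Equivalent path without the helper: fsspec's "reference" filesystem
# interprets the kerchunk refs mapping, and xarray reads it via its zarr engine.
m = fsspec.filesystem("reference", fo=refs).get_mapper()
ds = xr.open_dataset(m, engine="zarr", consolidated=False, decode_times=False)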
3 changes: 2 additions & 1 deletion virtualizarr/tests/test_backend.py
@@ -15,6 +15,7 @@
from virtualizarr.readers.hdf import HDFVirtualBackend
from virtualizarr.tests import (
has_astropy,
open_dataset_kerchunk,
parametrize_over_hdf_backends,
requires_hdf5plugin,
requires_imagecodecs,
@@ -321,7 +322,7 @@ def test_virtualizarr_vs_local_nisar(self, hdf_backend):
)
tmpref = "/tmp/cmip6.json"
vds.virtualize.to_kerchunk(tmpref, format="json")
dsV = xr.open_dataset(tmpref, engine="kerchunk")
dsV = open_dataset_kerchunk(tmpref)

# xrt.assert_identical(dsXR, dsV) #Attribute order changes
xrt.assert_equal(dsXR, dsV)
38 changes: 22 additions & 16 deletions virtualizarr/tests/test_integration.py
@@ -8,7 +8,13 @@

from virtualizarr import open_virtual_dataset
from virtualizarr.manifests import ChunkManifest, ManifestArray
from virtualizarr.tests import parametrize_over_hdf_backends, requires_kerchunk
from virtualizarr.tests import (
has_kerchunk,
open_dataset_kerchunk,
parametrize_over_hdf_backends,
requires_kerchunk,
requires_zarr_python,
)
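
For context, has_kerchunk, requires_kerchunk, and requires_zarr_python are optional-dependency flags and skip markers exported by virtualizarr.tests. Pairs like these are conventionally built with a small importorskip helper along the following lines (a sketch under that assumption, not the module's actual code):

import importlib

import pytest

def _importorskip(modname: str):
    # Probe for the optional dependency and return a boolean flag plus a
    # skipif marker, so tests can adjust parametrization or skip entirely.
    try:
        importlib.import_module(modname)
        has = True
    except ImportError:
        has = False
    return has, pytest.mark.skipif(not has, reason=f"requires {modname}")

has_kerchunk, requires_kerchunk = _importorskip("kerchunk")
has_zarr_python, requires_zarr_python = _importorskip("zarr")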
from virtualizarr.translators.kerchunk import (
dataset_from_kerchunk_refs,
)
@@ -84,8 +90,10 @@ def test_numpy_arrays_to_inlined_kerchunk_refs(
assert refs["refs"]["time/0"] == expected["refs"]["time/0"]


@requires_kerchunk
@pytest.mark.parametrize("format", ["dict", "json", "parquet"])
@requires_zarr_python
@pytest.mark.parametrize(
"format", ["dict", "json", "parquet"] if has_kerchunk else ["dict", "json"]
)
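# Parquet references are only exercised when kerchunk is installed, since
# writing the parquet reference format still goes through kerchunk.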
class TestKerchunkRoundtrip:
@parametrize_over_hdf_backends
def test_kerchunk_roundtrip_no_concat(self, tmpdir, format, hdf_backend):
@@ -103,14 +111,14 @@ def test_kerchunk_roundtrip_no_concat(self, tmpdir, format, hdf_backend):
ds_refs = vds.virtualize.to_kerchunk(format=format)

# use fsspec to read the dataset from the kerchunk references dict
roundtrip = xr.open_dataset(ds_refs, engine="kerchunk", decode_times=False)
roundtrip = open_dataset_kerchunk(ds_refs, decode_times=False)
else:
# write those references to disk as kerchunk references format
vds.virtualize.to_kerchunk(f"{tmpdir}/refs.{format}", format=format)

# use fsspec to read the dataset from disk via the kerchunk references
roundtrip = xr.open_dataset(
f"{tmpdir}/refs.{format}", engine="kerchunk", decode_times=False
roundtrip = open_dataset_kerchunk(
f"{tmpdir}/refs.{format}", decode_times=False
)

# assert all_close to original dataset
@@ -164,16 +172,14 @@ def test_kerchunk_roundtrip_concat(
ds_refs = vds.virtualize.to_kerchunk(format=format)

# use fsspec to read the dataset from the kerchunk references dict
roundtrip = xr.open_dataset(
ds_refs, engine="kerchunk", decode_times=decode_times
)
roundtrip = open_dataset_kerchunk(ds_refs, decode_times=decode_times)
else:
# write those references to disk as kerchunk references format
vds.virtualize.to_kerchunk(f"{tmpdir}/refs.{format}", format=format)

# use fsspec to read the dataset from disk via the kerchunk references
roundtrip = xr.open_dataset(
f"{tmpdir}/refs.{format}", engine="kerchunk", decode_times=decode_times
roundtrip = open_dataset_kerchunk(
f"{tmpdir}/refs.{format}", decode_times=decode_times
)

if decode_times is False:
@@ -214,14 +220,14 @@ def test_non_dimension_coordinates(self, tmpdir, format, hdf_backend):
ds_refs = vds.virtualize.to_kerchunk(format=format)

# use fsspec to read the dataset from the kerchunk references dict
roundtrip = xr.open_dataset(ds_refs, engine="kerchunk", decode_times=False)
roundtrip = open_dataset_kerchunk(ds_refs, decode_times=False)
else:
# write those references to disk as kerchunk references format
vds.virtualize.to_kerchunk(f"{tmpdir}/refs.{format}", format=format)

# use fsspec to read the dataset from disk via the kerchunk references
roundtrip = xr.open_dataset(
f"{tmpdir}/refs.{format}", engine="kerchunk", decode_times=False
roundtrip = open_dataset_kerchunk(
f"{tmpdir}/refs.{format}", decode_times=False
)

# assert equal to original dataset
@@ -265,13 +271,13 @@ def test_datetime64_dtype_fill_value(self, tmpdir, format):
ds_refs = ds.virtualize.to_kerchunk(format=format)

# use fsspec to read the dataset from the kerchunk references dict
roundtrip = xr.open_dataset(ds_refs, engine="kerchunk")
roundtrip = open_dataset_kerchunk(ds_refs)
else:
# write those references to disk as kerchunk references format
ds.virtualize.to_kerchunk(f"{tmpdir}/refs.{format}", format=format)

# use fsspec to read the dataset from disk via the kerchunk references
roundtrip = xr.open_dataset(f"{tmpdir}/refs.{format}", engine="kerchunk")
roundtrip = open_dataset_kerchunk(f"{tmpdir}/refs.{format}")

assert roundtrip.a.attrs == ds.a.attrs

19 changes: 12 additions & 7 deletions virtualizarr/tests/test_readers/test_hdf/test_hdf_integration.py
@@ -4,16 +4,21 @@

import virtualizarr
from virtualizarr.readers.hdf import HDFVirtualBackend
from virtualizarr.tests import requires_kerchunk
from virtualizarr.tests import (
open_dataset_kerchunk,
requires_hdf5plugin,
requires_imagecodecs,
)


@requires_kerchunk
@requires_hdf5plugin
@requires_imagecodecs
class TestIntegration:
@pytest.mark.xfail(
reason="0 time start is being interpreted as fillvalue see issues/280"
)
def test_filters_h5netcdf_roundtrip(
self, tmpdir, filter_encoded_roundtrip_hdf5_file, backend=HDFVirtualBackend
self, tmpdir, filter_encoded_roundtrip_hdf5_file
):
ds = xr.open_dataset(filter_encoded_roundtrip_hdf5_file, decode_times=True)
vds = virtualizarr.open_virtual_dataset(
@@ -24,7 +29,7 @@ def test_filters_h5netcdf_roundtrip(
)
kerchunk_file = f"{tmpdir}/kerchunk.json"
vds.virtualize.to_kerchunk(kerchunk_file, format="json")
roundtrip = xr.open_dataset(kerchunk_file, engine="kerchunk", decode_times=True)
roundtrip = open_dataset_kerchunk(kerchunk_file, decode_times=True)
xrt.assert_allclose(ds, roundtrip)

@pytest.mark.xfail(
@@ -37,8 +42,8 @@ def test_filters_netcdf4_roundtrip(
ds = xr.open_dataset(filepath)
vds = virtualizarr.open_virtual_dataset(filepath, backend=HDFVirtualBackend)
kerchunk_file = f"{tmpdir}/kerchunk.json"
vds.virtualize.to_kerchunk(kerchunk_file, format="json")
roundtrip = xr.open_dataset(kerchunk_file, engine="kerchunk")
vds.virtualize.to_kerchunk(kerchunk_file, format="dict")
roundtrip = open_dataset_kerchunk(kerchunk_file)
xrt.assert_equal(ds, roundtrip)

def test_filter_and_cf_roundtrip(self, tmpdir, filter_and_cf_roundtrip_hdf5_file):
@@ -48,5 +53,5 @@ def test_filter_and_cf_roundtrip(self, tmpdir, filter_and_cf_roundtrip_hdf5_file
)
kerchunk_file = f"{tmpdir}/filter_cf_kerchunk.json"
vds.virtualize.to_kerchunk(kerchunk_file, format="json")
roundtrip = xr.open_dataset(kerchunk_file, engine="kerchunk")
roundtrip = open_dataset_kerchunk(kerchunk_file)
xrt.assert_allclose(ds, roundtrip)