Skip to content

Commit

Permalink
Merge branch 'main' into main-kerchunk
Browse files Browse the repository at this point in the history
  • Loading branch information
jsignell authored Jan 31, 2025
2 parents 76dcfb7 + 61847e9 commit 2d6f4fb
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 16 deletions.
18 changes: 18 additions & 0 deletions virtualizarr/readers/hdf/hdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,10 @@ def _virtual_vars_from_hdf(
group_name = "/"

variables = {}
non_coordinate_dimesion_vars = HDFVirtualBackend._find_non_coord_dimension_vars(
group=g
)
drop_variables = list(set(drop_variables + non_coordinate_dimesion_vars))
for key in g.keys():
if key not in drop_variables:
if isinstance(g[key], h5py.Dataset):
Expand Down Expand Up @@ -403,3 +407,17 @@ def _get_group_attrs(
g = f
attrs = HDFVirtualBackend._extract_attrs(g)
return attrs

@staticmethod
def _find_non_coord_dimension_vars(group: H5Group) -> List[str]:
dimension_names = []
non_coordinate_dimension_variables = []
for name, obj in group.items():
if "_Netcdf4Dimid" in obj.attrs:
dimension_names.append(name)
for name, obj in group.items():
if type(obj) is h5py.Dataset:
if obj.id.get_storage_size() == 0 and name in dimension_names:
non_coordinate_dimension_variables.append(name)

return non_coordinate_dimension_variables
12 changes: 2 additions & 10 deletions virtualizarr/tests/test_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,6 @@ def test_var_attr_coords(self, netcdf4_file_with_2d_coords, hdf_backend):
+ expected_2d_coords
+ expected_1d_non_dimension_coords
+ expected_scalar_coords
# These should not be included in coords see #401 for more information
+ (["xi_rho", "eta_rho"] if hdf_backend == HDFVirtualBackend else [])
)
assert set(vds.coords) == set(expected_coords)

Expand Down Expand Up @@ -344,10 +342,7 @@ def test_open_subgroup(
indexes={},
backend=hdf_backend,
)
# This should just be ["bar"] see #401 for more information
assert list(vds.variables) == (
["bar", "dim_0"] if hdf_backend == HDFVirtualBackend else ["bar"]
)
assert list(vds.variables) == ["bar"]
assert isinstance(vds["bar"].data, ManifestArray)
assert vds["bar"].shape == (2,)

Expand All @@ -364,10 +359,7 @@ def test_open_root_group(
indexes={},
backend=hdf_backend,
)
# This should just be ["foo"] see #401 for more information
assert list(vds.variables) == (
["foo", "dim_0"] if hdf_backend == HDFVirtualBackend else ["foo"]
)
assert list(vds.variables) == ["foo"]
assert isinstance(vds["foo"].data, ManifestArray)
assert vds["foo"].shape == (3,)

Expand Down
9 changes: 9 additions & 0 deletions virtualizarr/tests/test_readers/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,3 +333,12 @@ def netcdf3_file(tmp_path: pathlib.Path) -> pathlib.Path:
ds.to_netcdf(filepath, format="NETCDF3_CLASSIC")

return filepath


@pytest.fixture
def non_coord_dim(tmpdir):
filepath = f"{tmpdir}/non_coord_dim.nc"
ds = create_test_data(dim_sizes=(20, 80, 10))
ds = ds.drop_dims("dim3")
ds.to_netcdf(filepath, engine="netcdf4")
return filepath
13 changes: 10 additions & 3 deletions virtualizarr/tests/test_readers/test_hdf/test_hdf_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,6 @@ def test_filters_h5netcdf_roundtrip(
roundtrip = xr.open_dataset(kerchunk_file, engine="kerchunk", decode_times=True)
xrt.assert_allclose(ds, roundtrip)

@pytest.mark.xfail(
reason="Coordinate issue affecting only hdf reader see pull/#260"
)
def test_filters_netcdf4_roundtrip(
self, tmpdir, filter_encoded_roundtrip_netcdf4_file
):
Expand All @@ -56,3 +53,13 @@ def test_filter_and_cf_roundtrip(self, tmpdir, filter_and_cf_roundtrip_hdf5_file
vds.virtualize.to_kerchunk(kerchunk_file, format="json")
roundtrip = xr.open_dataset(kerchunk_file, engine="kerchunk")
xrt.assert_allclose(ds, roundtrip)

def test_non_coord_dim(self, tmpdir, non_coord_dim):
ds = xr.open_dataset(non_coord_dim)
vds = virtualizarr.open_virtual_dataset(
non_coord_dim, backend=HDFVirtualBackend
)
kerchunk_file = f"{tmpdir}/kerchunk.json"
vds.virtualize.to_kerchunk(kerchunk_file, format="json")
roundtrip = xr.open_dataset(kerchunk_file, engine="kerchunk")
xrt.assert_equal(ds, roundtrip)
6 changes: 3 additions & 3 deletions virtualizarr/tests/test_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,11 +322,11 @@ def test_mixture_of_manifestarrays_and_numpy_arrays(
@requires_imagecodecs
def test_nbytes(simple_netcdf4):
vds = open_virtual_dataset(simple_netcdf4)
assert vds.virtualize.nbytes == 88
assert vds.nbytes == 104
assert vds.virtualize.nbytes == 32
assert vds.nbytes == 48

vds = open_virtual_dataset(simple_netcdf4, loadable_variables=["foo"])
assert vds.virtualize.nbytes == 104
assert vds.virtualize.nbytes == 48

ds = open_dataset(simple_netcdf4)
assert ds.virtualize.nbytes == ds.nbytes

0 comments on commit 2d6f4fb

Please sign in to comment.