Skip to content

Commit 1d7da8d

Browse files
committed
Cleanup test_backends
1 parent 375895a commit 1d7da8d

File tree

2 files changed

+86
-77
lines changed

2 files changed

+86
-77
lines changed

xarray/structure/combine.py

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
from xarray.core.dataarray import DataArray
1111
from xarray.core.dataset import Dataset
1212
from xarray.core.utils import iterate_nested
13+
from xarray.structure.alignment import AlignmentError
1314
from xarray.structure.concat import concat
1415
from xarray.structure.merge import merge
1516
from xarray.util.deprecation_helpers import (
@@ -334,13 +335,20 @@ def _combine_1d(
334335
else:
335336
raise
336337
else:
337-
combined = merge(
338-
datasets,
339-
compat=compat,
340-
fill_value=fill_value,
341-
join=join,
342-
combine_attrs=combine_attrs,
343-
)
338+
try:
339+
combined = merge(
340+
datasets,
341+
compat=compat,
342+
fill_value=fill_value,
343+
join=join,
344+
combine_attrs=combine_attrs,
345+
)
346+
except AlignmentError as e:
347+
e.add_note(
348+
"If you are intending to concatenate datasets, please specify the concatenation dimension explicitly. "
349+
"Using merge to concatenate is quite inefficient."
350+
)
351+
raise e
344352

345353
return combined
346354

xarray/tests/test_backends.py

Lines changed: 71 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,7 @@
6060
from xarray.core.types import PDDatetimeUnitOptions
6161
from xarray.core.utils import module_available
6262
from xarray.namedarray.pycompat import array_type
63+
from xarray.structure.alignment import AlignmentError
6364
from xarray.tests import (
6465
assert_allclose,
6566
assert_array_equal,
@@ -4795,19 +4796,19 @@ class TestOpenMFDatasetWithDataVarsAndCoordsKw:
47954796
var_name = "v1"
47964797

47974798
@contextlib.contextmanager
4798-
def setup_files_and_datasets(self, fuzz=0):
4799+
def setup_files_and_datasets(self, *, fuzz=0, new_combine_kwargs: bool = False):
47994800
ds1, ds2 = self.gen_datasets_with_common_coord_and_time()
48004801

48014802
# to test join='exact'
48024803
ds1["x"] = ds1.x + fuzz
48034804

4804-
with set_options(use_new_combine_kwarg_defaults=True):
4805-
with create_tmp_file() as tmpfile1:
4806-
with create_tmp_file() as tmpfile2:
4807-
# save data to the temporary files
4808-
ds1.to_netcdf(tmpfile1)
4809-
ds2.to_netcdf(tmpfile2)
4805+
with create_tmp_file() as tmpfile1:
4806+
with create_tmp_file() as tmpfile2:
4807+
# save data to the temporary files
4808+
ds1.to_netcdf(tmpfile1)
4809+
ds2.to_netcdf(tmpfile2)
48104810

4811+
with set_options(use_new_combine_kwarg_defaults=new_combine_kwargs):
48114812
yield [tmpfile1, tmpfile2], [ds1, ds2]
48124813

48134814
def gen_datasets_with_common_coord_and_time(self):
@@ -4850,7 +4851,7 @@ def test_open_mfdataset_does_same_as_concat(
48504851
combine=combine,
48514852
concat_dim=concat_dim,
48524853
join=join,
4853-
compat="no_conflicts",
4854+
compat="equals",
48544855
) as ds:
48554856
ds_expect = xr.concat(
48564857
[ds1, ds2], data_vars=opt, dim="t", join=join, compat="equals"
@@ -4935,25 +4936,29 @@ def test_open_mfdataset_dataset_attr_by_coords(self) -> None:
49354936
ds.close()
49364937
ds.to_netcdf(f)
49374938

4938-
with xr.open_mfdataset(files, combine="nested", concat_dim="t") as ds:
4939-
assert ds.test_dataset_attr == 10
4939+
with set_options(use_new_combine_kwarg_defaults=True):
4940+
with xr.open_mfdataset(files, combine="nested", concat_dim="t") as ds:
4941+
assert ds.test_dataset_attr == 10
49404942

49414943
def test_open_mfdataset_dataarray_attr_by_coords(self) -> None:
49424944
"""
49434945
Case when an attribute of a member DataArray differs across the multiple files
49444946
"""
4945-
with self.setup_files_and_datasets() as (files, [ds1, ds2]):
4947+
with self.setup_files_and_datasets(new_combine_kwargs=True) as (
4948+
files,
4949+
[ds1, ds2],
4950+
):
49464951
# Give the files an inconsistent attribute
49474952
for i, f in enumerate(files):
49484953
ds = open_dataset(f).load()
49494954
ds["v1"].attrs["test_dataarray_attr"] = i
49504955
ds.close()
49514956
ds.to_netcdf(f)
49524957

4953-
with xr.open_mfdataset(
4954-
files, data_vars="minimal", combine="nested", concat_dim="t"
4955-
) as ds:
4956-
assert ds["v1"].test_dataarray_attr == 0
4958+
with xr.open_mfdataset(
4959+
files, data_vars="minimal", combine="nested", concat_dim="t"
4960+
) as ds:
4961+
assert ds["v1"].test_dataarray_attr == 0
49574962

49584963
@pytest.mark.parametrize(
49594964
"combine, concat_dim", [("nested", "t"), ("by_coords", None)]
@@ -4980,9 +4985,13 @@ def test_open_mfdataset_dataarray_attr_by_coords(self) -> None:
49804985
def test_open_mfdataset_exact_join_raises_error(
49814986
self, combine, concat_dim, kwargs
49824987
) -> None:
4983-
with self.setup_files_and_datasets(fuzz=0.1) as (files, _):
4988+
with self.setup_files_and_datasets(fuzz=0.1, new_combine_kwargs=True) as (
4989+
files,
4990+
_,
4991+
):
49844992
if combine == "by_coords":
49854993
files.reverse()
4994+
49864995
with pytest.raises(
49874996
ValueError, match="cannot align objects with join='exact'"
49884997
):
@@ -4997,17 +5006,15 @@ def test_open_mfdataset_exact_join_raises_error(
49975006
def test_open_mfdataset_defaults_with_exact_join_warns_as_well_as_raising(
49985007
self,
49995008
) -> None:
5000-
with self.setup_files_and_datasets(fuzz=0.1) as (files, _):
5001-
with set_options(use_new_combine_kwarg_defaults=False):
5002-
files.reverse()
5003-
with pytest.warns(
5004-
FutureWarning,
5005-
match="will change from data_vars='all' to data_vars='minimal'",
5006-
):
5007-
with pytest.raises(
5008-
ValueError, match="cannot align objects with join='exact'"
5009-
):
5010-
open_mfdataset(files, combine="by_coords", join="exact")
5009+
with self.setup_files_and_datasets(fuzz=0.1, new_combine_kwargs=True) as (
5010+
files,
5011+
_,
5012+
):
5013+
files.reverse()
5014+
with pytest.raises(
5015+
ValueError, match="cannot align objects with join='exact'"
5016+
):
5017+
open_mfdataset(files, combine="by_coords")
50115018

50125019
def test_common_coord_when_datavars_all(self) -> None:
50135020
opt: Final = "all"
@@ -5030,7 +5037,10 @@ def test_common_coord_when_datavars_all(self) -> None:
50305037
def test_common_coord_when_datavars_minimal(self) -> None:
50315038
opt: Final = "minimal"
50325039

5033-
with self.setup_files_and_datasets() as (files, [ds1, ds2]):
5040+
with self.setup_files_and_datasets(new_combine_kwargs=True) as (
5041+
files,
5042+
[ds1, ds2],
5043+
):
50345044
# open the files using data_vars option
50355045
with open_mfdataset(
50365046
files, data_vars=opt, combine="nested", concat_dim="t"
@@ -5065,15 +5075,18 @@ def test_invalid_data_vars_value_should_fail(self) -> None:
50655075
def test_open_mfdataset_warns_when_kwargs_set_to_different(
50665076
self, combine, concat_dim, kwargs
50675077
) -> None:
5068-
with self.setup_files_and_datasets() as (files, [ds1, ds2]):
5078+
with self.setup_files_and_datasets(new_combine_kwargs=True) as (
5079+
files,
5080+
[ds1, ds2],
5081+
):
50695082
if combine == "by_coords":
50705083
files.reverse()
50715084
with pytest.raises(
5072-
ValueError, match="Previously the default was compat='no_conflicts'"
5085+
ValueError, match="Previously the default was `compat='no_conflicts'`"
50735086
):
50745087
open_mfdataset(files, combine=combine, concat_dim=concat_dim, **kwargs)
50755088
with pytest.raises(
5076-
ValueError, match="Previously the default was compat='equals'"
5089+
ValueError, match="Previously the default was `compat='equals'`"
50775090
):
50785091
xr.concat([ds1, ds2], dim="t", **kwargs)
50795092

@@ -5357,9 +5370,7 @@ def test_encoding_mfdataset(self) -> None:
53575370
ds2.t.encoding["units"] = "days since 2000-01-01"
53585371
ds1.to_netcdf(tmp1)
53595372
ds2.to_netcdf(tmp2)
5360-
with open_mfdataset(
5361-
[tmp1, tmp2], combine="nested", compat="no_conflicts", join="outer"
5362-
) as actual:
5373+
with open_mfdataset([tmp1, tmp2], combine="nested", concat_dim="t") as actual:
53635374
assert actual.t.encoding["units"] == original.t.encoding["units"]
53645375
assert actual.t.encoding["units"] == ds1.t.encoding["units"]
53655376
assert actual.t.encoding["units"] != ds2.t.encoding["units"]
@@ -5382,30 +5393,20 @@ def test_encoding_mfdataset_new_defaults(self) -> None:
53825393
ds1.to_netcdf(tmp1)
53835394
ds2.to_netcdf(tmp2)
53845395

5385-
with set_options(use_new_combine_kwarg_defaults=False):
5386-
with pytest.warns(
5387-
FutureWarning,
5388-
match="will change from join='outer' to join='exact'",
5389-
):
5390-
with pytest.warns(
5391-
FutureWarning,
5392-
match="will change from compat='no_conflicts' to compat='override'",
5393-
):
5394-
with open_mfdataset([tmp1, tmp2], combine="nested") as old:
5395-
assert (
5396-
old.t.encoding["units"]
5397-
== original.t.encoding["units"]
5398-
)
5399-
assert (
5400-
old.t.encoding["units"] == ds1.t.encoding["units"]
5401-
)
5402-
assert (
5403-
old.t.encoding["units"] != ds2.t.encoding["units"]
5404-
)
5396+
for setting in [True, False]:
5397+
with set_options(use_new_combine_kwarg_defaults=setting):
5398+
with open_mfdataset(
5399+
[tmp1, tmp2], combine="nested", concat_dim="t"
5400+
) as old:
5401+
assert (
5402+
old.t.encoding["units"] == original.t.encoding["units"]
5403+
)
5404+
assert old.t.encoding["units"] == ds1.t.encoding["units"]
5405+
assert old.t.encoding["units"] != ds2.t.encoding["units"]
54055406

54065407
with set_options(use_new_combine_kwarg_defaults=True):
54075408
with pytest.raises(
5408-
ValueError, match="Error might be related to new default"
5409+
AlignmentError, match="If you are intending to concatenate"
54095410
):
54105411
open_mfdataset([tmp1, tmp2], combine="nested")
54115412

@@ -7083,20 +7084,20 @@ def test_zarr_safe_chunk_region(self, mode: Literal["r+", "a"]):
70837084
@requires_h5netcdf
70847085
@requires_fsspec
70857086
def test_h5netcdf_storage_options() -> None:
7086-
with set_options(use_new_combine_kwarg_defaults=True):
7087-
with create_tmp_files(2, allow_cleanup_failure=ON_WINDOWS) as (f1, f2):
7088-
ds1 = create_test_data()
7089-
ds1.to_netcdf(f1, engine="h5netcdf")
7087+
with create_tmp_files(2, allow_cleanup_failure=ON_WINDOWS) as (f1, f2):
7088+
ds1 = create_test_data()
7089+
ds1.to_netcdf(f1, engine="h5netcdf")
70907090

7091-
ds2 = create_test_data()
7092-
ds2.to_netcdf(f2, engine="h5netcdf")
7091+
ds2 = create_test_data()
7092+
ds2.to_netcdf(f2, engine="h5netcdf")
70937093

7094-
files = [f"file://{f}" for f in [f1, f2]]
7095-
with xr.open_mfdataset(
7096-
files,
7097-
engine="h5netcdf",
7098-
concat_dim="time",
7099-
combine="nested",
7100-
storage_options={"skip_instance_cache": False},
7101-
) as ds:
7102-
assert_identical(xr.concat([ds1, ds2], dim="time"), ds)
7094+
files = [f"file://{f}" for f in [f1, f2]]
7095+
with xr.open_mfdataset(
7096+
files,
7097+
engine="h5netcdf",
7098+
concat_dim="time",
7099+
data_vars="all",
7100+
combine="nested",
7101+
storage_options={"skip_instance_cache": False},
7102+
) as ds:
7103+
assert_identical(xr.concat([ds1, ds2], dim="time"), ds)

0 commit comments

Comments
 (0)