diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2df0d58bf6e..c5152dafed2 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -30,6 +30,8 @@ New Features - Added ability to save ``DataArray`` objects directly to Zarr using :py:meth:`~xarray.DataArray.to_zarr`. (:issue:`7692`, :pull:`7693`) . By `Joe Hamman `_. +- Support ``pandas>=2.0`` (:pull:`7724`) + By `Justus Magin `_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 7e6d4ab82d7..b04683b2f5d 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -115,7 +115,7 @@ def _get_date_field(values, name, dtype): access_method, values, name, dtype=dtype, new_axis=new_axis, chunks=chunks ) else: - return access_method(values, name) + return access_method(values, name).astype(dtype) def _round_through_series_or_index(values, name, freq): diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index c7720344940..93e9e535fe3 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -15,7 +15,13 @@ PandasIndexingAdapter, PandasMultiIndexingAdapter, ) -from xarray.core.utils import Frozen, get_valid_numpy_dtype, is_dict_like, is_scalar +from xarray.core.utils import ( + Frozen, + emit_user_level_warning, + get_valid_numpy_dtype, + is_dict_like, + is_scalar, +) if TYPE_CHECKING: from xarray.core.types import ErrorOptions, T_Index @@ -166,9 +172,21 @@ def safe_cast_to_index(array: Any) -> pd.Index: elif isinstance(array, PandasIndexingAdapter): index = array.array else: - kwargs = {} - if hasattr(array, "dtype") and array.dtype.kind == "O": - kwargs["dtype"] = object + kwargs: dict[str, str] = {} + if hasattr(array, "dtype"): + if array.dtype.kind == "O": + kwargs["dtype"] = "object" + elif array.dtype == "float16": + emit_user_level_warning( + ( + "`pandas.Index` does not support the `float16` dtype." + " Casting to `float64` for you, but in the future please" + " manually cast to either `float32` and `float64`." 
+ ), + category=DeprecationWarning, + ) + kwargs["dtype"] = "float64" + index = pd.Index(np.asarray(array), **kwargs) return _maybe_cast_to_cftimeindex(index) @@ -259,6 +277,8 @@ def get_indexer_nd(index, labels, method=None, tolerance=None): labels """ flat_labels = np.ravel(labels) + if flat_labels.dtype == "float16": + flat_labels = flat_labels.astype("float64") flat_indexer = index.get_indexer(flat_labels, method=method, tolerance=tolerance) indexer = flat_indexer.reshape(labels.shape) return indexer diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index ef91257c4d9..64b487628c8 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -59,6 +59,8 @@ def setup(self): "quarter", "date", "time", + "daysinmonth", + "days_in_month", "is_month_start", "is_month_end", "is_quarter_start", @@ -74,7 +76,18 @@ def test_field_access(self, field) -> None: else: data = getattr(self.times, field) - expected = xr.DataArray(data, name=field, coords=[self.times], dims=["time"]) + if data.dtype.kind != "b" and field not in ("date", "time"): + # pandas 2.0 returns int32 for integer fields now + data = data.astype("int64") + + translations = { + "weekday": "dayofweek", + "daysinmonth": "days_in_month", + "weekofyear": "week", + } + name = translations.get(field, field) + + expected = xr.DataArray(data, name=name, coords=[self.times], dims=["time"]) if field in ["week", "weekofyear"]: with pytest.warns( @@ -84,7 +97,8 @@ def test_field_access(self, field) -> None: else: actual = getattr(self.data.time.dt, field) - assert_equal(expected, actual) + assert expected.dtype == actual.dtype + assert_identical(expected, actual) @pytest.mark.parametrize( "field, pandas_field", diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 4ffa95e16e6..dcbfd42c9f1 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1023,32 +1023,53 @@ def test_sel_dataarray_datetime_slice(self) 
-> None: result = array.sel(delta=slice(array.delta[0], array.delta[-1])) assert_equal(result, array) - def test_sel_float(self) -> None: + @pytest.mark.parametrize( + ["coord_values", "indices"], + ( + pytest.param( + np.array([0.0, 0.111, 0.222, 0.333], dtype="float64"), + slice(1, 3), + id="float64", + ), + pytest.param( + np.array([0.0, 0.111, 0.222, 0.333], dtype="float32"), + slice(1, 3), + id="float32", + ), + pytest.param( + np.array([0.0, 0.111, 0.222, 0.333], dtype="float32"), [2], id="scalar" + ), + ), + ) + def test_sel_float(self, coord_values, indices) -> None: data_values = np.arange(4) - # case coords are float32 and label is list of floats - float_values = [0.0, 0.111, 0.222, 0.333] - coord_values = np.asarray(float_values, dtype="float32") - array = DataArray(data_values, [("float32_coord", coord_values)]) - expected = DataArray(data_values[1:3], [("float32_coord", coord_values[1:3])]) - actual = array.sel(float32_coord=float_values[1:3]) - # case coords are float16 and label is list of floats - coord_values_16 = np.asarray(float_values, dtype="float16") - expected_16 = DataArray( - data_values[1:3], [("float16_coord", coord_values_16[1:3])] - ) - array_16 = DataArray(data_values, [("float16_coord", coord_values_16)]) - actual_16 = array_16.sel(float16_coord=float_values[1:3]) + arr = DataArray(data_values, coords={"x": coord_values}, dims="x") - # case coord, label are scalars - expected_scalar = DataArray( - data_values[2], coords={"float32_coord": coord_values[2]} + actual = arr.sel(x=coord_values[indices]) + expected = DataArray( + data_values[indices], coords={"x": coord_values[indices]}, dims="x" ) - actual_scalar = array.sel(float32_coord=float_values[2]) - assert_equal(expected, actual) - assert_equal(expected_scalar, actual_scalar) - assert_equal(expected_16, actual_16) + assert_equal(actual, expected) + + def test_sel_float16(self) -> None: + data_values = np.arange(4) + coord_values = np.array([0.0, 0.111, 0.222, 0.333], 
dtype="float16") + indices = slice(1, 3) + + message = "`pandas.Index` does not support the `float16` dtype.*" + + with pytest.warns(DeprecationWarning, match=message): + arr = DataArray(data_values, coords={"x": coord_values}, dims="x") + with pytest.warns(DeprecationWarning, match=message): + expected = DataArray( + data_values[indices], coords={"x": coord_values[indices]}, dims="x" + ) + + actual = arr.sel(x=coord_values[indices]) + + assert_equal(actual, expected) def test_sel_float_multiindex(self) -> None: # regression test https://github.com/pydata/xarray/issues/5691 diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index 0f6f353faf2..36f62fad71f 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -23,11 +23,13 @@ def new_method(): @pytest.mark.parametrize( - "a, b, expected", [["a", "b", np.array(["a", "b"])], [1, 2, pd.Index([1, 2])]] + ["a", "b", "expected"], + [ + [np.array(["a"]), np.array(["b"]), np.array(["a", "b"])], + [np.array([1], dtype="int64"), np.array([2], dtype="int64"), pd.Index([1, 2])], + ], ) def test_maybe_coerce_to_str(a, b, expected): - a = np.array([a]) - b = np.array([b]) index = pd.Index(a).append(pd.Index(b)) actual = utils.maybe_coerce_to_str(index, [a, b])