Skip to content

Commit f1738a0

Browse files
authored
Clean up backend indexing some more (#10376)
* Making decoding arrays lazy too
* Add IndexingAdapter mixin class
* Cleanup backends some more
* Revert "Add IndexingAdapter mixin class"
  This reverts commit 930f24d.
* Fix scipy backend
* Add scipy-only CI job
  xref #8909
* Pin array-api-strict
* Fix doctest
* Add test for #8909
  Closes #8909, #8921
* Remove user warning
* fix types
* Add whats-new
* fix types
* Fix docs build
1 parent 8796d55 commit f1738a0

17 files changed

+93
-23
lines changed

.github/workflows/ci.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ jobs:
5454
- env: "bare-minimum"
5555
python-version: "3.10"
5656
os: ubuntu-latest
57+
- env: "bare-min-and-scipy"
58+
python-version: "3.10"
59+
os: ubuntu-latest
5760
- env: "min-all-deps"
5861
python-version: "3.10"
5962
os: ubuntu-latest

ci/requirements/all-but-dask.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ channels:
44
- nodefaults
55
dependencies:
66
- aiobotocore
7-
- array-api-strict
7+
- array-api-strict<2.4
88
- boto3
99
- bottleneck
1010
- cartopy

ci/requirements/all-but-numba.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ dependencies:
66
# Pin a "very new numpy" (updated Sept 24, 2024)
77
- numpy>=2.1.1
88
- aiobotocore
9-
- array-api-strict
9+
- array-api-strict<2.4
1010
- boto3
1111
- bottleneck
1212
- cartopy
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
name: xarray-tests
2+
channels:
3+
- conda-forge
4+
- nodefaults
5+
dependencies:
6+
- python=3.10
7+
- coveralls
8+
- pip
9+
- pytest
10+
- pytest-cov
11+
- pytest-env
12+
- pytest-mypy-plugins
13+
- pytest-timeout
14+
- pytest-xdist
15+
- numpy=1.24
16+
- packaging=23.1
17+
- pandas=2.1
18+
- scipy=1.11

ci/requirements/environment-3.14.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ channels:
44
- nodefaults
55
dependencies:
66
- aiobotocore
7-
- array-api-strict
7+
- array-api-strict<2.4
88
- boto3
99
- bottleneck
1010
- cartopy

ci/requirements/environment-windows-3.14.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ name: xarray-tests
22
channels:
33
- conda-forge
44
dependencies:
5-
- array-api-strict
5+
- array-api-strict<2.4
66
- boto3
77
- bottleneck
88
- cartopy

ci/requirements/environment-windows.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ name: xarray-tests
22
channels:
33
- conda-forge
44
dependencies:
5-
- array-api-strict
5+
- array-api-strict<2.4
66
- boto3
77
- bottleneck
88
- cartopy

ci/requirements/environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ channels:
44
- nodefaults
55
dependencies:
66
- aiobotocore
7-
- array-api-strict
7+
- array-api-strict<2.4
88
- boto3
99
- bottleneck
1010
- cartopy

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ Bug fixes
2626
~~~~~~~~~
2727
- Fix Pydap ``test_cmp_local_file`` for numpy 2.3.0 changes: (1) always return arrays for all versions, and (2) skip ``astype(str)`` on the expected data for numpy >= 2.3.0. (:pull:`10421`)
2828
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
29+
- Fix the SciPy backend for netCDF3 files. (:issue:`8909`, :pull:`10376`)
30+
By `Deepak Cherian <https://github.com/dcherian>`_.
2931

3032

3133
Documentation

xarray/backends/memory.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import numpy as np
66

77
from xarray.backends.common import AbstractWritableDataStore
8+
from xarray.core import indexing
89
from xarray.core.variable import Variable
910

1011

@@ -24,7 +25,12 @@ def get_attrs(self):
2425
return self._attributes
2526

2627
def get_variables(self):
27-
return self._variables
28+
res = {}
29+
for k, v in self._variables.items():
30+
v = v.copy(deep=True)
31+
res[k] = v
32+
v._data = indexing.LazilyIndexedArray(v._data)
33+
return res
2834

2935
def get_dimensions(self):
3036
return {d: s for v in self._variables.values() for d, s in v.dims.items()}

xarray/backends/scipy_.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ def ds(self):
190190
def open_store_variable(self, name, var):
191191
return Variable(
192192
var.dimensions,
193-
ScipyArrayWrapper(name, self),
193+
indexing.LazilyIndexedArray(ScipyArrayWrapper(name, self)),
194194
_decode_attrs(var._attributes),
195195
)
196196

xarray/coding/common.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ def __init__(self, array, func: Callable, dtype: np.typing.DTypeLike):
6363
def dtype(self) -> np.dtype:
6464
return np.dtype(self._dtype)
6565

66+
def transpose(self, order):
67+
# For elementwise functions, we can compose transpose and function application
68+
return type(self)(self.array.transpose(order), self.func, self.dtype)
69+
6670
def _oindex_get(self, key):
6771
return type(self)(self.array.oindex[key], self.func, self.dtype)
6872

xarray/coding/strings.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ class StackedBytesArray(indexing.ExplicitlyIndexedNDArrayMixin):
221221
values, when accessed, are automatically stacked along the last dimension.
222222
223223
>>> indexer = indexing.BasicIndexer((slice(None),))
224-
>>> StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[indexer]
224+
>>> np.array(StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[indexer])
225225
array(b'abc', dtype='|S3')
226226
"""
227227

@@ -250,14 +250,17 @@ def __repr__(self):
250250
return f"{type(self).__name__}({self.array!r})"
251251

252252
def _vindex_get(self, key):
253-
return _numpy_char_to_bytes(self.array.vindex[key])
253+
return type(self)(self.array.vindex[key])
254254

255255
def _oindex_get(self, key):
256-
return _numpy_char_to_bytes(self.array.oindex[key])
256+
return type(self)(self.array.oindex[key])
257257

258258
def __getitem__(self, key):
259259
# require slicing the last dimension completely
260260
key = type(key)(indexing.expanded_indexer(key.tuple, self.array.ndim))
261261
if key.tuple[-1] != slice(None):
262262
raise IndexError("too many indices")
263-
return _numpy_char_to_bytes(self.array[key])
263+
return type(self)(self.array[key])
264+
265+
def get_duck_array(self):
266+
return _numpy_char_to_bytes(self.array.get_duck_array())

xarray/coding/variables.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
)
2222
from xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder
2323
from xarray.core import dtypes, duck_array_ops, indexing
24+
from xarray.core.types import Self
2425
from xarray.core.variable import Variable
2526

2627
if TYPE_CHECKING:
@@ -58,13 +59,16 @@ def dtype(self) -> np.dtype:
5859
return np.dtype(self.array.dtype.kind + str(self.array.dtype.itemsize))
5960

6061
def _oindex_get(self, key):
61-
return np.asarray(self.array.oindex[key], dtype=self.dtype)
62+
return type(self)(self.array.oindex[key])
6263

6364
def _vindex_get(self, key):
64-
return np.asarray(self.array.vindex[key], dtype=self.dtype)
65+
return type(self)(self.array.vindex[key])
6566

66-
def __getitem__(self, key) -> np.ndarray:
67-
return np.asarray(self.array[key], dtype=self.dtype)
67+
def __getitem__(self, key) -> Self:
68+
return type(self)(self.array[key])
69+
70+
def get_duck_array(self):
71+
return duck_array_ops.astype(self.array.get_duck_array(), dtype=self.dtype)
6872

6973

7074
class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin):
@@ -96,13 +100,16 @@ def dtype(self) -> np.dtype:
96100
return np.dtype("bool")
97101

98102
def _oindex_get(self, key):
99-
return np.asarray(self.array.oindex[key], dtype=self.dtype)
103+
return type(self)(self.array.oindex[key])
100104

101105
def _vindex_get(self, key):
102-
return np.asarray(self.array.vindex[key], dtype=self.dtype)
106+
return type(self)(self.array.vindex[key])
107+
108+
def __getitem__(self, key) -> Self:
109+
return type(self)(self.array[key])
103110

104-
def __getitem__(self, key) -> np.ndarray:
105-
return np.asarray(self.array[key], dtype=self.dtype)
111+
def get_duck_array(self):
112+
return duck_array_ops.astype(self.array.get_duck_array(), dtype=self.dtype)
106113

107114

108115
def _apply_mask(

xarray/conventions.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
)
1919
from xarray.core.utils import emit_user_level_warning
2020
from xarray.core.variable import IndexVariable, Variable
21-
from xarray.namedarray.utils import is_duck_dask_array
21+
from xarray.namedarray.utils import is_duck_array
2222

2323
CF_RELATED_DATA = (
2424
"bounds",
@@ -248,7 +248,15 @@ def decode_cf_variable(
248248

249249
encoding.setdefault("dtype", original_dtype)
250250

251-
if not is_duck_dask_array(data):
251+
if (
252+
# we don't need to lazily index duck arrays
253+
not is_duck_array(data)
254+
# These arrays already support lazy indexing
255+
# OR for IndexingAdapters, it makes no sense to wrap them
256+
and not isinstance(data, indexing.ExplicitlyIndexedNDArrayMixin)
257+
):
258+
# this path applies to bare BackendArray objects.
259+
# It is not hit for any internal Xarray backend
252260
data = indexing.LazilyIndexedArray(data)
253261

254262
return Variable(dimensions, data, attributes, encoding=encoding, fastpath=True)

xarray/core/indexes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -717,7 +717,7 @@ def from_variables(
717717

718718
# preserve wrapped pd.Index (if any)
719719
# accessing `.data` can load data from disk, so we only access if needed
720-
data = var._data.array if hasattr(var._data, "array") else var.data
720+
data = var._data if isinstance(var._data, PandasIndexingAdapter) else var.data # type: ignore[redundant-expr]
721721
# multi-index level variable: get level index
722722
if isinstance(var._data, PandasMultiIndexingAdapter):
723723
level = var._data.level

xarray/tests/test_backends.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1427,6 +1427,25 @@ def test_string_object_warning(self) -> None:
14271427
with self.roundtrip(original) as actual:
14281428
assert_identical(original, actual)
14291429

1430+
@pytest.mark.parametrize(
1431+
"indexer",
1432+
(
1433+
{"y": [1]},
1434+
{"y": slice(2)},
1435+
{"y": 1},
1436+
{"x": [1], "y": [1]},
1437+
{"x": ("x0", [0, 1]), "y": ("x0", [0, 1])},
1438+
),
1439+
)
1440+
def test_indexing_roundtrip(self, indexer) -> None:
1441+
# regression test for GH8909
1442+
ds = xr.Dataset()
1443+
ds["A"] = xr.DataArray([[1, "a"], [2, "b"]], dims=["x", "y"])
1444+
with self.roundtrip(ds) as ds2:
1445+
expected = ds2.sel(indexer)
1446+
with self.roundtrip(expected) as actual:
1447+
assert_identical(actual, expected)
1448+
14301449

14311450
class NetCDFBase(CFEncodedBase):
14321451
"""Tests for all netCDF3 and netCDF4 backends."""

0 commit comments

Comments
 (0)