From 83faf564d275fdebf15ea351196a1d127e8826a5 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 19 Feb 2025 08:49:18 -0700 Subject: [PATCH 01/15] Fix a bug when setting complete chunks Closes #2849 --- src/zarr/core/codec_pipeline.py | 25 ++++++++++++++----------- tests/test_array.py | 26 ++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py index 0c53cda96c..abbb9a8296 100644 --- a/src/zarr/core/codec_pipeline.py +++ b/src/zarr/core/codec_pipeline.py @@ -296,17 +296,6 @@ def _merge_chunk_array( is_complete_chunk: bool, drop_axes: tuple[int, ...], ) -> NDBuffer: - if is_complete_chunk and value.shape == chunk_spec.shape: - return value - if existing_chunk_array is None: - chunk_array = chunk_spec.prototype.nd_buffer.create( - shape=chunk_spec.shape, - dtype=chunk_spec.dtype, - order=chunk_spec.order, - fill_value=fill_value_or_default(chunk_spec), - ) - else: - chunk_array = existing_chunk_array.copy() # make a writable copy if chunk_selection == () or is_scalar(value.as_ndarray_like(), chunk_spec.dtype): chunk_value = value else: @@ -320,6 +309,20 @@ def _merge_chunk_array( for idx in range(chunk_spec.ndim) ) chunk_value = chunk_value[item] + if is_complete_chunk: + # TODO: For the last chunk, we could have is_complete_chunk=True + # that is smaller than the chunk_spec.shape but this throws + # an error in the _decode_single + return chunk_value + if existing_chunk_array is None: + chunk_array = chunk_spec.prototype.nd_buffer.create( + shape=chunk_spec.shape, + dtype=chunk_spec.dtype, + order=chunk_spec.order, + fill_value=fill_value_or_default(chunk_spec), + ) + else: + chunk_array = existing_chunk_array.copy() # make a writable copy chunk_array[chunk_selection] = chunk_value return chunk_array diff --git a/tests/test_array.py b/tests/test_array.py index b81f966e20..ad93d1a425 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -829,6 +829,32 @@ def test_append_bad_shape(store: MemoryStore, zarr_format: ZarrFormat) -> None: z.append(b) +def test_append_shape_not_equals_chunk_shape() -> None: + # regression test for GH2849 + g = zarr.open("foo.zarr", zarr_format=3, mode="w") + g["bar"] = np.arange(3) + + size = 4 + data = [0, 1, 2, 3] + g["bar"].append(data) + assert (g["bar"][-size:] == data).all(), (g["bar"][-size:], data) + + size = 3 + data = [-1, -2, -3] + g["bar"].append(data) + assert (g["bar"][-size:] == data).all(), (g["bar"][-size:], data) + + size = 2 + data = [-10, -11] + g["bar"].append(data) + assert (g["bar"][-size:] == data).all(), (g["bar"][-size:], data) + + size = 3 + data = [-4, -5, -6] + g["bar"].append(data) + assert (g["bar"][-size:] == data).all(), (g["bar"][-size:], data) + + @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("write_empty_chunks", [True, False]) @pytest.mark.parametrize("fill_value", [0, 5]) From 1797c6f493880c7c6fedf0cd676a33413b4ae34f Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 19 Feb 2025 09:04:24 -0700 Subject: [PATCH 02/15] much simpler test --- src/zarr/core/codec_pipeline.py | 2 +- tests/test_array.py | 26 -------------------------- tests/test_indexing.py | 9 +++++++++ 3 files changed, 10 insertions(+), 27 deletions(-) diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py index abbb9a8296..628a7e0487 100644 --- a/src/zarr/core/codec_pipeline.py +++ b/src/zarr/core/codec_pipeline.py @@ -309,7 +309,7 @@ def _merge_chunk_array( for idx in range(chunk_spec.ndim) ) chunk_value = chunk_value[item] - if is_complete_chunk: + if is_complete_chunk and chunk_value.shape == chunk_spec.shape: # TODO: For the last chunk, we could have is_complete_chunk=True # that is smaller than the chunk_spec.shape but this throws # an error in the _decode_single diff --git a/tests/test_array.py b/tests/test_array.py index ad93d1a425..b81f966e20 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -829,32 +829,6 @@ def test_append_bad_shape(store: MemoryStore, zarr_format: ZarrFormat) -> None: z.append(b) -def test_append_shape_not_equals_chunk_shape() -> None: - # regression test for GH2849 - g = zarr.open("foo.zarr", zarr_format=3, mode="w") - g["bar"] = np.arange(3) - - size = 4 - data = [0, 1, 2, 3] - g["bar"].append(data) - assert (g["bar"][-size:] == data).all(), (g["bar"][-size:], data) - - size = 3 - data = [-1, -2, -3] - g["bar"].append(data) - assert (g["bar"][-size:] == data).all(), (g["bar"][-size:], data) - - size = 2 - data = [-10, -11] - g["bar"].append(data) - assert (g["bar"][-size:] == data).all(), (g["bar"][-size:], data) - - size = 3 - data = [-4, -5, -6] - g["bar"].append(data) - assert (g["bar"][-size:] == data).all(), (g["bar"][-size:], data) - - @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("write_empty_chunks", [True, False]) @pytest.mark.parametrize("fill_value", [0, 5]) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 30d0d75f22..402aecb797 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -815,6 +815,15 @@ def test_set_orthogonal_selection_1d(store: StorePath) -> None: _test_set_orthogonal_selection(v, a, z, selection) +def test_set_orthogonal_selection_1d_last_two_chunks(): + # regression test for GH2849 + g = zarr.open_group("foo.zarr", zarr_format=3, mode="w") + a = g.create_array("bar", shape=(10,), chunks=(3,), dtype=int) + data = np.array([7, 8, 9]) + a[slice(7, 10)] = data + np.testing.assert_array_equal(a[slice(7, 10)], data) + + def _test_set_orthogonal_selection_2d( v: npt.NDArray[np.int_], a: npt.NDArray[np.int_], From c0057e19852b929af7111b5c840a067cfa05a7c5 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 19 Feb 2025 09:55:10 -0700 Subject: [PATCH 03/15] add release note --- changes/2851.bugfix.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/2851.bugfix.rst diff --git a/changes/2851.bugfix.rst b/changes/2851.bugfix.rst new file mode 100644 index 0000000000..977f683847 --- /dev/null +++ b/changes/2851.bugfix.rst @@ -0,0 +1 @@ +Fix a bug when setting values of a smaller last chunk. From 63eb1ea9783a513c0643b44cbacfa006f188d7c2 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 21 Feb 2025 10:02:28 -0700 Subject: [PATCH 04/15] Update strategy priorities: 1. Emphasize arrays of side > 1, 2. Emphasize indexing the last chunk for both setitem & getitem --- src/zarr/testing/strategies.py | 62 ++++++++++++++++++++++++++++------ tests/test_properties.py | 7 ++-- 2 files changed, 56 insertions(+), 13 deletions(-) diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 0e25e44592..716b9993f3 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -1,10 +1,11 @@ +import math import sys from typing import Any, Literal import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np -from hypothesis import given, settings # noqa: F401 +from hypothesis import event, given, settings # noqa: F401 from hypothesis.strategies import SearchStrategy import zarr @@ -97,7 +98,8 @@ def clear_store(x: Store) -> Store: stores = st.builds(MemoryStore, st.just({})).map(clear_store) compressors = st.sampled_from([None, "default"]) zarr_formats: st.SearchStrategy[ZarrFormat] = st.sampled_from([3, 2]) -array_shapes = npst.array_shapes(max_dims=4, min_side=0) +# We de-prioritize arrays having dim sizes 0, 1, 2 +array_shapes = npst.array_shapes(max_dims=4, min_side=3) | npst.array_shapes(max_dims=4, min_side=0) @st.composite # type: ignore[misc] @@ -174,11 +176,19 @@ def chunk_shapes(draw: st.DrawFn, *, shape: tuple[int, ...]) -> tuple[int, ...]: st.tuples(*[st.integers(min_value=0 if size == 0 else 1, max_value=size) for size in shape]) ) # 2. and now generate the chunks tuple - return tuple( + chunks = tuple( size // nchunks if nchunks > 0 else 0 for size, nchunks in zip(shape, numchunks, strict=True) ) + for c in chunks: + event("chunk size", c) + + if any((c != 0 and s % c != 0) for s, c in zip(shape, chunks, strict=True)): + event("smaller last chunk") + + return chunks + @st.composite # type: ignore[misc] def shard_shapes( @@ -267,23 +277,55 @@ def arrays( return a +@st.composite # type: ignore[misc] +def simple_arrays( + draw: st.DrawFn, + *, + shapes: st.SearchStrategy[tuple[int, ...]] = array_shapes, +) -> Any: + return draw( + arrays( + shapes=shapes, + attrs=st.none(), + paths=paths(max_num_nodes=2), + compressors=st.sampled_from([None, "default"]), + ) + ) + + def is_negative_slice(idx: Any) -> bool: return isinstance(idx, slice) and idx.step is not None and idx.step < 0 +@st.composite # type: ignore[misc] +def end_slices(draw: st.DrawFn, *, shape: tuple[int]) -> Any: + """ + A strategy that slices ranges that include the last chunk. + This is intended to stress-test handling of a possibly smaller last chunk. + """ + slicers = [] + for size in shape: + start = draw(st.integers(min_value=size // 2, max_value=size - 1)) + length = draw(st.integers(min_value=0, max_value=size - start)) + slicers.append(slice(start, start + length)) + event("drawing end slice") + return tuple(slicers) + + @st.composite # type: ignore[misc] def basic_indices(draw: st.DrawFn, *, shape: tuple[int], **kwargs: Any) -> Any: """Basic indices without unsupported negative slices.""" - return draw( - npst.basic_indices(shape=shape, **kwargs).filter( - lambda idxr: ( - not ( - is_negative_slice(idxr) - or (isinstance(idxr, tuple) and any(is_negative_slice(idx) for idx in idxr)) - ) + strategy = npst.basic_indices(shape=shape, **kwargs).filter( + lambda idxr: ( + not ( + is_negative_slice(idxr) + or (isinstance(idxr, tuple) and any(is_negative_slice(idx) for idx in idxr)) ) ) ) + if math.prod(shape) >= 3: + strategy = end_slices(shape=shape) | strategy + return draw(strategy) @st.composite # type: ignore[misc] diff --git a/tests/test_properties.py b/tests/test_properties.py index 5643cf3853..90acbdb4b9 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -18,6 +18,7 @@ basic_indices, numpy_arrays, orthogonal_indices, + simple_arrays, stores, zarr_formats, ) @@ -50,7 +51,7 @@ def test_array_creates_implicit_groups(array): @given(data=st.data()) def test_basic_indexing(data: st.DataObject) -> None: - zarray = data.draw(arrays()) + zarray = data.draw(simple_arrays()) nparray = zarray[:] indexer = data.draw(basic_indices(shape=nparray.shape)) actual = zarray[indexer] @@ -65,7 +66,7 @@ def test_basic_indexing(data: st.DataObject) -> None: @given(data=st.data()) def test_oindex(data: st.DataObject) -> None: # integer_array_indices can't handle 0-size dimensions. - zarray = data.draw(arrays(shapes=npst.array_shapes(max_dims=4, min_side=1))) + zarray = data.draw(simple_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1))) nparray = zarray[:] zindexer, npindexer = data.draw(orthogonal_indices(shape=nparray.shape)) @@ -82,7 +83,7 @@ def test_oindex(data: st.DataObject) -> None: @given(data=st.data()) def test_vindex(data: st.DataObject) -> None: # integer_array_indices can't handle 0-size dimensions. - zarray = data.draw(arrays(shapes=npst.array_shapes(max_dims=4, min_side=1))) + zarray = data.draw(simple_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1))) nparray = zarray[:] indexer = data.draw( From 9bd02c0fb47f5627f5b8f5f0b374637881f41155 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 21 Feb 2025 10:09:54 -0700 Subject: [PATCH 05/15] Use short node names --- src/zarr/testing/strategies.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 716b9993f3..90e2726dda 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -29,6 +29,16 @@ ) +@st.composite # type: ignore[misc] +def keys(draw: st.DrawFn, *, max_num_nodes: int | None = None) -> Any: + return draw(st.lists(node_names, min_size=1, max_size=max_num_nodes).map("/".join)) + + +@st.composite # type: ignore[misc] +def paths(draw: st.DrawFn, *, max_num_nodes: int | None = None) -> Any: + return draw(st.just("/") | keys(max_num_nodes=max_num_nodes)) + + def v3_dtypes() -> st.SearchStrategy[np.dtype]: return ( npst.boolean_dtypes() @@ -88,10 +98,11 @@ def clear_store(x: Store) -> Store: node_names = st.text(zarr_key_chars, min_size=1).filter( lambda t: t not in (".", "..") and not t.startswith("__") ) +short_node_names = st.text(zarr_key_chars, max_size=3, min_size=1).filter( + lambda t: t not in (".", "..") and not t.startswith("__") +) array_names = node_names attrs = st.none() | st.dictionaries(_attr_keys, _attr_values) -keys = st.lists(node_names, min_size=1).map("/".join) -paths = st.just("/") | keys # st.builds will only call a new store constructor for different keyword arguments # i.e. stores.examples() will always return the same object per Store class. # So we map a clear to reset the store. @@ -221,7 +232,7 @@ def arrays( shapes: st.SearchStrategy[tuple[int, ...]] = array_shapes, compressors: st.SearchStrategy = compressors, stores: st.SearchStrategy[StoreLike] = stores, - paths: st.SearchStrategy[str | None] = paths, + paths: st.SearchStrategy[str | None] = paths(), # noqa: B008 array_names: st.SearchStrategy = array_names, arrays: st.SearchStrategy | None = None, attrs: st.SearchStrategy = attrs, @@ -286,8 +297,9 @@ def simple_arrays( return draw( arrays( shapes=shapes, - attrs=st.none(), paths=paths(max_num_nodes=2), + array_names=short_node_names, + attrs=st.none(), compressors=st.sampled_from([None, "default"]), ) ) From ca6dcbcd084cfad13ff741597701e3967821a393 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 21 Feb 2025 10:16:51 -0700 Subject: [PATCH 06/15] bug fix --- src/zarr/testing/stateful.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/zarr/testing/stateful.py b/src/zarr/testing/stateful.py index 3e8dbcdf04..ede83201ae 100644 --- a/src/zarr/testing/stateful.py +++ b/src/zarr/testing/stateful.py @@ -325,7 +325,7 @@ def __init__(self, store: Store) -> None: def init_store(self) -> None: self.store.clear() - @rule(key=zarr_keys, data=st.binary(min_size=0, max_size=MAX_BINARY_SIZE)) + @rule(key=zarr_keys(), data=st.binary(min_size=0, max_size=MAX_BINARY_SIZE)) def set(self, key: str, data: DataObject) -> None: note(f"(set) Setting {key!r} with {data}") assert not self.store.read_only @@ -334,7 +334,7 @@ def set(self, key: str, data: DataObject) -> None: self.model[key] = data_buf @precondition(lambda self: len(self.model.keys()) > 0) - @rule(key=zarr_keys, data=st.data()) + @rule(key=zarr_keys(), data=st.data()) def get(self, key: str, data: DataObject) -> None: key = data.draw( st.sampled_from(sorted(self.model.keys())) @@ -344,7 +344,7 @@ def get(self, key: str, data: DataObject) -> None: # to bytes here necessary because data_buf set to model in set() assert self.model[key] == store_value - @rule(key=zarr_keys, data=st.data()) + @rule(key=zarr_keys(), data=st.data()) def get_invalid_zarr_keys(self, key: str, data: DataObject) -> None: note("(get_invalid)") assume(key not in self.model) @@ -408,7 +408,7 @@ def is_empty(self) -> None: # make sure they either both are or both aren't empty (same state) assert self.store.is_empty("") == (not self.model) - @rule(key=zarr_keys) + @rule(key=zarr_keys()) def exists(self, key: str) -> None: note("(exists)") From 349afa830fd774d309fe5c5893ccc511a49da9d1 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 21 Feb 2025 10:44:04 -0700 Subject: [PATCH 07/15] Add scalar tests --- tests/test_indexing.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 402aecb797..02f6c25502 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -815,7 +815,7 @@ def test_set_orthogonal_selection_1d(store: StorePath) -> None: _test_set_orthogonal_selection(v, a, z, selection) -def test_set_orthogonal_selection_1d_last_two_chunks(): +def test_set_item_1d_last_two_chunks(): # regression test for GH2849 g = zarr.open_group("foo.zarr", zarr_format=3, mode="w") a = g.create_array("bar", shape=(10,), chunks=(3,), dtype=int) @@ -823,6 +823,16 @@ def test_set_orthogonal_selection_1d_last_two_chunks(): a[slice(7, 10)] = data np.testing.assert_array_equal(a[slice(7, 10)], data) + z = zarr.open_group("foo.zarr", mode="w") + z.create_array("zoo", dtype=float, shape=()) + z["zoo"][...] = np.array(1) # why doesn't [:] work? + np.testing.assert_equal(z["zoo"][()], np.array(1)) + + z = zarr.open_group("foo.zarr", mode="w") + z.create_array("zoo", dtype=float, shape=()) + z["zoo"][...] = 1 # why doesn't [:] work? + np.testing.assert_equal(z["zoo"][()], np.array(1)) + def _test_set_orthogonal_selection_2d( v: npt.NDArray[np.int_], From a13eec10a20ee43412759eeb72d3f6aceda01578 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 21 Feb 2025 10:34:52 -0700 Subject: [PATCH 08/15] [revert] --- tests/test_properties.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_properties.py b/tests/test_properties.py index 90acbdb4b9..2d1a1d5886 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -7,7 +7,7 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st -from hypothesis import assume, given +from hypothesis import assume, given, note, reproduce_failure from zarr.abc.store import Store from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata @@ -64,6 +64,7 @@ def test_basic_indexing(data: st.DataObject) -> None: @given(data=st.data()) +# @reproduce_failure("6.126.0", b"AXicFYiBDQAwDIJ060F7gdP6+TQkEkH7cJAZCXHpjjltecJuiz+MwwVc") def test_oindex(data: st.DataObject) -> None: # integer_array_indices can't handle 0-size dimensions. zarray = data.draw(simple_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1))) @@ -77,6 +78,7 @@ def test_oindex(data: st.DataObject) -> None: new_data = data.draw(npst.arrays(shape=st.just(actual.shape), dtype=nparray.dtype)) nparray[npindexer] = new_data zarray.oindex[zindexer] = new_data + note((new_data, npindexer, nparray, zindexer, zarray[:])) assert_array_equal(nparray, zarray[:]) From 0e0b34f6b370cd1d170ced4ce0aabf51f87eb215 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 21 Feb 2025 11:44:19 -0700 Subject: [PATCH 09/15] Add unit test --- tests/test_indexing.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 02f6c25502..09da43cc54 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -424,6 +424,13 @@ def test_orthogonal_indexing_fallback_on_getitem_2d( np.testing.assert_array_equal(z[index], expected_result) +def test_setitem_repeated_index(): + array = zarr.array(data=np.zeros((4,)), chunks=(1,)) + indexer = np.array([-1, -1, 0, 0]) + array.oindex[(indexer,)] = [0, 1, 2, 3] + np.testing.assert_array_equal(array[:], np.array([3, 0, 0, 1])) + + Index = list[int] | tuple[slice | int | list[int], ...] From 75114ae21661c5c446b67b38746936fc90da3577 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 21 Feb 2025 14:26:34 -0700 Subject: [PATCH 10/15] one more test --- tests/test_indexing.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 09da43cc54..db30d69d0c 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -430,6 +430,10 @@ def test_setitem_repeated_index(): array.oindex[(indexer,)] = [0, 1, 2, 3] np.testing.assert_array_equal(array[:], np.array([3, 0, 0, 1])) + indexer = np.array([-1, 0, 0, -1]) + array.oindex[(indexer,)] = [0, 1, 2, 3] + np.testing.assert_array_equal(array[:], np.array([2, 0, 0, 3])) + Index = list[int] | tuple[slice | int | list[int], ...] From 7a9d10dd433e389b47d07abf333a8d85711b6350 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 21 Feb 2025 15:11:00 -0700 Subject: [PATCH 11/15] Add xfails --- tests/test_indexing.py | 1 + tests/test_properties.py | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index db30d69d0c..153544a169 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -424,6 +424,7 @@ def test_orthogonal_indexing_fallback_on_getitem_2d( np.testing.assert_array_equal(z[index], expected_result) +@pytest.mark.xfail(reason="fails on ubuntu, windows; numpy=2.2; in CI") def test_setitem_repeated_index(): array = zarr.array(data=np.zeros((4,)), chunks=(1,)) indexer = np.array([-1, -1, 0, 0]) diff --git a/tests/test_properties.py b/tests/test_properties.py index 2d1a1d5886..bab5772053 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -1,3 +1,4 @@ +import numpy as np import pytest from numpy.testing import assert_array_equal @@ -7,7 +8,7 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st -from hypothesis import assume, given, note, reproduce_failure +from hypothesis import assume, given, note from zarr.abc.store import Store from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata @@ -75,6 +76,10 @@ def test_oindex(data: st.DataObject) -> None: assert_array_equal(nparray[npindexer], actual) assume(zarray.shards is None) # GH2834 + for idxr in npindexer: + if isinstance(idxr, np.ndarray) and idxr.size != np.unique(idxr).size: + # behaviour of setitem with repeated indices is not guaranteed in practice + assume(False) new_data = data.draw(npst.arrays(shape=st.just(actual.shape), dtype=nparray.dtype)) nparray[npindexer] = new_data zarray.oindex[zindexer] = new_data From 026936a9b74ef0ef1b7a78f25af28d9ddcacb5a9 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 21 Feb 2025 15:22:50 -0700 Subject: [PATCH 12/15] switch to skip, XPASS is not allwoed --- tests/test_indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 153544a169..1bad861622 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -424,7 +424,7 @@ def test_orthogonal_indexing_fallback_on_getitem_2d( np.testing.assert_array_equal(z[index], expected_result) -@pytest.mark.xfail(reason="fails on ubuntu, windows; numpy=2.2; in CI") +@pytest.mark.skip(reason="fails on ubuntu, windows; numpy=2.2; in CI") def test_setitem_repeated_index(): array = zarr.array(data=np.zeros((4,)), chunks=(1,)) indexer = np.array([-1, -1, 0, 0]) From 7e3246344db425f43b8d6f5b1fa2cd75aeefe8b4 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 21 Feb 2025 14:42:40 -0700 Subject: [PATCH 13/15] Fix test --- src/zarr/testing/strategies.py | 6 ++++-- tests/test_properties.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 90e2726dda..96d664f5aa 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -165,13 +165,15 @@ def numpy_arrays( draw: st.DrawFn, *, shapes: st.SearchStrategy[tuple[int, ...]] = array_shapes, - zarr_formats: st.SearchStrategy[ZarrFormat] = zarr_formats, + dtype: np.dtype[Any] | None = None, + zarr_formats: st.SearchStrategy[ZarrFormat] | None = zarr_formats, ) -> Any: """ Generate numpy arrays that can be saved in the provided Zarr format. """ zarr_format = draw(zarr_formats) - dtype = draw(v3_dtypes() if zarr_format == 3 else v2_dtypes()) + if dtype is None: + dtype = draw(v3_dtypes() if zarr_format == 3 else v2_dtypes()) if np.issubdtype(dtype, np.str_): safe_unicode_strings = safe_unicode_for_dtype(dtype) return draw(npst.arrays(dtype=dtype, shape=shapes, elements=safe_unicode_strings)) diff --git a/tests/test_properties.py b/tests/test_properties.py index bab5772053..a446eb6c14 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -58,7 +58,7 @@ def test_basic_indexing(data: st.DataObject) -> None: actual = zarray[indexer] assert_array_equal(nparray[indexer], actual) - new_data = data.draw(npst.arrays(shape=st.just(actual.shape), dtype=nparray.dtype)) + new_data = data.draw(numpy_arrays(shapes=st.just(actual.shape), dtype=nparray.dtype)) zarray[indexer] = new_data nparray[indexer] = new_data assert_array_equal(nparray, zarray[:]) @@ -80,7 +80,7 @@ def test_oindex(data: st.DataObject) -> None: if isinstance(idxr, np.ndarray) and idxr.size != np.unique(idxr).size: # behaviour of setitem with repeated indices is not guaranteed in practice assume(False) - new_data = data.draw(npst.arrays(shape=st.just(actual.shape), dtype=nparray.dtype)) + new_data = data.draw(numpy_arrays(shapes=st.just(actual.shape), dtype=nparray.dtype)) nparray[npindexer] = new_data zarray.oindex[zindexer] = new_data note((new_data, npindexer, nparray, zindexer, zarray[:])) From 21a0c6ae07ce8cb02fa3b584bdca35665dafe2f0 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 21 Feb 2025 15:30:26 -0700 Subject: [PATCH 14/15] cleaniup --- tests/test_properties.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_properties.py b/tests/test_properties.py index a446eb6c14..68d8bb0a0e 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -8,7 +8,7 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st -from hypothesis import assume, given, note +from hypothesis import assume, given from zarr.abc.store import Store from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata @@ -65,7 +65,6 @@ def test_basic_indexing(data: st.DataObject) -> None: @given(data=st.data()) -# @reproduce_failure("6.126.0", b"AXicFYiBDQAwDIJ060F7gdP6+TQkEkH7cJAZCXHpjjltecJuiz+MwwVc") def test_oindex(data: st.DataObject) -> None: # integer_array_indices can't handle 0-size dimensions. zarray = data.draw(simple_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1))) @@ -83,7 +82,6 @@ def test_oindex(data: st.DataObject) -> None: new_data = data.draw(numpy_arrays(shapes=st.just(actual.shape), dtype=nparray.dtype)) nparray[npindexer] = new_data zarray.oindex[zindexer] = new_data - note((new_data, npindexer, nparray, zindexer, zarray[:])) assert_array_equal(nparray, zarray[:]) From eb6b2d89ccb9602f7004b624e511088e2888a96b Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 23 Feb 2025 18:15:37 +0100 Subject: [PATCH 15/15] add asynchronous=true kwarg to AsyncFileSystemWrapper --- src/zarr/storage/_fsspec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/storage/_fsspec.py b/src/zarr/storage/_fsspec.py index 1cc7039e68..a4730a93d9 100644 --- a/src/zarr/storage/_fsspec.py +++ b/src/zarr/storage/_fsspec.py @@ -178,7 +178,7 @@ def from_url( try: from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper - fs = AsyncFileSystemWrapper(fs) + fs = AsyncFileSystemWrapper(fs, asynchronous=True) except ImportError as e: raise ImportError( f"The filesystem for URL '{url}' is synchronous, and the required "