diff --git a/pyproject.toml b/pyproject.toml index 63ecdd85be..e44b9a380d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,7 +61,7 @@ test = [ "msgpack", "s3fs", "pytest-asyncio", - "moto[s3]", + "moto[s3, server]", "flask-cors", "flask", "requests", @@ -198,7 +198,7 @@ dependencies = [ 'pytest', 'pytest-cov', 'pytest-asyncio', - 'moto[s3]', + 'moto[s3, server]' ] [tool.hatch.envs.upstream.env-vars] @@ -231,7 +231,7 @@ dependencies = [ 'pytest', 'pytest-cov', 'pytest-asyncio', - 'moto[s3]', + 'moto[s3, server]', ] [tool.hatch.envs.min_deps.scripts] diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index bd0a7ad503..e7569d0138 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -412,6 +412,18 @@ async def getsize_prefix(self, prefix: str) -> int: sizes = await concurrent_map(keys, self.getsize, limit=limit) return sum(sizes) + def _as_immutable(self: Self) -> Self: + """ + Return an immutable (read-only) copy of the store. + """ + raise NotImplementedError + + def _as_mutable(self: Self) -> Self: + """ + Return a mutable copy of the store. + """ + raise NotImplementedError + @runtime_checkable class ByteGetter(Protocol): diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 3f36614cc2..9edf895770 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -52,7 +52,6 @@ "ones", "ones_like", "open", - "open_array", "open_consolidated", "open_group", "open_like", @@ -301,7 +300,7 @@ async def open( store_path = await make_store_path(store, mode=mode, path=path, storage_options=storage_options) # TODO: the mode check below seems wrong! 
- if "shape" not in kwargs and mode in {"a", "r", "r+"}: + if "shape" not in kwargs and mode in _READ_MODES: try: metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format) # TODO: remove this cast when we fix typing for array metadata dicts @@ -1093,7 +1092,6 @@ async def open_array( store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options) zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) - try: return await AsyncArray.open(store_path, zarr_format=zarr_format) except FileNotFoundError: diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 13a8c7209a..96b0db1de4 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -804,7 +804,6 @@ async def _save_metadata(self, ensure_parents: bool = False) -> None: ).items() ] ) - await asyncio.gather(*awaitables) @property @@ -1844,27 +1843,6 @@ def __setitem__(self, key: str, value: Any) -> None: def __repr__(self) -> str: return f"" - async def update_attributes_async(self, new_attributes: dict[str, Any]) -> Group: - """Update the attributes of this group. 
- - Example - ------- - >>> import zarr - >>> group = zarr.group() - >>> await group.update_attributes_async({"foo": "bar"}) - >>> group.attrs.asdict() - {'foo': 'bar'} - """ - new_metadata = replace(self.metadata, attributes=new_attributes) - - # Write new metadata - to_save = new_metadata.to_buffer_dict(default_buffer_prototype()) - awaitables = [set_or_delete(self.store_path / key, value) for key, value in to_save.items()] - await asyncio.gather(*awaitables) - - async_group = replace(self._async_group, metadata=new_metadata) - return replace(self, _async_group=async_group) - @property def store_path(self) -> StorePath: """Path-like interface for the Store.""" diff --git a/src/zarr/storage/common.py b/src/zarr/storage/common.py index 1e33967414..5a44d94b12 100644 --- a/src/zarr/storage/common.py +++ b/src/zarr/storage/common.py @@ -12,8 +12,6 @@ from zarr.storage.local import LocalStore from zarr.storage.memory import MemoryStore -# from zarr.store.remote import RemoteStore - if TYPE_CHECKING: from zarr.core.buffer import BufferPrototype @@ -72,6 +70,8 @@ async def open( """ await store._ensure_open() + if mode == "r" and not store.read_only: + store = store._as_immutable() self = cls(store, path) # fastpath if mode is None diff --git a/src/zarr/storage/local.py b/src/zarr/storage/local.py index f9b1747c31..3c2bf37c7d 100644 --- a/src/zarr/storage/local.py +++ b/src/zarr/storage/local.py @@ -5,7 +5,7 @@ import os import shutil from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Self from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer @@ -229,3 +229,9 @@ async def list_dir(self, prefix: str) -> AsyncIterator[str]: async def getsize(self, key: str) -> int: return os.path.getsize(self.root / key) + + def _as_immutable(self: Self) -> Self: + return type(self)(self.root, read_only=True) + + def _as_mutable(self: Self) -> Self: + return type(self)(self.root, read_only=False) diff --git 
a/src/zarr/storage/logging.py b/src/zarr/storage/logging.py index bc90b4f30f..107455bace 100644 --- a/src/zarr/storage/logging.py +++ b/src/zarr/storage/logging.py @@ -5,7 +5,7 @@ import time from collections import defaultdict from contextlib import contextmanager -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Self from zarr.abc.store import ByteRangeRequest, Store @@ -233,3 +233,15 @@ async def getsize(self, key: str) -> int: async def getsize_prefix(self, prefix: str) -> int: with self.log(prefix): return await self._store.getsize_prefix(prefix) + + def _as_immutable(self: Self) -> Self: + return type(self)( + store=self._store._as_immutable(), + log_level=self.log_level, + log_handler=self.log_handler, + ) + + def _as_mutable(self: Self) -> Self: + return type(self)( + store=self._store._as_mutable(), log_level=self.log_level, log_handler=self.log_handler + ) diff --git a/src/zarr/storage/memory.py b/src/zarr/storage/memory.py index 74d7758863..745db925db 100644 --- a/src/zarr/storage/memory.py +++ b/src/zarr/storage/memory.py @@ -169,6 +169,12 @@ async def list_dir(self, prefix: str) -> AsyncIterator[str]: for key in keys_unique: yield key + def _as_immutable(self: Self) -> Self: + return type(self)(self._store_dict, read_only=True) + + def _as_mutable(self: Self) -> Self: + return type(self)(self._store_dict, read_only=False) + class GpuMemoryStore(MemoryStore): """A GPU only memory store that stores every chunk in GPU memory irrespective @@ -236,3 +242,9 @@ async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None # Convert to gpu.Buffer gpu_value = value if isinstance(value, gpu.Buffer) else gpu.Buffer.from_buffer(value) await super().set(key, gpu_value, byte_range=byte_range) + + def _as_immutable(self: Self) -> Self: + return type(self)(self._store_dict, read_only=True) + + def _as_mutable(self: Self) -> Self: + return type(self)(self._store_dict, read_only=False) diff --git 
a/src/zarr/storage/remote.py b/src/zarr/storage/remote.py index 2b8329c9fa..2ab8118539 100644 --- a/src/zarr/storage/remote.py +++ b/src/zarr/storage/remote.py @@ -1,7 +1,7 @@ from __future__ import annotations import warnings -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Self from zarr.abc.store import ByteRangeRequest, Store from zarr.storage.common import _dereference_path @@ -338,3 +338,13 @@ async def getsize(self, key: str) -> int: else: # fsspec doesn't have typing. We'll need to assume or verify this is true return int(size) + + def _as_immutable(self: Self) -> Self: + return type(self)( + self.fs, read_only=True, path=self.path, allowed_exceptions=self.allowed_exceptions + ) + + def _as_mutable(self: Self) -> Self: + return type(self)( + self.fs, read_only=False, path=self.path, allowed_exceptions=self.allowed_exceptions + ) diff --git a/src/zarr/storage/zip.py b/src/zarr/storage/zip.py index a186b3cf59..b581d3ac26 100644 --- a/src/zarr/storage/zip.py +++ b/src/zarr/storage/zip.py @@ -5,7 +5,7 @@ import time import zipfile from pathlib import Path -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Any, Literal, Self from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer, BufferPrototype @@ -269,3 +269,27 @@ async def list_dir(self, prefix: str) -> AsyncIterator[str]: if k not in seen: seen.add(k) yield k + + def _as_immutable(self: Self) -> Self: + self.close() + new_store = type(self)( + self.path, + read_only=True, + mode="r", + compression=self.compression, + allowZip64=self.allowZip64, + ) + new_store._sync_open() + return new_store + + def _as_mutable(self: Self) -> Self: + self.close() + new_store = type(self)( + self.path, + read_only=False, + mode="a", + compression=self.compression, + allowZip64=self.allowZip64, + ) + new_store._sync_open() + return new_store diff --git a/tests/conftest.py b/tests/conftest.py index 35f31d39b3..7feb28b837 100644 --- 
a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,6 @@ from __future__ import annotations +import os import pathlib from dataclasses import dataclass, field from typing import TYPE_CHECKING @@ -7,6 +8,7 @@ import numpy as np import numpy.typing as npt import pytest +from botocore.session import Session from hypothesis import HealthCheck, Verbosity, settings from zarr import AsyncGroup, config @@ -16,30 +18,60 @@ from zarr.storage.remote import RemoteStore if TYPE_CHECKING: - from collections.abc import Generator + from collections.abc import AsyncGenerator, Generator from typing import Any, Literal + import botocore from _pytest.compat import LEGACY_PATH from zarr.core.common import ChunkCoords, MemoryOrder, ZarrFormat +s3fs = pytest.importorskip("s3fs") +requests = pytest.importorskip("requests") +moto_server = pytest.importorskip("moto.moto_server.threaded_moto_server") +moto = pytest.importorskip("moto") + +# ### amended from s3fs ### # +test_bucket_name = "test" +secure_bucket_name = "test-secure" + async def parse_store( - store: Literal["local", "memory", "remote", "zip"], path: str + store: str, + path: str, + s3: s3fs.S3FileSystem, # type: ignore[name-defined] ) -> LocalStore | MemoryStore | RemoteStore | ZipStore: - if store == "local": - return await LocalStore.open(path) - if store == "memory": - return await MemoryStore.open() - if store == "remote": - return await RemoteStore.open(url=path) - if store == "zip": - return await ZipStore.open(path + "/zarr.zip", mode="w") + """ + Take a string representation of a store and convert that string representation + into the appropriate store object, which is then returned. 
+ """ + + match store: + case "local": + return LocalStore(path, read_only=False) + case "memory": + return MemoryStore(read_only=False) + case "remote": + return RemoteStore(fs=s3, path=test_bucket_name, read_only=False) + case "zip": + return await ZipStore.open(path + "/zarr.zip", read_only=False, mode="w") + raise AssertionError @pytest.fixture(params=[str, pathlib.Path]) def path_type(request: pytest.FixtureRequest) -> Any: + """ + A pytest fixture that provides a parameterized path type. + + This fixture yields different types of path representations + for testing purposes. The possible types are `str` and + `pathlib.Path`. It can be used to test functions or methods + that need to handle different path type inputs. + + Returns: + The path type specified by the current parameter. + """ return request.param @@ -51,34 +83,20 @@ async def store_path(tmpdir: LEGACY_PATH) -> StorePath: @pytest.fixture -async def local_store(tmpdir: LEGACY_PATH) -> LocalStore: - return await LocalStore.open(str(tmpdir)) - - -@pytest.fixture -async def remote_store(url: str) -> RemoteStore: - return await RemoteStore.open(url) - - -@pytest.fixture -async def memory_store() -> MemoryStore: - return await MemoryStore.open() - - -@pytest.fixture -async def zip_store(tmpdir: LEGACY_PATH) -> ZipStore: - return await ZipStore.open(str(tmpdir / "zarr.zip"), mode="w") - - -@pytest.fixture -async def store(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> Store: +async def store( + request: pytest.FixtureRequest, + tmpdir: LEGACY_PATH, + s3: s3fs.S3FileSystem, # type: ignore[name-defined] +) -> AsyncGenerator[Store, None]: param = request.param - return await parse_store(param, str(tmpdir)) + store_instance = await parse_store(param, str(tmpdir), s3) + yield store_instance + store_instance.close() @pytest.fixture(params=["local", "memory", "zip"]) -def sync_store(request: pytest.FixtureRequest, tmp_path: LEGACY_PATH) -> Store: - result = sync(parse_store(request.param, str(tmp_path))) 
+def sync_store(request: pytest.FixtureRequest, tmp_path: LEGACY_PATH, s3_base: str) -> Store: + result = sync(parse_store(request.param, str(tmp_path), s3_base)) if not isinstance(result, Store): raise TypeError("Wrong store class returned by test fixture! got " + result + " instead") return result @@ -92,10 +110,12 @@ class AsyncGroupRequest: @pytest.fixture -async def async_group(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> AsyncGroup: +async def async_group( + request: pytest.FixtureRequest, tmpdir: LEGACY_PATH, s3_base: str +) -> AsyncGroup: param: AsyncGroupRequest = request.param - store = await parse_store(param.store, str(tmpdir)) + store = await parse_store(param.store, str(tmpdir), s3_base) return await AsyncGroup.from_store( store, attributes=param.attributes, @@ -148,6 +168,56 @@ def zarr_format(request: pytest.FixtureRequest) -> ZarrFormat: raise ValueError(msg) +@pytest.fixture(scope="module") +def s3_base() -> Generator[str, None, None]: + # writable local S3 system + from moto.server import ThreadedMotoServer + + if "AWS_SECRET_ACCESS_KEY" not in os.environ: + os.environ["AWS_SECRET_ACCESS_KEY"] = "foo" + if "AWS_ACCESS_KEY_ID" not in os.environ: + os.environ["AWS_ACCESS_KEY_ID"] = "foo" + server = ThreadedMotoServer(ip_address="127.0.0.1", port=0) + server.start() + host, port = server._server.server_address + endpoint_url = f"http://{host}:{port}" + + yield endpoint_url + server.stop() + + +def get_boto3_client(endpoint_url: str) -> botocore.client.BaseClient: + # NB: we use the sync botocore client for setup + session = Session() + return session.create_client("s3", endpoint_url=endpoint_url, region_name="us-east-1") + + +@pytest.fixture(autouse=True) +def s3(s3_base: str) -> Generator[s3fs.S3FileSystem, None, None]: # type: ignore[name-defined] + """ + Quoting Martin Durant: + pytest-asyncio creates a new event loop for each async test. 
+ When an async-mode s3fs instance is made from async, it will be assigned to the loop from + which it is made. That means that if you use s3fs again from a subsequent test, + you will have the same identical instance, but be running on a different loop - which fails. + + For the rest: it's very convenient to clean up the state of the store between tests, + make sure we start off blank each time. + + https://github.com/zarr-developers/zarr-python/pull/1785#discussion_r1634856207 + """ + client = get_boto3_client(s3_base) + client.create_bucket(Bucket=test_bucket_name, ACL="public-read") + s3fs.S3FileSystem.clear_instance_cache() + s3 = s3fs.S3FileSystem(anon=False, client_kwargs={"endpoint_url": s3_base}, asynchronous=True) + session = sync(s3.set_session()) + s3.invalidate_cache() + yield s3 + requests.post(f"{s3_base}/moto-api/reset") + client.close() + sync(session.close()) + + settings.register_profile( "ci", max_examples=1000, diff --git a/tests/test_api.py b/tests/test_api.py index c7fc88241f..1d4f878937 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -22,14 +22,15 @@ save_group, ) from zarr.core.common import MemoryOrder, ZarrFormat +from zarr.core.sync import sync from zarr.errors import MetadataValidationError from zarr.storage._utils import normalize_path from zarr.storage.memory import MemoryStore +from zarr.storage.zip import ZipStore -def test_create_array(memory_store: Store) -> None: - store = memory_store - +@pytest.mark.parametrize("store", ["local", "memory", "remote"], indirect=True) +def test_create_array(store: Store) -> None: # create array z = create(shape=100, store=store) assert isinstance(z, Array) @@ -48,51 +49,61 @@ def test_create_array(memory_store: Store) -> None: assert z.chunks == (40,) +@pytest.mark.parametrize("store", ["local", "memory", "remote", "zip"], indirect=True) @pytest.mark.parametrize("path", ["foo", "/", "/foo", "///foo/bar"]) @pytest.mark.parametrize("node_type", ["array", "group"]) def 
test_open_normalized_path( - memory_store: MemoryStore, path: str, node_type: Literal["array", "group"] + store: Store, path: str, node_type: Literal["array", "group"] ) -> None: node: Group | Array if node_type == "group": - node = group(store=memory_store, path=path) + node = group(store=store, path=path) elif node_type == "array": - node = create(store=memory_store, path=path, shape=(2,)) + node = create(store=store, path=path, shape=(2,)) assert node.path == normalize_path(path) -async def test_open_array(memory_store: MemoryStore) -> None: - store = memory_store - +@pytest.mark.parametrize( + "store", + ["local", "memory", "remote", pytest.param("zip", marks=pytest.mark.xfail)], + indirect=True, +) +async def test_open_array(store: Store, zarr_format: ZarrFormat) -> None: # open array, create if doesn't exist - z = open(store=store, shape=100) + z = open(store=store, shape=100, zarr_format=zarr_format) assert isinstance(z, Array) assert z.shape == (100,) - # open array, overwrite - # store._store_dict = {} - store = MemoryStore() - z = open(store=store, shape=200) + # invoke open again, with a different shape and mode w. + # We expect the store to be wiped at the current path and new array to come out. 
+ z = open(store=store, shape=200, zarr_format=zarr_format, mode="w") assert isinstance(z, Array) assert z.shape == (200,) - # open array, read-only - store_cls = type(store) - ro_store = await store_cls.open(store_dict=store._store_dict, read_only=True) - z = open(store=ro_store, mode="r") + store_r = store._as_immutable() + z = open(store=store_r, zarr_format=zarr_format, mode="r") assert isinstance(z, Array) assert z.shape == (200,) assert z.read_only - # path not found - with pytest.raises(FileNotFoundError): - open(store="doesnotexist", mode="r") +@pytest.mark.parametrize( + "store", + ["local", "memory", "remote", "zip"], + indirect=True, +) +def test_open_path_not_found(store: Store, zarr_format: ZarrFormat) -> None: + with pytest.raises(FileNotFoundError): + open(store=store, path="doesnotexist", mode="r", zarr_format=zarr_format) -async def test_open_group(memory_store: MemoryStore) -> None: - store = memory_store +@pytest.mark.parametrize( + "store", + ["local", "memory", "remote", "zip"], + indirect=True, +) +async def test_open_group(store: Store) -> None: # open group, create if doesn't exist g = open_group(store=store) g.create_group("foo") @@ -103,28 +114,65 @@ async def test_open_group(memory_store: MemoryStore) -> None: # g = open_group(store=store) # assert isinstance(g, Group) # assert "foo" not in g + store_r = store._as_immutable() - # open group, read-only - store_cls = type(store) - ro_store = await store_cls.open(store_dict=store._store_dict, read_only=True) - g = open_group(store=ro_store, mode="r") + g = open_group(store=store_r, mode="r") assert isinstance(g, Group) - assert g.read_only + if isinstance(store, ZipStore): + store.close() + +@pytest.mark.parametrize( + "store", + ["local", "memory", "remote", "zip"], + indirect=True, +) +async def test_open_array_or_group(zarr_format: ZarrFormat, store: Store) -> None: + # create a group and an array + grp_attrs = {"foo": "bar"} + grp_w = group(store=store, path="group", 
zarr_format=zarr_format, attributes=grp_attrs) + arr_w = grp_w.create_array(name="foo", shape=(1,)) + + store_2: Store + + if isinstance(store, ZipStore): + store.close() + store_2 = await ZipStore.open(store.path, mode="r") + else: + store_2 = store + + grp_r = open(store=store_2, path="group", mode="r", zarr_format=zarr_format) + assert isinstance(grp_r, Group) + assert grp_r.attrs == grp_attrs + + arr_r = open(store=store_2, path="group/foo", mode="r", zarr_format=zarr_format) + assert isinstance(arr_r, Array) + assert arr_r.shape == arr_w.shape + + +@pytest.mark.parametrize( + "store", + ["local", "memory", "remote", "zip"], + indirect=True, +) @pytest.mark.parametrize("zarr_format", [None, 2, 3]) -async def test_open_group_unspecified_version( - tmpdir: pathlib.Path, zarr_format: ZarrFormat -) -> None: +async def test_open_group_unspecified_version(store: Store, zarr_format: ZarrFormat) -> None: """Regression test for https://github.com/zarr-developers/zarr-python/issues/2175""" # create a group with specified zarr format (could be 2, 3, or None) _ = await zarr.api.asynchronous.open_group( - store=str(tmpdir), mode="w", zarr_format=zarr_format, attributes={"foo": "bar"} + store=store, mode="a", zarr_format=zarr_format, attributes={"foo": "bar"} ) + store_2: Store + if isinstance(store, ZipStore): + store.close() + store_2 = await ZipStore.open(store.path, mode="r") + else: + store_2 = store # now open that group without specifying the format - g2 = await zarr.api.asynchronous.open_group(store=str(tmpdir), mode="r") + g2 = await zarr.api.asynchronous.open_group(store=store_2, mode="r") assert g2.attrs == {"foo": "bar"} @@ -175,7 +223,28 @@ def test_save_errors() -> None: zarr.save("data/example.zarr", a, mode="w") -def test_open_with_mode_r(tmp_path: pathlib.Path) -> None: +@pytest.mark.parametrize( + "store", + ["local", "memory", "remote", "zip"], + indirect=True, +) +def test_open_store_with_mode_r(store: Store) -> None: + # 'r' means read only (must exist) 
+ with pytest.raises(FileNotFoundError): + zarr.open(store=store, mode="r") + + z1 = zarr.ones(store=store, shape=(3, 3)) + assert z1.fill_value == 1 + + z2 = zarr.open(store=store, mode="r") + assert isinstance(z2, Array) + assert z2.fill_value == 1 + assert (z2[:] == 1).all() + with pytest.raises(ValueError): + z2[:] = 3 + + +def test_open_path_with_mode_r(tmp_path: pathlib.Path) -> None: # 'r' means read only (must exist) with pytest.raises(FileNotFoundError): zarr.open(store=tmp_path, mode="r") @@ -189,52 +258,73 @@ def test_open_with_mode_r(tmp_path: pathlib.Path) -> None: z2[:] = 3 -def test_open_with_mode_r_plus(tmp_path: pathlib.Path) -> None: +@pytest.mark.parametrize( + "store", + ["local", "memory", "remote", "zip"], + indirect=True, +) +def test_open_with_mode_r_plus(store: Store) -> None: # 'r+' means read/write (must exist) with pytest.raises(FileNotFoundError): - zarr.open(store=tmp_path, mode="r+") - zarr.ones(store=tmp_path, shape=(3, 3)) - z2 = zarr.open(store=tmp_path, mode="r+") + zarr.open(store=store, mode="r+") + zarr.ones(store=store, shape=(3, 3)) + z2 = zarr.open(store=store, mode="r+") assert isinstance(z2, Array) assert (z2[:] == 1).all() z2[:] = 3 -async def test_open_with_mode_a(tmp_path: pathlib.Path) -> None: +@pytest.mark.parametrize( + "store", + ["local", "memory", "remote", pytest.param("zip", marks=pytest.mark.xfail)], + indirect=True, +) +async def test_open_with_mode_a(store: Store) -> None: # Open without shape argument should default to group - g = zarr.open(store=tmp_path, mode="a") + g = zarr.open(store=store, mode="a") assert isinstance(g, Group) - await g.store_path.delete() + # this causes an exception for remotestore if it is awaited. no clue why. + sync(store.delete("zarr.json")) # 'a' means read/write (create if doesn't exist) - arr = zarr.open(store=tmp_path, mode="a", shape=(3, 3)) + arr = zarr.open(store=store, mode="a", shape=(3, 3)) assert isinstance(arr, Array) arr[...] 
= 1 - z2 = zarr.open(store=tmp_path, mode="a") + z2 = zarr.open(store=store, mode="a") assert isinstance(z2, Array) assert (z2[:] == 1).all() z2[:] = 3 -def test_open_with_mode_w(tmp_path: pathlib.Path) -> None: +@pytest.mark.parametrize( + "store", + ["local", "memory", "remote", pytest.param("zip", marks=pytest.mark.xfail)], + indirect=True, +) +def test_open_with_mode_w(store: Store) -> None: # 'w' means create (overwrite if exists); - arr = zarr.open(store=tmp_path, mode="w", shape=(3, 3)) + arr = zarr.open(store=store, mode="w", shape=(3, 3)) assert isinstance(arr, Array) arr[...] = 3 - z2 = zarr.open(store=tmp_path, mode="w", shape=(3, 3)) + z2 = zarr.open(store=store, mode="w", shape=(3, 3)) assert isinstance(z2, Array) assert not (z2[:] == 3).all() z2[:] = 3 -def test_open_with_mode_w_minus(tmp_path: pathlib.Path) -> None: +@pytest.mark.parametrize( + "store", + ["local", "memory", "remote", "zip"], + indirect=True, +) +def test_open_with_mode_w_minus(store: Store) -> None: # 'w-' means create (fail if exists) - arr = zarr.open(store=tmp_path, mode="w-", shape=(3, 3)) + arr = zarr.open(store=store, mode="w-", shape=(3, 3)) assert isinstance(arr, Array) arr[...] 
= 1 with pytest.raises(FileExistsError): - zarr.open(store=tmp_path, mode="w-") + zarr.open(store=store, mode="w-") @pytest.mark.parametrize("order", ["C", "F", None]) @@ -269,8 +359,8 @@ def test_array_order(order: MemoryOrder | None, zarr_format: ZarrFormat) -> None # assert "LazyLoader: " in repr(loader) -def test_load_array(memory_store: Store) -> None: - store = memory_store +@pytest.mark.parametrize("store", ["local", "memory", "remote", "zip"], indirect=True) +def test_load_array(store: Store) -> None: foo = np.arange(100) bar = np.arange(100, 0, -1) save(store, foo=foo, bar=bar) @@ -964,13 +1054,14 @@ def test_tree() -> None: # copy(source["foo"], dest, dry_run=True, log=True) -def test_open_positional_args_deprecated() -> None: - store = MemoryStore() +@pytest.mark.parametrize("store", ["memory"], indirect=True) +def test_open_positional_args_deprecated(store: MemoryStore) -> None: with pytest.warns(FutureWarning, match="pass"): open(store, "w", shape=(1,)) -def test_save_array_positional_args_deprecated() -> None: +@pytest.mark.parametrize("store", ["memory"], indirect=True) +def test_save_array_positional_args_deprecated(store: MemoryStore) -> None: store = MemoryStore() with warnings.catch_warnings(): warnings.filterwarnings( @@ -986,21 +1077,21 @@ def test_save_array_positional_args_deprecated() -> None: ) -def test_group_positional_args_deprecated() -> None: - store = MemoryStore() +@pytest.mark.parametrize("store", ["memory"], indirect=True) +def test_group_positional_args_deprecated(store: MemoryStore) -> None: with pytest.warns(FutureWarning, match="pass"): group(store, True) -def test_open_group_positional_args_deprecated() -> None: - store = MemoryStore() +@pytest.mark.parametrize("store", ["memory"], indirect=True) +def test_open_group_positional_args_deprecated(store: MemoryStore) -> None: with pytest.warns(FutureWarning, match="pass"): open_group(store, "w") -def test_open_falls_back_to_open_group() -> None: +@pytest.mark.parametrize("store", 
["local", "memory", "remote", "zip"], indirect=True) +def test_open_falls_back_to_open_group(store: Store) -> None: # https://github.com/zarr-developers/zarr-python/issues/2309 - store = MemoryStore() zarr.open_group(store, attributes={"key": "value"}) group = zarr.open(store) @@ -1008,9 +1099,9 @@ def test_open_falls_back_to_open_group() -> None: assert group.attrs == {"key": "value"} -async def test_open_falls_back_to_open_group_async() -> None: +@pytest.mark.parametrize("store", ["local", "memory", "remote", "zip"], indirect=True) +async def test_open_falls_back_to_open_group_async(store: Store) -> None: # https://github.com/zarr-developers/zarr-python/issues/2309 - store = MemoryStore() await zarr.api.asynchronous.open_group(store, attributes={"key": "value"}) group = await zarr.api.asynchronous.open(store=store) @@ -1034,7 +1125,7 @@ async def test_metadata_validation_error() -> None: @pytest.mark.parametrize( "store", - ["local", "memory", "zip"], + ["local", "memory", "zip", "remote"], indirect=True, ) def test_open_array_with_mode_r_plus(store: Store) -> None: diff --git a/tests/test_array.py b/tests/test_array.py index f0f36cf70d..da88adea29 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -10,22 +10,19 @@ import zarr.api.asynchronous from zarr import Array, AsyncArray, Group from zarr.codecs import BytesCodec, VLenBytesCodec -from zarr.core._info import ArrayInfo from zarr.core.array import chunks_initialized from zarr.core.buffer import default_buffer_prototype from zarr.core.buffer.cpu import NDBuffer from zarr.core.common import JSON, MemoryOrder, ZarrFormat from zarr.core.group import AsyncGroup from zarr.core.indexing import ceildiv -from zarr.core.metadata.v3 import DataType from zarr.core.sync import sync from zarr.errors import ContainsArrayError, ContainsGroupError from zarr.storage import LocalStore, MemoryStore from zarr.storage.common import StorePath -@pytest.mark.parametrize("store", ["local", "memory", "zip"], 
indirect=["store"]) -@pytest.mark.parametrize("zarr_format", [2, 3]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) @pytest.mark.parametrize("exists_ok", [True, False]) @pytest.mark.parametrize("extant_node", ["array", "group"]) def test_array_creation_existing_node( @@ -75,8 +72,7 @@ def test_array_creation_existing_node( ) -@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) -@pytest.mark.parametrize("zarr_format", [2, 3]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) async def test_create_creates_parents( store: LocalStore | MemoryStore, zarr_format: ZarrFormat ) -> None: @@ -115,8 +111,7 @@ async def test_create_creates_parents( assert isinstance(g, AsyncGroup) -@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) -@pytest.mark.parametrize("zarr_format", [2, 3]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_array_name_properties_no_group( store: LocalStore | MemoryStore, zarr_format: ZarrFormat ) -> None: @@ -126,8 +121,7 @@ def test_array_name_properties_no_group( assert arr.basename == "" -@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) -@pytest.mark.parametrize("zarr_format", [2, 3]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_array_name_properties_with_group( store: LocalStore | MemoryStore, zarr_format: ZarrFormat ) -> None: @@ -256,7 +250,6 @@ async def test_array_v3_nan_fill_value(store: MemoryStore) -> None: @pytest.mark.parametrize("store", ["local"], indirect=["store"]) -@pytest.mark.parametrize("zarr_format", [2, 3]) async def test_serializable_async_array( store: LocalStore | MemoryStore, zarr_format: ZarrFormat ) -> None: @@ -274,7 +267,6 @@ async def test_serializable_async_array( @pytest.mark.parametrize("store", ["local"], indirect=["store"]) -@pytest.mark.parametrize("zarr_format", [2, 3]) def 
test_serializable_sync_array(store: LocalStore, zarr_format: ZarrFormat) -> None: expected = Array.create( store=store, shape=(100,), chunks=(10,), zarr_format=zarr_format, dtype="i4" @@ -430,7 +422,6 @@ def test_vlen_errors() -> None: ) -@pytest.mark.parametrize("zarr_format", [2, 3]) def test_update_attrs(zarr_format: int) -> None: # regression test for https://github.com/zarr-developers/zarr-python/issues/2328 store = MemoryStore() @@ -442,41 +433,7 @@ def test_update_attrs(zarr_format: int) -> None: assert arr2.attrs["foo"] == "bar" -class TestInfo: - def test_info_v2(self) -> None: - arr = zarr.create(shape=(4, 4), chunks=(2, 2), zarr_format=2) - result = arr.info - expected = ArrayInfo( - _zarr_format=2, - _data_type=np.dtype("float64"), - _shape=(4, 4), - _chunk_shape=(2, 2), - _order="C", - _read_only=False, - _store_type="MemoryStore", - _count_bytes=128, - ) - assert result == expected - - def test_info_v3(self) -> None: - arr = zarr.create(shape=(4, 4), chunks=(2, 2), zarr_format=3) - result = arr.info - expected = ArrayInfo( - _zarr_format=3, - _data_type=DataType.parse("float64"), - _shape=(4, 4), - _chunk_shape=(2, 2), - _order="C", - _read_only=False, - _store_type="MemoryStore", - _codecs=[BytesCodec()], - _count_bytes=128, - ) - assert result == expected - - @pytest.mark.parametrize("store", ["memory"], indirect=True) -@pytest.mark.parametrize("zarr_format", [2, 3]) def test_resize_1d(store: MemoryStore, zarr_format: int) -> None: z = zarr.create( shape=105, chunks=10, dtype="i4", fill_value=0, store=store, zarr_format=zarr_format @@ -515,7 +472,6 @@ def test_resize_1d(store: MemoryStore, zarr_format: int) -> None: @pytest.mark.parametrize("store", ["memory"], indirect=True) -@pytest.mark.parametrize("zarr_format", [2, 3]) def test_resize_2d(store: MemoryStore, zarr_format: int) -> None: z = zarr.create( shape=(105, 105), @@ -577,7 +533,6 @@ def test_resize_2d(store: MemoryStore, zarr_format: int) -> None: @pytest.mark.parametrize("store", 
["memory"], indirect=True) -@pytest.mark.parametrize("zarr_format", [2, 3]) def test_append_1d(store: MemoryStore, zarr_format: int) -> None: a = np.arange(105) z = zarr.create(shape=a.shape, chunks=10, dtype=a.dtype, store=store, zarr_format=zarr_format) @@ -607,7 +562,6 @@ def test_append_1d(store: MemoryStore, zarr_format: int) -> None: @pytest.mark.parametrize("store", ["memory"], indirect=True) -@pytest.mark.parametrize("zarr_format", [2, 3]) def test_append_2d(store: MemoryStore, zarr_format: int) -> None: a = np.arange(105 * 105, dtype="i4").reshape((105, 105)) z = zarr.create( @@ -631,7 +585,6 @@ def test_append_2d(store: MemoryStore, zarr_format: int) -> None: @pytest.mark.parametrize("store", ["memory"], indirect=True) -@pytest.mark.parametrize("zarr_format", [2, 3]) def test_append_2d_axis(store: MemoryStore, zarr_format: int) -> None: a = np.arange(105 * 105, dtype="i4").reshape((105, 105)) z = zarr.create( @@ -653,7 +606,6 @@ def test_append_2d_axis(store: MemoryStore, zarr_format: int) -> None: @pytest.mark.parametrize("store", ["memory"], indirect=True) -@pytest.mark.parametrize("zarr_format", [2, 3]) def test_append_bad_shape(store: MemoryStore, zarr_format: int) -> None: a = np.arange(100) z = zarr.create(shape=a.shape, chunks=10, dtype=a.dtype, store=store, zarr_format=zarr_format) @@ -664,7 +616,6 @@ def test_append_bad_shape(store: MemoryStore, zarr_format: int) -> None: @pytest.mark.parametrize("order", ["C", "F", None]) -@pytest.mark.parametrize("zarr_format", [2, 3]) @pytest.mark.parametrize("store", ["memory"], indirect=True) def test_array_create_order( order: MemoryOrder | None, zarr_format: int, store: MemoryStore diff --git a/tests/test_group.py b/tests/test_group.py index afa290207d..bd8b6a8f98 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -3,6 +3,7 @@ import contextlib import operator import pickle +import sys import warnings from typing import TYPE_CHECKING, Any, Literal @@ -14,7 +15,6 @@ import zarr.api.synchronous 
import zarr.storage from zarr import Array, AsyncArray, AsyncGroup, Group -from zarr.abc.store import Store from zarr.core._info import GroupInfo from zarr.core.buffer import default_buffer_prototype from zarr.core.group import ConsolidatedMetadata, GroupMetadata @@ -23,22 +23,11 @@ from zarr.storage import LocalStore, MemoryStore, StorePath, ZipStore from zarr.storage.common import make_store_path -from .conftest import parse_store - if TYPE_CHECKING: - from _pytest.compat import LEGACY_PATH - + from zarr.abc.store import Store from zarr.core.common import JSON, ZarrFormat -@pytest.fixture(params=["local", "memory", "zip"]) -async def store(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> Store: - result = await parse_store(request.param, str(tmpdir)) - if not isinstance(result, Store): - raise TypeError("Wrong store class returned by test fixture! got " + result + " instead") - return result - - @pytest.fixture(params=[True, False]) def exists_ok(request: pytest.FixtureRequest) -> bool: result = request.param @@ -47,6 +36,7 @@ def exists_ok(request: pytest.FixtureRequest) -> bool: return result +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) def test_group_init(store: Store, zarr_format: ZarrFormat) -> None: """ Test that initializing a group from an asyncgroup works. 
@@ -56,6 +46,7 @@ def test_group_init(store: Store, zarr_format: ZarrFormat) -> None: assert group._async_group == agroup +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) async def test_create_creates_parents(store: Store, zarr_format: ZarrFormat) -> None: # prepare a root node, with some data set await zarr.api.asynchronous.open_group( @@ -108,6 +99,7 @@ async def test_create_creates_parents(store: Store, zarr_format: ZarrFormat) -> assert g.attrs == {} +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) def test_group_name_properties(store: Store, zarr_format: ZarrFormat) -> None: """ Test basic properties of groups @@ -128,6 +120,7 @@ def test_group_name_properties(store: Store, zarr_format: ZarrFormat) -> None: assert bar.basename == "bar" +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) @pytest.mark.parametrize("consolidated_metadata", [True, False]) def test_group_members(store: Store, zarr_format: ZarrFormat, consolidated_metadata: bool) -> None: """ @@ -210,6 +203,7 @@ def test_group_members(store: Store, zarr_format: ZarrFormat, consolidated_metad members_observed = group.members(max_depth=-1) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) def test_group(store: Store, zarr_format: ZarrFormat) -> None: """ Test basic Group routines. @@ -252,6 +246,7 @@ def test_group(store: Store, zarr_format: ZarrFormat) -> None: assert dict(bar3.attrs) == {"baz": "qux", "name": "bar"} +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) def test_group_create(store: Store, exists_ok: bool, zarr_format: ZarrFormat) -> None: """ Test that `Group.from_store` works as expected. 
@@ -268,6 +263,7 @@ def test_group_create(store: Store, exists_ok: bool, zarr_format: ZarrFormat) -> _ = Group.from_store(store, exists_ok=exists_ok, zarr_format=zarr_format) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) def test_group_open(store: Store, zarr_format: ZarrFormat, exists_ok: bool) -> None: """ Test the `Group.open` method. @@ -305,6 +301,7 @@ def test_group_open(store: Store, zarr_format: ZarrFormat, exists_ok: bool) -> N @pytest.mark.parametrize("consolidated", [True, False]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) def test_group_getitem(store: Store, zarr_format: ZarrFormat, consolidated: bool) -> None: """ Test the `Group.__getitem__` method. @@ -361,6 +358,7 @@ def test_group_getitem(store: Store, zarr_format: ZarrFormat, consolidated: bool group["subarray/subsubarray"] +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) def test_group_get_with_default(store: Store, zarr_format: ZarrFormat) -> None: group = Group.from_store(store, zarr_format=zarr_format) @@ -381,6 +379,7 @@ def test_group_get_with_default(store: Store, zarr_format: ZarrFormat) -> None: @pytest.mark.parametrize("consolidated", [True, False]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) def test_group_delitem(store: Store, zarr_format: ZarrFormat, consolidated: bool) -> None: """ Test the `Group.__delitem__` method. @@ -410,6 +409,7 @@ def test_group_delitem(store: Store, zarr_format: ZarrFormat, consolidated: bool group["subarray"] +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) def test_group_iter(store: Store, zarr_format: ZarrFormat) -> None: """ Test the `Group.__iter__` method. 
@@ -419,6 +419,7 @@ def test_group_iter(store: Store, zarr_format: ZarrFormat) -> None: assert list(group) == [] +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) def test_group_len(store: Store, zarr_format: ZarrFormat) -> None: """ Test the `Group.__len__` method. @@ -428,6 +429,7 @@ def test_group_len(store: Store, zarr_format: ZarrFormat) -> None: assert len(group) == 0 +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) def test_group_setitem(store: Store, zarr_format: ZarrFormat) -> None: """ Test the `Group.__setitem__` method. @@ -454,6 +456,7 @@ def test_group_setitem(store: Store, zarr_format: ZarrFormat) -> None: np.testing.assert_array_equal(group[key], arr) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) def test_group_contains(store: Store, zarr_format: ZarrFormat) -> None: """ Test the `Group.__contains__` method @@ -464,6 +467,7 @@ def test_group_contains(store: Store, zarr_format: ZarrFormat) -> None: assert "foo" in group +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) @pytest.mark.parametrize("consolidate", [True, False]) def test_group_child_iterators(store: Store, zarr_format: ZarrFormat, consolidate: bool): group = Group.from_store(store, zarr_format=zarr_format) @@ -569,6 +573,7 @@ def test_group_child_iterators(store: Store, zarr_format: ZarrFormat, consolidat assert sorted(group.array_values(), key=lambda x: x.name) == expected_array_values +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) def test_group_update_attributes(store: Store, zarr_format: ZarrFormat) -> None: """ Test the behavior of `Group.update_attributes` @@ -581,19 +586,8 @@ def test_group_update_attributes(store: Store, zarr_format: ZarrFormat) -> None: assert new_group.attrs == new_attrs -async def test_group_update_attributes_async(store: Store, zarr_format: ZarrFormat) -> None: - """ - Test the behavior of `Group.update_attributes_async` - """ - attrs = {"foo": 100} 
- group = Group.from_store(store, zarr_format=zarr_format, attributes=attrs) - assert group.attrs == attrs - new_attrs = {"bar": 100} - new_group = await group.update_attributes_async(new_attrs) - assert new_group.attrs == new_attrs - - @pytest.mark.parametrize("method", ["create_array", "array"]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) def test_group_create_array( store: Store, zarr_format: ZarrFormat, @@ -628,6 +622,7 @@ def test_group_create_array( assert np.array_equal(array[:], data) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) def test_group_array_creation( store: Store, zarr_format: ZarrFormat, @@ -696,9 +691,7 @@ def test_group_array_creation( assert full_like_array.store_path.store == store -@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) -@pytest.mark.parametrize("zarr_format", [2, 3]) -@pytest.mark.parametrize("exists_ok", [True, False]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) @pytest.mark.parametrize("extant_node", ["array", "group"]) def test_group_creation_existing_node( store: Store, @@ -745,6 +738,7 @@ def test_group_creation_existing_node( ) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) async def test_asyncgroup_create( store: Store, exists_ok: bool, @@ -787,6 +781,7 @@ async def test_asyncgroup_create( ) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) async def test_asyncgroup_attrs(store: Store, zarr_format: ZarrFormat) -> None: attributes = {"foo": 100} agroup = await AsyncGroup.from_store(store, zarr_format=zarr_format, attributes=attributes) @@ -794,6 +789,17 @@ async def test_asyncgroup_attrs(store: Store, zarr_format: ZarrFormat) -> None: assert agroup.attrs == agroup.metadata.attributes == attributes +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) +async def test_asyncgroup_info(store: Store, zarr_format: ZarrFormat) -> None: + agroup = await 
AsyncGroup.from_store( # noqa: F841 + store, + zarr_format=zarr_format, + ) + pytest.xfail("Info is not implemented for metadata yet") + # assert agroup.info == agroup.metadata.info + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) async def test_asyncgroup_open( store: Store, zarr_format: ZarrFormat, @@ -815,6 +821,7 @@ async def test_asyncgroup_open( assert group_w == group_r +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) async def test_asyncgroup_open_wrong_format( store: Store, zarr_format: ZarrFormat, @@ -835,6 +842,7 @@ async def test_asyncgroup_open_wrong_format( # todo: replace the dict[str, Any] type with something a bit more specific # should this be async? +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) @pytest.mark.parametrize( "data", [ @@ -857,6 +865,7 @@ def test_asyncgroup_from_dict(store: Store, data: dict[str, Any]) -> None: # todo: replace this with a declarative API where we model a full hierarchy +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) async def test_asyncgroup_getitem(store: Store, zarr_format: ZarrFormat) -> None: """ Create an `AsyncGroup`, then create members of that group, and ensure that we can access those @@ -879,6 +888,7 @@ async def test_asyncgroup_getitem(store: Store, zarr_format: ZarrFormat) -> None await agroup.getitem("foo") +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) async def test_asyncgroup_delitem(store: Store, zarr_format: ZarrFormat) -> None: if not store.supports_deletes: pytest.skip("store does not support deletes") @@ -915,6 +925,7 @@ async def test_asyncgroup_delitem(store: Store, zarr_format: ZarrFormat) -> None raise AssertionError +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) async def test_asyncgroup_create_group( store: Store, zarr_format: ZarrFormat, @@ -931,6 +942,7 @@ async def test_asyncgroup_create_group( assert subnode.metadata.zarr_format == zarr_format 
+@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) async def test_asyncgroup_create_array( store: Store, zarr_format: ZarrFormat, exists_ok: bool ) -> None: @@ -970,6 +982,7 @@ async def test_asyncgroup_create_array( assert subnode.metadata.zarr_format == zarr_format +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) async def test_asyncgroup_update_attributes(store: Store, zarr_format: ZarrFormat) -> None: """ Test that the AsyncGroup.update_attributes method works correctly. @@ -984,8 +997,7 @@ async def test_asyncgroup_update_attributes(store: Store, zarr_format: ZarrForma assert agroup_new_attributes.attrs == attributes_new -@pytest.mark.parametrize("store", ["local"], indirect=["store"]) -@pytest.mark.parametrize("zarr_format", [2, 3]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) async def test_serializable_async_group(store: LocalStore, zarr_format: ZarrFormat) -> None: expected = await AsyncGroup.from_store( store=store, attributes={"foo": 999}, zarr_format=zarr_format @@ -995,8 +1007,7 @@ async def test_serializable_async_group(store: LocalStore, zarr_format: ZarrForm assert actual == expected -@pytest.mark.parametrize("store", ["local"], indirect=["store"]) -@pytest.mark.parametrize("zarr_format", [2, 3]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) def test_serializable_sync_group(store: LocalStore, zarr_format: ZarrFormat) -> None: expected = Group.from_store(store=store, attributes={"foo": 999}, zarr_format=zarr_format) p = pickle.dumps(expected) @@ -1005,6 +1016,7 @@ def test_serializable_sync_group(store: LocalStore, zarr_format: ZarrFormat) -> @pytest.mark.parametrize("consolidated_metadata", [True, False]) +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) async def test_group_members_async(store: Store, consolidated_metadata: bool) -> None: group = await AsyncGroup.from_store( store=store, @@ -1080,7 +1092,8 @@ async def 
test_group_members_async(store: Store, consolidated_metadata: bool) -> assert nmembers == 4 -async def test_require_group(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) +async def test_require_group(store: Store, zarr_format: ZarrFormat) -> None: root = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) # create foo group @@ -1110,7 +1123,8 @@ async def test_require_group(store: LocalStore | MemoryStore, zarr_format: ZarrF await foo_group.require_group("bar") -async def test_require_groups(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) +async def test_require_groups(store: Store, zarr_format: ZarrFormat) -> None: root = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) # create foo group _ = await root.create_group("foo", attributes={"foo": 100}) @@ -1131,6 +1145,7 @@ async def test_require_groups(store: LocalStore | MemoryStore, zarr_format: Zarr assert no_group == () +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) async def test_create_dataset(store: Store, zarr_format: ZarrFormat) -> None: root = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) with pytest.warns(DeprecationWarning): @@ -1145,6 +1160,7 @@ async def test_create_dataset(store: Store, zarr_format: ZarrFormat) -> None: await root.create_dataset("bar", shape=(100,), dtype="int8") +@pytest.mark.parametrize("store", ["local", "memory"], indirect=True) async def test_require_array(store: Store, zarr_format: ZarrFormat) -> None: root = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) foo1 = await root.require_array("foo", shape=(10,), dtype="i8", attributes={"foo": 101}) @@ -1171,6 +1187,22 @@ async def test_require_array(store: Store, zarr_format: ZarrFormat) -> None: @pytest.mark.parametrize("consolidate", [True, False]) 
+@pytest.mark.parametrize( + "store", + [ + "local", + "memory", + pytest.param( + "remote", + marks=pytest.mark.xfail( + sys.version_info >= (3, 12), + reason="Valid warnings are raised from botocore on python 3.12+", + ), + ), + "zip", + ], + indirect=True, +) async def test_members_name(store: Store, consolidate: bool, zarr_format: ZarrFormat): group = Group.from_store(store=store, zarr_format=zarr_format) a = group.create_group(name="a") @@ -1210,6 +1242,7 @@ def test_open_mutable_mapping_sync(): class TestConsolidated: + @pytest.mark.parametrize("store", ["local", "memory"], indirect=True) async def test_group_getitem_consolidated(self, store: Store) -> None: root = await AsyncGroup.from_store(store=store) # Set up the test structure with @@ -1260,6 +1293,7 @@ async def test_group_getitem_consolidated(self, store: Store) -> None: rg2 = await rg1.getitem("g2") assert rg2.metadata.consolidated_metadata == ConsolidatedMetadata(metadata={}) + @pytest.mark.parametrize("store", ["local", "memory"], indirect=True) async def test_group_delitem_consolidated(self, store: Store) -> None: if isinstance(store, ZipStore): raise pytest.skip("Not implemented") @@ -1296,6 +1330,7 @@ async def test_group_delitem_consolidated(self, store: Store) -> None: assert len(group.metadata.consolidated_metadata.metadata) == 1 assert "g0" not in group.metadata.consolidated_metadata.metadata + @pytest.mark.parametrize("store", ["local", "memory"], indirect=True) def test_open_consolidated_raises(self, store: Store) -> None: if isinstance(store, ZipStore): raise pytest.skip("Not implemented") @@ -1316,6 +1351,7 @@ def test_open_consolidated_raises(self, store: Store) -> None: group = zarr.open_group(store=store, use_consolidated=False) assert group.metadata.consolidated_metadata is None + @pytest.mark.parametrize("store", ["local", "memory"], indirect=True) async def test_open_consolidated_raises_async(self, store: Store) -> None: if isinstance(store, ZipStore): raise pytest.skip("Not 
implemented") diff --git a/tests/test_info.py b/tests/test_info.py index 5d9264aa13..640ad41cb7 100644 --- a/tests/test_info.py +++ b/tests/test_info.py @@ -7,10 +7,7 @@ from zarr.core._info import ArrayInfo, GroupInfo, human_readable_size from zarr.core.common import ZarrFormat -ZARR_FORMATS = [2, 3] - -@pytest.mark.parametrize("zarr_format", ZARR_FORMATS) def test_group_info_repr(zarr_format: ZarrFormat) -> None: info = GroupInfo( _name="a", _store_type="MemoryStore", _read_only=False, _zarr_format=zarr_format @@ -25,7 +22,6 @@ def test_group_info_repr(zarr_format: ZarrFormat) -> None: assert result == expected -@pytest.mark.parametrize("zarr_format", ZARR_FORMATS) def test_group_info_complete(zarr_format: ZarrFormat) -> None: info = GroupInfo( _name="a", @@ -49,7 +45,6 @@ def test_group_info_complete(zarr_format: ZarrFormat) -> None: assert result == expected -@pytest.mark.parametrize("zarr_format", ZARR_FORMATS) def test_array_info(zarr_format: ZarrFormat) -> None: info = ArrayInfo( _zarr_format=zarr_format, @@ -74,7 +69,6 @@ def test_array_info(zarr_format: ZarrFormat) -> None: Codecs : [{{'endian': }}]""") -@pytest.mark.parametrize("zarr_format", ZARR_FORMATS) @pytest.mark.parametrize("bytes_things", [(1_000_000, "976.6K", 500_000, "500000", "2.0", 5)]) def test_array_info_complete( zarr_format: ZarrFormat, bytes_things: tuple[int, str, int, str, str, int] diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py index 8ae9cc81fd..007e5e3506 100644 --- a/tests/test_metadata/test_consolidated.py +++ b/tests/test_metadata/test_consolidated.py @@ -28,7 +28,8 @@ @pytest.fixture -async def memory_store_with_hierarchy(memory_store: Store) -> None: +async def memory_store_with_hierarchy() -> None: + memory_store = zarr.storage.MemoryStore() g = await group(store=memory_store, attributes={"foo": "bar"}) await g.create_array(name="air", shape=(1, 2, 3)) await g.create_array(name="lat", shape=(1,)) @@ -199,15 +200,20 @@ async def 
test_consolidated(self, memory_store_with_hierarchy: Store) -> None: "time", ] - def test_consolidated_sync(self, memory_store): - g = zarr.api.synchronous.group(store=memory_store, attributes={"foo": "bar"}) + @pytest.mark.parametrize( + "store", + ["memory"], + indirect=True, + ) + def test_consolidated_sync(self, store: Store): + g = zarr.api.synchronous.group(store=store, attributes={"foo": "bar"}) g.create_array(name="air", shape=(1, 2, 3)) g.create_array(name="lat", shape=(1,)) g.create_array(name="lon", shape=(2,)) g.create_array(name="time", shape=(3,)) - zarr.api.synchronous.consolidate_metadata(memory_store) - group2 = zarr.api.synchronous.Group.open(memory_store) + zarr.api.synchronous.consolidate_metadata(store) + group2 = zarr.api.synchronous.Group.open(store) array_metadata = { "attributes": {}, @@ -273,15 +279,20 @@ def test_consolidated_sync(self, memory_store): ), ) assert group2.metadata == expected - group3 = zarr.api.synchronous.open(store=memory_store) + group3 = zarr.api.synchronous.open(store=store) assert group3.metadata == expected - group4 = zarr.api.synchronous.open_consolidated(store=memory_store) + group4 = zarr.api.synchronous.open_consolidated(store=store) assert group4.metadata == expected - async def test_not_writable_raises(self, memory_store: zarr.storage.MemoryStore) -> None: - await group(store=memory_store, attributes={"foo": "bar"}) - read_store = zarr.storage.MemoryStore(store_dict=memory_store._store_dict, read_only=True) + @pytest.mark.parametrize( + "store", + ["memory"], + indirect=True, + ) + async def test_not_writable_raises(self, store: zarr.storage.MemoryStore) -> None: + await group(store=store, attributes={"foo": "bar"}) + read_store = zarr.storage.MemoryStore(store_dict=store._store_dict, read_only=True) with pytest.raises(ValueError, match="does not support writing"): await consolidate_metadata(read_store) @@ -455,7 +466,6 @@ def test_to_dict_empty(self): } assert result == expected - 
@pytest.mark.parametrize("zarr_format", [2, 3]) async def test_open_consolidated_raises_async(self, zarr_format: ZarrFormat): store = zarr.storage.MemoryStore() await AsyncGroup.from_store(store, zarr_format=zarr_format) @@ -506,33 +516,35 @@ async def test_consolidated_metadata_v2(self): ) assert result.metadata == expected - @pytest.mark.parametrize("zarr_format", [2, 3]) + @pytest.mark.parametrize( + "store", + ["memory"], + indirect=True, + ) async def test_use_consolidated_false( - self, memory_store: zarr.storage.MemoryStore, zarr_format: ZarrFormat + self, store: zarr.storage.MemoryStore, zarr_format: ZarrFormat ) -> None: with zarr.config.set(default_zarr_version=zarr_format): - g = await group(store=memory_store, attributes={"foo": "bar"}) + g = await group(store=store, attributes={"foo": "bar"}) await g.create_group(name="a") # test a stale read - await zarr.api.asynchronous.consolidate_metadata(memory_store) + await zarr.api.asynchronous.consolidate_metadata(store) await g.create_group(name="b") - stale = await zarr.api.asynchronous.open_group(store=memory_store) + stale = await zarr.api.asynchronous.open_group(store=store) assert len([x async for x in stale.members()]) == 1 assert stale.metadata.consolidated_metadata assert list(stale.metadata.consolidated_metadata.metadata) == ["a"] # bypass stale data - good = await zarr.api.asynchronous.open_group( - store=memory_store, use_consolidated=False - ) + good = await zarr.api.asynchronous.open_group(store=store, use_consolidated=False) assert len([x async for x in good.members()]) == 2 # reconsolidate - await zarr.api.asynchronous.consolidate_metadata(memory_store) + await zarr.api.asynchronous.consolidate_metadata(store) - good = await zarr.api.asynchronous.open_group(store=memory_store) + good = await zarr.api.asynchronous.open_group(store=store) assert len([x async for x in good.members()]) == 2 assert good.metadata.consolidated_metadata assert sorted(good.metadata.consolidated_metadata.metadata) == 
["a", "b"] diff --git a/tests/test_metadata/test_v2.py b/tests/test_metadata/test_v2.py index 003aef331f..3a956142d8 100644 --- a/tests/test_metadata/test_v2.py +++ b/tests/test_metadata/test_v2.py @@ -82,9 +82,7 @@ def test_metadata_to_dict( class TestConsolidated: @pytest.fixture - async def v2_consolidated_metadata( - self, memory_store: zarr.storage.MemoryStore - ) -> zarr.storage.MemoryStore: + async def v2_consolidated_metadata(self) -> zarr.storage.MemoryStore: zmetadata = { "metadata": { ".zattrs": { diff --git a/tests/test_store/test_remote.py b/tests/test_store/test_remote.py index aee620796c..323772a5fd 100644 --- a/tests/test_store/test_remote.py +++ b/tests/test_store/test_remote.py @@ -1,93 +1,24 @@ from __future__ import annotations import json -import os -from typing import TYPE_CHECKING import fsspec import pytest -from botocore.session import Session from upath import UPath import zarr.api.asynchronous from zarr.core.buffer import Buffer, cpu, default_buffer_prototype -from zarr.core.sync import _collect_aiterator, sync +from zarr.core.sync import _collect_aiterator from zarr.storage import RemoteStore from zarr.testing.store import StoreTests -if TYPE_CHECKING: - from collections.abc import Generator +from ..conftest import test_bucket_name - import botocore.client - -s3fs = pytest.importorskip("s3fs") -requests = pytest.importorskip("requests") -moto_server = pytest.importorskip("moto.moto_server.threaded_moto_server") -moto = pytest.importorskip("moto") - -# ### amended from s3fs ### # -test_bucket_name = "test" -secure_bucket_name = "test-secure" -port = 5555 -endpoint_url = f"http://127.0.0.1:{port}/" - - -@pytest.fixture(scope="module") -def s3_base() -> Generator[None, None, None]: - # writable local S3 system - - # This fixture is module-scoped, meaning that we can reuse the MotoServer across all tests - server = moto_server.ThreadedMotoServer(ip_address="127.0.0.1", port=port) - server.start() - if "AWS_SECRET_ACCESS_KEY" not in 
os.environ: - os.environ["AWS_SECRET_ACCESS_KEY"] = "foo" - if "AWS_ACCESS_KEY_ID" not in os.environ: - os.environ["AWS_ACCESS_KEY_ID"] = "foo" - - yield - server.stop() - - -def get_boto3_client() -> botocore.client.BaseClient: - # NB: we use the sync botocore client for setup - session = Session() - return session.create_client("s3", endpoint_url=endpoint_url) - - -@pytest.fixture(autouse=True) -def s3(s3_base: None) -> Generator[s3fs.S3FileSystem, None, None]: - """ - Quoting Martin Durant: - pytest-asyncio creates a new event loop for each async test. - When an async-mode s3fs instance is made from async, it will be assigned to the loop from - which it is made. That means that if you use s3fs again from a subsequent test, - you will have the same identical instance, but be running on a different loop - which fails. - - For the rest: it's very convenient to clean up the state of the store between tests, - make sure we start off blank each time. - - https://github.com/zarr-developers/zarr-python/pull/1785#discussion_r1634856207 - """ - client = get_boto3_client() - client.create_bucket(Bucket=test_bucket_name, ACL="public-read") - s3fs.S3FileSystem.clear_instance_cache() - s3 = s3fs.S3FileSystem(anon=False, client_kwargs={"endpoint_url": endpoint_url}) - session = sync(s3.set_session()) - s3.invalidate_cache() - yield s3 - requests.post(f"{endpoint_url}/moto-api/reset") - client.close() - sync(session.close()) - - -# ### end from s3fs ### # - - -async def test_basic() -> None: +async def test_basic(s3_base) -> None: store = RemoteStore.from_url( f"s3://{test_bucket_name}/foo/spam/", - storage_options={"endpoint_url": endpoint_url, "anon": False}, + storage_options={"endpoint_url": s3_base, "anon": False}, ) assert store.fs.asynchronous assert store.path == f"{test_bucket_name}/foo/spam" @@ -108,9 +39,9 @@ class TestRemoteStoreS3(StoreTests[RemoteStore, cpu.Buffer]): buffer_cls = cpu.Buffer @pytest.fixture - def store_kwargs(self, request) -> dict[str, str | 
bool]: + def store_kwargs(self, request, s3_base) -> dict[str, str | bool]: fs, path = fsspec.url_to_fs( - f"s3://{test_bucket_name}", endpoint_url=endpoint_url, anon=False, asynchronous=True + f"s3://{test_bucket_name}", endpoint_url=s3_base, anon=False, asynchronous=True ) return {"fs": fs, "path": path} @@ -144,9 +75,9 @@ def test_store_supports_partial_writes(self, store: RemoteStore) -> None: def test_store_supports_listing(self, store: RemoteStore) -> None: assert store.supports_listing - async def test_remote_store_from_uri(self, store: RemoteStore): + async def test_remote_store_from_uri(self, store: RemoteStore, s3_base): storage_options = { - "endpoint_url": endpoint_url, + "endpoint_url": s3_base, "anon": False, } @@ -181,15 +112,15 @@ async def test_remote_store_from_uri(self, store: RemoteStore): ) assert dict(group.attrs) == {"key": "value-3"} - def test_from_upath(self) -> None: + def test_from_upath(self, s3_base) -> None: path = UPath( f"s3://{test_bucket_name}/foo/bar/", - endpoint_url=endpoint_url, + endpoint_url=s3_base, anon=False, asynchronous=True, ) result = RemoteStore.from_upath(path) - assert result.fs.endpoint_url == endpoint_url + assert result.fs.endpoint_url == s3_base assert result.fs.asynchronous assert result.path == f"{test_bucket_name}/foo/bar" @@ -201,9 +132,9 @@ def test_init_raises_if_path_has_scheme(self, store_kwargs) -> None: ): self.store_cls(**store_kwargs) - def test_init_warns_if_fs_asynchronous_is_false(self) -> None: + def test_init_warns_if_fs_asynchronous_is_false(self, s3_base) -> None: fs, path = fsspec.url_to_fs( - f"s3://{test_bucket_name}", endpoint_url=endpoint_url, anon=False, asynchronous=False + f"s3://{test_bucket_name}", endpoint_url=s3_base, anon=False, asynchronous=False ) store_kwargs = {"fs": fs, "path": path} with pytest.warns(UserWarning, match=r".* was not created with `asynchronous=True`.*"):