Skip to content

Test ManifestStore against S3 and fix key parsing #507

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Mar 27, 2025
Merged
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
shell: bash -l {0}
strategy:
matrix:
environment: [test-py311, test-py312, upstream, min-deps]
environment: [test-py311, test-py312, upstream, min-deps, minio]
steps:
- uses: actions/checkout@v4
- uses: prefix-dev/[email protected]
Expand Down
7 changes: 7 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ def pytest_addoption(parser):
action="store_true",
help="runs tests requiring a network connection",
)
parser.addoption(
"--run-minio-tests",
action="store_true",
help="runs tests requiring docker and minio",
)


def pytest_runtest_setup(item):
Expand All @@ -35,6 +40,8 @@ def pytest_runtest_setup(item):
pytest.skip(
"set --run-network-tests to run tests requiring an internet connection"
)
if "minio" in item.keywords and not item.config.getoption("--run-minio-tests"):
pytest.skip("set --run-minio-tests to run tests requiring docker and minio")


# Common codec configurations
Expand Down
37 changes: 27 additions & 10 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,11 @@ python = "3.11.*"
[tool.pixi.feature.py312.dependencies]
python = "3.12.*"

# Define a feature set for S3 testing with MinIO
[tool.pixi.feature.minio.pypi-dependencies]
"docker" = "*"
"minio" = "*"

# Install NetCDF and HDF5 from conda-forge for extra plugin capabilities
[tool.pixi.feature.hdf5-lib.dependencies]
netcdf4 = ">=1.7.2,<2"
Expand All @@ -162,7 +167,7 @@ h5netcdf = ">=1.5.0,<2"
rust = "*"

# Define commands to run within the test environments
[tool.pixi.feature.dev.tasks]
[tool.pixi.feature.test.tasks]
run-mypy = { cmd = "mypy virtualizarr" }
run-tests = { cmd = "pytest -n auto --run-network-tests --verbose" }
run-tests-no-network = { cmd = "pytest -n auto" }
Expand All @@ -172,24 +177,27 @@ run-tests-html-cov = { cmd = "pytest -n auto --run-network-tests --verbose --cov

# Define which features and groups to include in different pixi (similar to conda) environments
[tool.pixi.environments]
min-deps = ["dev", "hdf", "hdf5", "hdf5-lib"] # VirtualiZarr/conftest.py using h5py, so the minimum set of dependencies for testing still includes hdf libs
min-deps = ["dev", "test", "hdf", "hdf5", "hdf5-lib"] # VirtualiZarr/conftest.py using h5py, so the minimum set of dependencies for testing still includes hdf libs
# Inherit from min-deps to get all the test commands, along with optional dependencies
test = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore"]
test-py311 = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "py311"] # test against python 3.11
test-py312 = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "py312"] # test against python 3.12
upstream = ["dev", "hdf", "hdf5", "hdf5-lib", "netcdf3", "upstream", "icechunk-dev"]
all = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "all_readers", "all_writers"]
test = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore"]
test-py311 = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "py311"] # test against python 3.11
test-py312 = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "py312"] # test against python 3.12
minio = ["dev", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "py312", "minio"]
upstream = ["dev", "test", "hdf", "hdf5", "hdf5-lib", "netcdf3", "upstream", "icechunk-dev"]
all = ["dev", "test", "remote", "hdf", "hdf5", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "obstore", "all_readers", "all_writers"]
docs = ["docs"]




# Define commands to run within the docs environment
[tool.pixi.feature.docs.tasks]
build-docs = { cmd = "make html", cwd = "docs" }
serve-docs = { cmd = "sphinx-autobuild docs docs/_build --host 0.0.0.0" }
readthedocs = "rm -rf $READTHEDOCS_OUTPUT/html && cp -r docs/_build/html $READTHEDOCS_OUTPUT/html"

# Define commands to run within the minio environment
[tool.pixi.feature.minio.tasks]
run-tests = { cmd = "pytest virtualizarr/tests/test_manifests/test_store.py --run-minio-tests --verbose" }
run-tests-xml-cov = { cmd = "pytest virtualizarr/tests/test_manifests/test_store.py --run-minio-tests --verbose --cov=virtualizarr --cov-report=xml" }

[tool.setuptools_scm]
fallback_version = "9999"

Expand Down Expand Up @@ -223,6 +231,14 @@ ignore_missing_imports = true
module = "zarr.*"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "docker.*"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "minio.*"
ignore_missing_imports = true

[tool.ruff]
# Same as Black.
line-length = 88
Expand Down Expand Up @@ -287,4 +303,5 @@ markers = [
# this warning: "PytestUnknownMarkWarning: Unknown pytest.mark.flaky"
"flaky: flaky tests",
"network: marks test requiring internet (select with '--run-network-tests')",
"minio: marks test requiring docker and minio (select with '--run-minio-tests')",
]
4 changes: 3 additions & 1 deletion virtualizarr/manifests/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pickle
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any
from urllib.parse import urlparse

from zarr.abc.store import (
ByteRequest,
Expand Down Expand Up @@ -160,7 +161,8 @@ def find_matching_store(stores: StoreDict, request_key: str) -> StoreRequest:
# Check each key to see if it's a prefix of the uri_string
for key in sorted_keys:
if request_key.startswith(key):
return StoreRequest(store=stores[key], key=request_key[len(key) :])
parsed_key = urlparse(request_key)
return StoreRequest(store=stores[key], key=parsed_key.path)
Comment on lines +164 to +165
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the only non-test related change in the PR, which makes the ManifestStore work for both local and object stores

# if no match is found, raise an error
raise ValueError(
f"Expected the one of stores.keys() to match the data prefix, got {stores.keys()} and {request_key}"
Expand Down
1 change: 1 addition & 0 deletions virtualizarr/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from virtualizarr.readers.hdf import HDFVirtualBackend

requires_network = pytest.mark.network
requires_minio = pytest.mark.minio


def _importorskip(
Expand Down
58 changes: 58 additions & 0 deletions virtualizarr/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import time

import pytest


@pytest.fixture(scope="session")
def container():
    """Run a throwaway MinIO server in Docker for the whole test session.

    Yields
    ------
    dict
        Connection details with the keys ``port``, ``endpoint``,
        ``username`` and ``password``.

    Raises
    ------
    RuntimeError
        If the server does not answer its liveness probe within the
        startup timeout.

    The container is stopped and removed on teardown, including when the
    readiness wait fails.
    """
    import urllib.request

    import docker

    port = 9000
    username = "minioadmin"
    password = "minioadmin"
    endpoint = f"http://localhost:{port}"
    startup_timeout = 30  # seconds to wait for the server to come up

    client = docker.from_env()
    minio_container = client.containers.run(
        "quay.io/minio/minio",
        "server /data",
        detach=True,
        ports={f"{port}/tcp": port},
        # NOTE(review): newer MinIO releases document MINIO_ROOT_USER /
        # MINIO_ROOT_PASSWORD for these settings -- confirm whether the
        # names below are still honoured by the image being pulled.
        environment={
            "MINIO_ACCESS_KEY": username,
            "MINIO_SECRET_KEY": password,
        },
    )
    try:
        # Poll the liveness endpoint rather than sleeping a fixed time: a
        # fixed delay is too short on a slow machine and wasted time on a
        # fast one.
        health_url = f"{endpoint}/minio/health/live"
        deadline = time.monotonic() + startup_timeout
        while True:
            try:
                with urllib.request.urlopen(health_url, timeout=1):
                    break
            except OSError as err:
                # URLError/HTTPError and connection resets are all OSError
                # subclasses; they just mean "not ready yet".
                if time.monotonic() >= deadline:
                    raise RuntimeError(
                        f"MinIO did not become ready at {endpoint} "
                        f"within {startup_timeout}s"
                    ) from err
                time.sleep(0.2)
        # enter
        yield {
            "port": port,
            "endpoint": endpoint,
            "username": username,
            "password": password,
        }
    finally:
        # exit
        minio_container.stop()
        minio_container.remove()


@pytest.fixture(scope="session")
def minio_bucket(container):
    """Create a bucket on the session's MinIO server and describe it.

    Parameters
    ----------
    container
        The dict yielded by the ``container`` fixture (``port``,
        ``endpoint``, ``username``, ``password``).

    Yields
    ------
    dict
        Everything in ``container`` plus ``bucket`` (the bucket name),
        ``file`` (an object name for tests to use) and ``client`` (the
        connected ``Minio`` client).
    """
    # Setup with guidance from https://medium.com/@sant1/using-minio-with-docker-and-python-cbbad397cb5d
    from minio import Minio

    bucket = "mybucket"
    filename = "test.nc"
    # Initialize MinIO client against the port the container fixture
    # published, instead of repeating a hard-coded address here.
    client = Minio(
        f"localhost:{container['port']}",
        access_key=container["username"],
        secret_key=container["password"],
        secure=False,
    )
    client.make_bucket(bucket)
    yield {
        "port": container["port"],
        "endpoint": container["endpoint"],
        "username": container["username"],
        "password": container["password"],
        "bucket": bucket,
        "file": filename,
        "client": client,
    }
Loading