Skip to content

Commit 266b264

Browse files
committed
Add zarr v3 support
1 parent a9755dc commit 266b264

File tree

7 files changed

+307
-34
lines changed

7 files changed

+307
-34
lines changed

.github/workflows/tests.yml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,5 +50,52 @@ jobs:
5050
- name: Upload coverage to Codecov
5151
uses: codecov/[email protected]
5252
with:
53+
name: zarrv3
5354
token: ${{ secrets.CODECOV_TOKEN }}
5455
fail_ci_if_error: true
56+
57+
- name: Run tests (zarr v2)
58+
if: ${{ matrix.python == '3.13' }}
59+
run: |
60+
uv pip install "zarr>=2.17,<3"
61+
uv run --no-sync python -m pytest -xv --cov=tszip --cov-report=xml --cov-branch -n2
62+
63+
- name: Upload coverage to Codecov
64+
uses: codecov/[email protected]
65+
with:
66+
name: zarrv2
67+
token: ${{ secrets.CODECOV_TOKEN }}
68+
fail_ci_if_error: true
69+
70+
  # Checks that tszip files written under one zarr major version can be read
  # under the other. The installed zarr is swapped between steps while the
  # files written by earlier steps stay in the workspace.
  zarr-compatibility:
    name: Zarr v2/v3 Cross-compatibility
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/[email protected]

      - name: Install uv and set python version
        uses: astral-sh/setup-uv@v6
        with:
          python-version: 3.13
          version: "0.8.15"

      # On Python 3.13 the "test" extra in pyproject.toml pins zarr 3.1.2, so
      # this environment starts out on zarr v3.
      - name: Install dependencies (zarr v3)
        run: |
          uv venv
          uv pip install -r pyproject.toml --extra test

      # NOTE(review): --no-sync is presumably what stops `uv run` from
      # re-syncing the environment and undoing the manual zarr installs in the
      # later steps -- confirm against the uv documentation.
      - name: Write test file with zarr v3
        run: uv run --no-sync python tests/zarr_cross_version_helper.py write test_v3.tsz

      # Downgrade to zarr v2, read the v3-written file, then write a v2 file
      # for the final step to read.
      - name: Switch to zarr v2 and test reading
        run: |
          uv pip install "zarr>=2.17,<3"
          uv run --no-sync python tests/zarr_cross_version_helper.py read test_v3.tsz
          uv run --no-sync python tests/zarr_cross_version_helper.py write test_v2.tsz

      # Back on zarr v3: both the v3-written and the v2-written file must load.
      - name: Switch back to zarr v3 and test reading both files
        run: |
          uv pip install "zarr>=3.0,<4"
          uv run --no-sync python tests/zarr_cross_version_helper.py read test_v3.tsz
          uv run --no-sync python tests/zarr_cross_version_helper.py read test_v2.tsz

CHANGELOG.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
--------------------
44

55
- Drop Python 3.9 support, require Python >= 3.10 (#112, benjeffery)
6+
- Support zarr v3 (#114, benjeffery)
67

78
--------------------
89
[0.2.4] - 2024-07-10

pyproject.toml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ dependencies = [
3535
"humanize",
3636
"tskit>=0.3.3",
3737
"numcodecs>=0.6.4",
38-
"zarr<3",
38+
"zarr>=2.17,<4",
3939
]
4040
dynamic = ["version"]
4141

@@ -59,7 +59,8 @@ test = [
5959
"pytest-cov==6.3.0",
6060
"pytest-xdist==3.8.0",
6161
"tskit==0.6.4",
62-
"zarr==2.17.2",
62+
"zarr==2.18.3; python_version == '3.10'",
63+
"zarr==3.1.2; python_version >= '3.11'",
6364
"numcodecs>=0.6,<0.15.1", #Pinned due to https://github.com/zarr-developers/numcodecs/issues/733
6465
]
6566
docs = [
@@ -69,7 +70,7 @@ docs = [
6970
"sphinx-argparse==0.5.2",
7071
"setuptools_scm==9.2.0",
7172
"tskit==0.6.4",
72-
"zarr==2.18.7",
73+
"zarr==3.1.2",
7374
"numcodecs>=0.6,<0.15.1", #Pinned due to https://github.com/zarr-developers/numcodecs/issues/733
7475
]
7576
dev = [
@@ -87,7 +88,7 @@ dev = [
8788
"sphinx-issues",
8889
"setuptools_scm",
8990
"tskit",
90-
"zarr<3",
91+
"zarr",
9192
"msprime",
9293
"humanize",
9394
]

tests/test_compression.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,12 @@
3030
import numpy as np
3131
import pytest
3232
import tskit
33-
import zarr
3433

3534
import tszip
3635
import tszip.compression as compression
3736
import tszip.exceptions as exceptions
3837
import tszip.provenance as provenance
38+
from tszip import compat
3939

4040

4141
class TestMinimalDtype(unittest.TestCase):
@@ -294,25 +294,25 @@ def tearDown(self):
294294
def test_format_written(self):
295295
ts = msprime.simulate(10, random_seed=1)
296296
tszip.compress(ts, self.path)
297-
with zarr.ZipStore(str(self.path), mode="r") as store:
298-
root = zarr.group(store=store)
297+
with compat.create_zip_store(str(self.path), mode="r") as store:
298+
root = compat.create_zarr_group(store=store)
299299
self.assertEqual(root.attrs["format_name"], compression.FORMAT_NAME)
300300
self.assertEqual(root.attrs["format_version"], compression.FORMAT_VERSION)
301301

302302
def test_provenance(self):
303303
ts = msprime.simulate(10, random_seed=1)
304304
for variants_only in [True, False]:
305305
tszip.compress(ts, self.path, variants_only=variants_only)
306-
with zarr.ZipStore(str(self.path), mode="r") as store:
307-
root = zarr.group(store=store)
306+
with compat.create_zip_store(str(self.path), mode="r") as store:
307+
root = compat.create_zarr_group(store=store)
308308
self.assertEqual(
309309
root.attrs["provenance"],
310310
provenance.get_provenance_dict({"variants_only": variants_only}),
311311
)
312312

313313
def write_file(self, attrs, path):
314-
with zarr.ZipStore(str(path), mode="w") as store:
315-
root = zarr.group(store=store)
314+
with compat.create_zip_store(str(path), mode="w") as store:
315+
root = compat.create_zarr_group(store=store)
316316
root.attrs.update(attrs)
317317

318318
def test_missing_format_keys(self):

tests/zarr_cross_version_helper.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Script to test zarr cross-version compatibility.
4+
Usage: python zarr_cross_version_helper.py [write|read] <filename>
5+
"""
6+
import pathlib
7+
import sys
8+
9+
import msprime
10+
import tskit
11+
12+
# Add parent directory to path so we can import tszip
13+
sys.path.insert(0, str(pathlib.Path(__file__).parent.parent))
14+
15+
import tszip # noqa: E402
16+
17+
18+
def all_fields_ts(edge_metadata=True, migrations=True):
    """
    Build a small simulated tree sequence with data in all fields.

    Edge metadata is not set if ``edge_metadata`` is False, and migrations are
    neither recorded nor added if ``migrations`` is False (this is needed to
    test simplify, which doesn't allow either).

    :param bool edge_metadata: If False, leave the edge table without metadata.
    :param bool migrations: If False, do not record migrations in the
        simulation and do not add the extra migration row.
    :return: The tree sequence built from the modified tables.
    """
    # Five populations; A and B split from the ancestral population C at
    # time 1000. D and E are defined but take no part in the split.
    demography = msprime.Demography()
    demography.add_population(name="A", initial_size=10_000)
    demography.add_population(name="B", initial_size=5_000)
    demography.add_population(name="C", initial_size=1_000)
    demography.add_population(name="D", initial_size=500)
    demography.add_population(name="E", initial_size=100)
    demography.add_population_split(time=1000, derived=["A", "B"], ancestral="C")
    # Both simulation calls are given a fixed random_seed.
    ts = msprime.sim_ancestry(
        samples={"A": 10, "B": 10},
        demography=demography,
        sequence_length=5,
        random_seed=42,
        recombination_rate=1,
        record_migrations=migrations,
        record_provenance=True,
    )
    ts = msprime.sim_mutations(ts, rate=0.001, random_seed=42)
    tables = ts.dump_tables()
    # Add locations to individuals
    # (the table is rebuilt row by row from a copy, also setting flags and
    # parents from the row index)
    individuals_copy = tables.individuals.copy()
    tables.individuals.clear()
    for i, individual in enumerate(individuals_copy):
        tables.individuals.append(
            individual.replace(flags=i, location=[i, i + 1], parents=[i - 1, i - 1])
        )
    # Ensure all columns have unique values
    nodes_copy = tables.nodes.copy()
    tables.nodes.clear()
    for i, node in enumerate(nodes_copy):
        tables.nodes.append(
            node.replace(
                flags=i,
                time=node.time + 0.00001 * i,
                individual=i % len(tables.individuals),
                population=i % len(tables.populations),
            )
        )
    if migrations:
        # One hand-written migration row, added on top of whatever the
        # simulation recorded.
        tables.migrations.add_row(left=0, right=1, node=21, source=1, dest=3, time=1001)

    # Add metadata
    for name, table in tables.table_name_map.items():
        # NOTE(review): provenances is skipped presumably because that table
        # has no metadata column -- confirm against tskit.
        if name == "provenances":
            continue
        if name == "migrations" and not migrations:
            continue
        if name == "edges" and not edge_metadata:
            continue
        table.metadata_schema = tskit.MetadataSchema.permissive_json()
        # One distinct JSON string per row, tagged with table name and row index.
        metadatas = [f'{{"foo":"n_{name}_{u}"}}' for u in range(len(table))]
        metadata, metadata_offset = tskit.pack_strings(metadatas)
        # Re-set every column, overriding only the two metadata columns.
        table.set_columns(
            **{
                **table.asdict(),
                "metadata": metadata,
                "metadata_offset": metadata_offset,
            }
        )
    # Top-level (table collection) metadata and time units.
    tables.metadata_schema = tskit.MetadataSchema.permissive_json()
    tables.metadata = "Test metadata"
    tables.time_units = "Test time units"

    # Reference sequence: one "A" per unit of sequence length.
    tables.reference_sequence.metadata_schema = tskit.MetadataSchema.permissive_json()
    tables.reference_sequence.metadata = "Test reference metadata"
    tables.reference_sequence.data = "A" * int(ts.sequence_length)
    tables.reference_sequence.url = "http://example.com/a_reference"

    # Add some more rows to provenance to have enough for testing.
    for i in range(3):
        tables.provenances.add_row(record="A", timestamp=str(i))

    return tables.tree_sequence()
97+
98+
99+
def write_test_file(filename):
    """
    Compress the all-fields tree sequence to ``filename`` with the installed
    zarr version, then read it straight back and check the tables round-trip.
    """
    original = all_fields_ts()
    tszip.compress(original, filename)
    # Immediate read-back under the same zarr version that wrote the file.
    round_tripped = tszip.decompress(filename)
    original.tables.assert_equals(round_tripped.tables)
105+
106+
107+
def read_test_file(filename):
    """
    Read a test file with the current zarr version, exiting on failure.

    Attempts to decompress ``filename`` with tszip. If any exception is
    raised, the traceback and the offending filename are written to stderr
    and the process exits with status 1, so a failing CI step shows its cause
    instead of exiting silently.

    :param filename: Path of the tszip file to read.
    """
    try:
        tszip.decompress(filename)
    except Exception:
        # Top-level boundary of a CI helper script: report the error before
        # exiting so the failure can be diagnosed from the job log.
        import traceback

        traceback.print_exc()
        print(f"Failed to read {filename}", file=sys.stderr)
        sys.exit(1)
113+
114+
115+
def main(argv=None):
    """
    Run the helper from the command line.

    Expects exactly two arguments: an action (``write`` or ``read``) and a
    filename. Any other argument count or action exits with a usage message
    and a non-zero status, so a mistyped CI command cannot pass silently.

    :param argv: Argument list without the program name; defaults to
        ``sys.argv[1:]``.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    usage = f"Usage: python {sys.argv[0]} [write|read] <filename>"
    if len(args) != 2:
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit(usage)
    action, filename = args
    if action == "write":
        write_test_file(filename)
    elif action == "read":
        read_test_file(filename)
    else:
        sys.exit(f"Unknown action {action!r}\n{usage}")


if __name__ == "__main__":
    main()

tszip/compat.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# MIT License
2+
#
3+
# Copyright (c) 2025 Tskit Developers
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy
6+
# of this software and associated documentation files (the "Software"), to deal
7+
# in the Software without restriction, including without limitation the rights
8+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
# copies of the Software, and to permit persons to whom the Software is
10+
# furnished to do so, subject to the following conditions:
11+
#
12+
# The above copyright notice and this permission notice shall be included in all
13+
# copies or substantial portions of the Software.
14+
#
15+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
# SOFTWARE.
22+
"""
23+
Compatibility layer for zarr v2/v3 API differences
24+
"""
25+
import zarr
26+
27+
# True when the installed zarr's version string starts with "3."; any other
# version takes the zarr v2 code path below.
ZARR_V3 = zarr.__version__.startswith("3.")


if ZARR_V3:
    from zarr.storage import MemoryStore
    from zarr.storage import ZipStore

    def create_zip_store(path, mode="r"):
        """Return a zarr v3 ``ZipStore`` for ``path`` opened with ``mode``."""
        return ZipStore(path, mode=mode)

    def create_zarr_group(store=None):
        """
        Return a zarr-format-2 root group.

        With no ``store``, a new group is created on a fresh in-memory store.
        Otherwise the group is opened on ``store``: read-only ("r") if the
        store's ``read_only`` attribute is truthy, append mode ("a") if not.
        """
        if store is None:
            # Pass an explicit in-memory store rather than relying on
            # create_group to supply one.
            return zarr.create_group(store=MemoryStore(), zarr_format=2)
        mode = "r" if getattr(store, "read_only", False) else "a"
        return zarr.open_group(store=store, zarr_format=2, mode=mode)

    def create_empty_array(
        group, name, shape, dtype, chunks=None, filters=None, compressor=None
    ):
        """
        Create an uninitialised zarr-format-2 array called ``name`` in
        ``group`` and return it.
        """
        return group.empty(
            name=name,
            shape=shape,
            dtype=dtype,
            chunks=chunks,
            zarr_format=2,
            filters=filters,
            compressor=compressor,
        )

    def get_nbytes_stored(array):
        """Return ``array.nbytes_stored()`` (a method call under zarr v3)."""
        return array.nbytes_stored()

    def group_items(group):
        """Return the result of ``group.members()`` for ``group``."""
        return group.members()

    def visit_arrays(group, visitor):
        """
        Call ``visitor`` on every array below ``group``, at any depth.

        ``max_depth=None`` removes the depth limit of ``members`` so arrays
        nested in sub-groups are visited too, matching the recursive walk
        that ``visitvalues`` performs on the zarr v2 path.
        """
        for _, member in group.members(max_depth=None):
            if isinstance(member, zarr.Array):
                visitor(member)

else:

    def create_zip_store(path, mode="r"):
        """Return a zarr v2 ``ZipStore`` for ``path`` opened with ``mode``."""
        return zarr.ZipStore(path, mode=mode)

    def create_zarr_group(store=None):
        """Return ``zarr.group(store=store)``."""
        return zarr.group(store=store)

    def create_empty_array(
        group, name, shape, dtype, chunks=None, filters=None, compressor=None
    ):
        """Create an uninitialised array called ``name`` in ``group``."""
        return group.empty(
            name,
            shape=shape,
            dtype=dtype,
            chunks=chunks,
            filters=filters,
            compressor=compressor,
        )

    def get_nbytes_stored(array):
        """Return ``array.nbytes_stored`` (an attribute under zarr v2)."""
        return array.nbytes_stored

    def group_items(group):
        """Return ``group.items()`` for ``group``."""
        return group.items()

    def visit_arrays(group, visitor):
        """
        Run ``visitor`` over ``group`` via ``group.visitvalues``.

        NOTE(review): ``visitvalues`` presumably hands sub-groups as well as
        arrays to ``visitor`` -- confirm against the zarr v2 documentation.
        """
        group.visitvalues(visitor)

0 commit comments

Comments
 (0)