Skip to content

Numcodecs in v3 #3037

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 37 additions & 10 deletions src/zarr/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
from importlib.metadata import entry_points as get_entry_points
from typing import TYPE_CHECKING, Any, Generic, TypeVar

import numcodecs

from zarr.abc.codec import Codec
from zarr.core.config import BadConfigError, config

if TYPE_CHECKING:
Expand All @@ -14,7 +17,6 @@
ArrayArrayCodec,
ArrayBytesCodec,
BytesBytesCodec,
Codec,
CodecPipeline,
)
from zarr.core.buffer import Buffer, NDBuffer
Expand Down Expand Up @@ -166,6 +168,25 @@
return get_codec_class(data["name"]).from_dict(data) # type: ignore[arg-type]


def numcodec_to_zarr3_codec(codec: numcodecs.abc.Codec) -> Codec:
    """
    Convert a numcodecs codec instance into its ``numcodecs.zarr3`` wrapper codec.

    The wrapper class is looked up in ``numcodecs.zarr3`` under the same name as
    the class of ``codec`` and is instantiated with the configuration reported by
    ``codec.get_config()``, minus its ``"id"`` entry.

    Parameters
    ----------
    codec : numcodecs.abc.Codec
        The numcodecs codec instance to convert.

    Returns
    -------
    Codec
        An instance of the same-named class from ``numcodecs.zarr3``.

    Raises
    ------
    ValueError
        If ``numcodecs.zarr3`` has no attribute named after the class of ``codec``.
    TypeError
        If the object built from that attribute is not a ``Codec``.
    """
    # Function-level import: ``numcodecs.zarr3`` is only loaded when a
    # conversion is actually requested.
    import numcodecs.zarr3

    codec_name = type(codec).__name__
    wrapper_cls = getattr(numcodecs.zarr3, codec_name, None)
    if wrapper_cls is None:
        raise ValueError(f"No Zarr3 wrapper found for numcodec: {codec_name}")

    # Build a filtered copy instead of popping "id" from the dict returned by
    # ``get_config()``, so the caller's codec configuration is never mutated.
    wrapper_config = {
        key: value for key, value in codec.get_config().items() if key != "id"
    }

    zarr3_codec = wrapper_cls(**wrapper_config)
    # Explicit check rather than ``assert``: asserts are stripped under ``-O``.
    if not isinstance(zarr3_codec, Codec):
        raise TypeError(
            f"Expected the Zarr3 wrapper for numcodec {codec_name} to be a Codec. "
            f"Got {type(zarr3_codec)} instead."
        )
    return zarr3_codec

Check warning on line 187 in src/zarr/registry.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/registry.py#L185-L187

Added lines #L185 - L187 were not covered by tests


def _parse_bytes_bytes_codec(data: dict[str, JSON] | Codec) -> BytesBytesCodec:
"""
Normalize the input to a ``BytesBytesCodec`` instance.
Expand All @@ -174,15 +195,17 @@
"""
from zarr.abc.codec import BytesBytesCodec

if isinstance(data, dict):
if isinstance(data, numcodecs.abc.Codec):
result = numcodec_to_zarr3_codec(data)

Check warning on line 199 in src/zarr/registry.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/registry.py#L199

Added line #L199 was not covered by tests
elif isinstance(data, dict):
result = _resolve_codec(data)
if not isinstance(result, BytesBytesCodec):
msg = f"Expected a dict representation of a BytesBytesCodec; got a dict representation of a {type(result)} instead."
raise TypeError(msg)
else:
if not isinstance(data, BytesBytesCodec):
raise TypeError(f"Expected a BytesBytesCodec. Got {type(data)} instead.")
result = data
if not isinstance(result, BytesBytesCodec):
raise TypeError(f"Expected a BytesBytesCodec. Got {type(result)} instead.")

Check warning on line 208 in src/zarr/registry.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/registry.py#L208

Added line #L208 was not covered by tests
return result


Expand All @@ -194,15 +217,17 @@
"""
from zarr.abc.codec import ArrayBytesCodec

if isinstance(data, dict):
if isinstance(data, numcodecs.abc.Codec):
result = numcodec_to_zarr3_codec(data)

Check warning on line 221 in src/zarr/registry.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/registry.py#L221

Added line #L221 was not covered by tests
elif isinstance(data, dict):
result = _resolve_codec(data)
if not isinstance(result, ArrayBytesCodec):
msg = f"Expected a dict representation of a ArrayBytesCodec; got a dict representation of a {type(result)} instead."
raise TypeError(msg)
else:
if not isinstance(data, ArrayBytesCodec):
raise TypeError(f"Expected a ArrayBytesCodec. Got {type(data)} instead.")
result = data
if not isinstance(result, ArrayBytesCodec):
raise TypeError(f"Expected a ArrayBytesCodec. Got {type(result)} instead.")

Check warning on line 230 in src/zarr/registry.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/registry.py#L230

Added line #L230 was not covered by tests
return result


Expand All @@ -214,15 +239,17 @@
"""
from zarr.abc.codec import ArrayArrayCodec

if isinstance(data, dict):
if isinstance(data, numcodecs.abc.Codec):
result = numcodec_to_zarr3_codec(data)
elif isinstance(data, dict):

Check warning on line 244 in src/zarr/registry.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/registry.py#L242-L244

Added lines #L242 - L244 were not covered by tests
result = _resolve_codec(data)
if not isinstance(result, ArrayArrayCodec):
msg = f"Expected a dict representation of a ArrayArrayCodec; got a dict representation of a {type(result)} instead."
raise TypeError(msg)
else:
if not isinstance(data, ArrayArrayCodec):
raise TypeError(f"Expected a ArrayArrayCodec. Got {type(data)} instead.")
result = data
if not isinstance(result, ArrayArrayCodec):
raise TypeError(f"Expected a ArrayArrayCodec. Got {type(result)} instead.")

Check warning on line 252 in src/zarr/registry.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/registry.py#L251-L252

Added lines #L251 - L252 were not covered by tests
return result


Expand Down
67 changes: 67 additions & 0 deletions tests/test_codecs/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING

import numcodecs
import numcodecs.zarr3
import numpy as np
import pytest

import zarr
import zarr.api
import zarr.api.asynchronous
from zarr import Array, AsyncArray, config
from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec
from zarr.codecs import (
BytesCodec,
GzipCodec,
Expand All @@ -23,6 +26,7 @@

if TYPE_CHECKING:
from zarr.abc.store import Store
from zarr.core.array import CompressorsLike, FiltersLike, SerializerLike
from zarr.core.buffer import NDArrayLike
from zarr.core.common import MemoryOrder

Expand Down Expand Up @@ -406,3 +410,66 @@ async def test_resize(store: Store) -> None:
assert await store.get(f"{path}/0.1", prototype=default_buffer_prototype()) is not None
assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype()) is None
assert await store.get(f"{path}/1.1", prototype=default_buffer_prototype()) is None


@pytest.mark.parametrize("store", ["memory"], indirect=["store"])
@pytest.mark.parametrize(
    ("codec_v2", "expected_v3_cls"),
    [
        (numcodecs.BZ2(), numcodecs.zarr3.BZ2),
        (numcodecs.CRC32(), numcodecs.zarr3.CRC32),
        (numcodecs.CRC32C(), numcodecs.zarr3.CRC32C),
        (numcodecs.LZ4(), numcodecs.zarr3.LZ4),
        (numcodecs.LZMA(), numcodecs.zarr3.LZMA),
        # (numcodecs.ZFPY(), numcodecs.zarr3.ZFPY), AttributeError: module 'numcodecs' has no attribute 'ZFPY'
        (numcodecs.Adler32(), numcodecs.zarr3.Adler32),
        (
            numcodecs.AsType(encode_dtype=np.float64, decode_dtype=np.float32),
            numcodecs.zarr3.AsType,
        ),
        (numcodecs.BitRound(keepbits=10), numcodecs.zarr3.BitRound),
        (numcodecs.Blosc(), numcodecs.zarr3.Blosc),
        (numcodecs.Delta(dtype=np.float64), numcodecs.zarr3.Delta),
        (
            numcodecs.FixedScaleOffset(offset=1000, scale=10, dtype="f8", astype="u1"),
            numcodecs.zarr3.FixedScaleOffset,
        ),
        (numcodecs.Fletcher32(), numcodecs.zarr3.Fletcher32),
        (numcodecs.GZip(), numcodecs.zarr3.GZip),
        (numcodecs.JenkinsLookup3(), numcodecs.zarr3.JenkinsLookup3),
        # (numcodecs.PCodec(), numcodecs.zarr3.PCodec), AttributeError: module 'numcodecs' has no attribute 'PCodec'
        (numcodecs.PackBits(), numcodecs.zarr3.PackBits),
        (numcodecs.Quantize(digits=1, dtype="f8"), numcodecs.zarr3.Quantize),
        (numcodecs.Shuffle(), numcodecs.zarr3.Shuffle),
        (numcodecs.Zlib(), numcodecs.zarr3.Zlib),
        (numcodecs.Zstd(), numcodecs.zarr3.Zstd),
    ],
)
def test_numcodecs_in_v3(store: Store, codec_v2, expected_v3_cls) -> None:
    """
    Check the numcodecs -> ``numcodecs.zarr3`` conversion and its use in array creation.

    For each numcodecs codec, the converted object must be an instance of exactly
    the expected wrapper class and must report the same configuration as the
    original codec. The original numcodecs codec is then passed to
    ``zarr.create_array`` as a filter, serializer, or compressor, depending on
    which codec base class the wrapper derives from.
    """
    result_v3 = zarr.registry.numcodec_to_zarr3_codec(codec_v2)

    # Exact class identity, not just an instance-of relationship.
    assert type(result_v3) is expected_v3_cls
    assert result_v3.codec_config == codec_v2.get_config()

    filters: FiltersLike = "auto"
    serializer: SerializerLike = "auto"
    compressors: CompressorsLike = "auto"
    # Route the numcodecs codec into the argument that matches the wrapper's kind.
    if isinstance(result_v3, ArrayArrayCodec):
        filters = [codec_v2]
    elif isinstance(result_v3, ArrayBytesCodec):
        serializer = codec_v2
    elif isinstance(result_v3, BytesBytesCodec):
        compressors = [codec_v2]
    else:
        raise TypeError(f"unsupported type: {result_v3.__class__}")

    zarr.create_array(
        store,
        shape=(64,),
        chunks=(64,),
        # ``np.bool_`` instead of ``np.bool``: the ``np.bool`` alias does not
        # exist in NumPy 1.24-1.26, while ``np.bool_`` is available throughout.
        dtype=np.bool_,
        fill_value=False,
        filters=filters,
        compressors=compressors,
        serializer=serializer,
    )
Loading