Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
79560d5
fix merge
brokkoli71 Feb 15, 2025
7619990
add str arguments for filters, serializer, compressors
brokkoli71 Feb 15, 2025
daff61a
remove duplicate type check
brokkoli71 Feb 15, 2025
1a3a502
fix ruff
brokkoli71 Feb 15, 2025
68ac329
update docstrings
brokkoli71 Feb 15, 2025
0e227e0
document changes
brokkoli71 Feb 15, 2025
de83f92
test_bad_chunk_encoding
brokkoli71 Feb 15, 2025
73b32ac
remove unused "type: ignore" comment
brokkoli71 Feb 15, 2025
3588a65
remove comment
brokkoli71 Feb 15, 2025
74b45bb
update test_v3_chunk_encoding
brokkoli71 Feb 18, 2025
1937ee5
update test_invalid_chunk_encoding
brokkoli71 Feb 18, 2025
f3bb890
Merge branch 'main' into string-arguments-for-codecs
brokkoli71 Feb 18, 2025
ef10fe2
Merge branch 'main' into string-arguments-for-codecs
brokkoli71 Feb 28, 2025
ad96bfe
Merge branch 'main' into string-arguments-for-codecs
brokkoli71 Apr 2, 2025
28e566f
Merge branch 'main' into string-arguments-for-codecs
brokkoli71 Apr 7, 2025
d953a14
Merge branch 'main' into string-arguments-for-codecs
brokkoli71 Apr 10, 2025
363484a
Merge branch 'main' into string-arguments-for-codecs
brokkoli71 Apr 10, 2025
6399f13
Merge branch 'main' into string-arguments-for-codecs
brokkoli71 Apr 11, 2025
55f975c
test for codec with mandatory config
brokkoli71 Apr 11, 2025
a55abad
Merge branch 'main' into string-arguments-for-codecs
brokkoli71 May 8, 2025
817617a
Merge branch 'main' into string-arguments-for-codecs
brokkoli71 May 13, 2025
82459e3
Merge branch 'main' into string-arguments-for-codecs
brokkoli71 May 15, 2025
4519547
better error msg if codec requires config
brokkoli71 May 16, 2025
5f94313
Merge branch 'main' into string-arguments-for-codecs
brokkoli71 May 16, 2025
e2a18c3
Merge branch 'main' into string-arguments-for-codecs
brokkoli71 May 20, 2025
61c32f8
typing
brokkoli71 May 20, 2025
41000fd
Merge remote-tracking branch 'origin/string-arguments-for-codecs' int…
brokkoli71 May 20, 2025
9e35f7c
typing
brokkoli71 May 20, 2025
a3366aa
typing in tests
brokkoli71 May 20, 2025
7a5bc66
typing in tests
brokkoli71 May 20, 2025
bac3a10
Merge branch 'main' into string-arguments-for-codecs
brokkoli71 May 27, 2025
0abc569
Merge branch 'main' into string-arguments-for-codecs
brokkoli71 Jun 10, 2025
a4d8013
Merge branch 'main' into string-arguments-for-codecs
brokkoli71 Jun 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/2839.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Array creation allows string representation of codecs for ``filters``, ``serializer``, and ``compressors``.
2 changes: 1 addition & 1 deletion src/zarr/api/synchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -803,7 +803,7 @@ def create_array(
chunk to bytes.

For Zarr format 3, a "filter" is a codec that takes an array and returns an array,
and these values must be instances of ``ArrayArrayCodec``, or dict representations
and these values must be instances of ``ArrayArrayCodec``, or dict or string representations
of ``ArrayArrayCodec``.
If no ``filters`` are provided, a default set of filters will be used.
These defaults can be changed by modifying the value of ``array.v3_default_filters``
Expand Down
27 changes: 10 additions & 17 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -3794,11 +3794,11 @@ def _build_parents(


FiltersLike: TypeAlias = (
Iterable[dict[str, JSON] | ArrayArrayCodec | numcodecs.abc.Codec]
Iterable[dict[str, JSON] | str | ArrayArrayCodec | numcodecs.abc.Codec]
| ArrayArrayCodec
| Iterable[numcodecs.abc.Codec]
| numcodecs.abc.Codec
| Literal["auto"]
| str
| None
)
# Union of acceptable types for users to pass in for both v2 and v3 compressors
Expand All @@ -3807,14 +3807,14 @@ def _build_parents(
)

CompressorsLike: TypeAlias = (
Iterable[dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec]
Iterable[dict[str, JSON] | str | BytesBytesCodec | numcodecs.abc.Codec]
| dict[str, JSON]
| BytesBytesCodec
| numcodecs.abc.Codec
| Literal["auto"]
| str
| None
)
SerializerLike: TypeAlias = dict[str, JSON] | ArrayBytesCodec | Literal["auto"]
SerializerLike: TypeAlias = dict[str, JSON] | ArrayBytesCodec | str


class ShardsConfigParam(TypedDict):
Expand Down Expand Up @@ -4356,7 +4356,7 @@ async def create_array(
chunk to bytes.

For Zarr format 3, a "filter" is a codec that takes an array and returns an array,
and these values must be instances of ``ArrayArrayCodec``, or dict representations
and these values must be instances of ``ArrayArrayCodec``, or dict or string representations
of ``ArrayArrayCodec``.
If no ``filters`` are provided, a default set of filters will be used.
These defaults can be changed by modifying the value of ``array.v3_default_filters``
Expand Down Expand Up @@ -4655,24 +4655,13 @@ def _parse_chunk_encoding_v2(
elif isinstance(compressor, tuple | list) and len(compressor) == 1:
_compressor = parse_compressor(compressor[0])
else:
if isinstance(compressor, Iterable) and not isinstance(compressor, dict):
msg = f"For Zarr format 2 arrays, the `compressor` must be a single codec. Got an iterable with type {type(compressor)} instead."
raise TypeError(msg)
_compressor = parse_compressor(compressor)

if filters is None:
_filters = None
elif filters == "auto":
_filters = default_filters
else:
if isinstance(filters, Iterable):
for idx, f in enumerate(filters):
if not isinstance(f, numcodecs.abc.Codec):
msg = (
"For Zarr format 2 arrays, all elements of `filters` must be numcodecs codecs. "
f"Element at index {idx} has type {type(f)}, which is not a numcodecs codec."
)
raise TypeError(msg)
_filters = parse_filters(filters)

return _filters, _compressor
Expand All @@ -4696,6 +4685,8 @@ def _parse_chunk_encoding_v3(
out_array_array: tuple[ArrayArrayCodec, ...] = ()
elif filters == "auto":
out_array_array = default_array_array
elif isinstance(filters, str):
out_array_array = (_parse_array_array_codec(filters),)
else:
maybe_array_array: Iterable[Codec | dict[str, JSON]]
if isinstance(filters, dict | Codec):
Expand All @@ -4716,6 +4707,8 @@ def _parse_chunk_encoding_v3(
out_bytes_bytes: tuple[BytesBytesCodec, ...] = ()
elif compressors == "auto":
out_bytes_bytes = default_bytes_bytes
elif isinstance(compressors, str):
out_bytes_bytes = (_parse_bytes_bytes_codec(compressors),)
else:
maybe_bytes_bytes: Iterable[Codec | dict[str, JSON]]
if isinstance(compressors, dict | Codec):
Expand Down
6 changes: 3 additions & 3 deletions src/zarr/core/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -1039,7 +1039,7 @@ async def create_array(
chunk to bytes.

For Zarr format 3, a "filter" is a codec that takes an array and returns an array,
and these values must be instances of ``ArrayArrayCodec``, or dict representations
and these values must be instances of ``ArrayArrayCodec``, or dict or string representations
of ``ArrayArrayCodec``.
If no ``filters`` are provided, a default set of filters will be used.
These defaults can be changed by modifying the value of ``array.v3_default_filters``
Expand Down Expand Up @@ -2451,7 +2451,7 @@ def create_array(
chunk to bytes.

For Zarr format 3, a "filter" is a codec that takes an array and returns an array,
and these values must be instances of ``ArrayArrayCodec``, or dict representations
and these values must be instances of ``ArrayArrayCodec``, or dict or string representations
of ``ArrayArrayCodec``.
If no ``filters`` are provided, a default set of filters will be used.
These defaults can be changed by modifying the value of ``array.v3_default_filters``
Expand Down Expand Up @@ -2849,7 +2849,7 @@ def array(
chunk to bytes.

For Zarr format 3, a "filter" is a codec that takes an array and returns an array,
and these values must be instances of ``ArrayArrayCodec``, or dict representations
and these values must be instances of ``ArrayArrayCodec``, or dict or string representations
of ``ArrayArrayCodec``.
If no ``filters`` are provided, a default set of filters will be used.
These defaults can be changed by modifying the value of ``array.v3_default_filters``
Expand Down
32 changes: 29 additions & 3 deletions src/zarr/core/metadata/v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,14 +270,29 @@

if data is None:
return data
if isinstance(data, str):
try:
return (numcodecs.get_codec({"id": data}),)
except TypeError as e:
codec_cls = numcodecs.registry.codec_registry.get(data)
msg = (
f'A string representation for filter "{data}" was provided which specifies codec {codec_cls.__name__}. But that codec '
f"cannot be specified by a string because it takes a required configuration. Use either the dict "
f"representation of {data} codec, or pass in a concrete {codec_cls.__name__} instance instead"
)
raise TypeError(msg) from e
if isinstance(data, Iterable):
for idx, val in enumerate(data):
if isinstance(val, numcodecs.abc.Codec):
out.append(val)
elif isinstance(val, dict):
out.append(numcodecs.get_codec(val))
elif isinstance(val, str):
filter = parse_filters(val)
if filter is not None:
out.extend(filter)
else:
msg = f"Invalid filter at index {idx}. Expected a numcodecs.abc.Codec or a dict representation of numcodecs.abc.Codec. Got {type(val)} instead."
msg = f"For Zarr format 2 arrays, all elements of `filters` must be a numcodecs.abc.Codec or a dict or str representation of numcodecs.abc.Codec. Got {type(val)} at index {idx} instead."
raise TypeError(msg)
if len(out) == 0:
# Per the v2 spec, an empty tuple is not allowed -- use None to express "no filters"
Expand All @@ -287,7 +302,7 @@
# take a single codec instance and wrap it in a tuple
if isinstance(data, numcodecs.abc.Codec):
return (data,)
msg = f"Invalid filters. Expected None, an iterable of numcodecs.abc.Codec or dict representations of numcodecs.abc.Codec. Got {type(data)} instead."
msg = f"For Zarr format 2 arrays, all elements of `filters` must be None, an iterable of numcodecs.abc.Codec or dict representations of numcodecs.abc.Codec. Got {type(data)} instead."
raise TypeError(msg)


Expand All @@ -299,7 +314,18 @@
return data
if isinstance(data, dict):
return numcodecs.get_codec(data)
msg = f"Invalid compressor. Expected None, a numcodecs.abc.Codec, or a dict representation of a numcodecs.abc.Codec. Got {type(data)} instead."
if isinstance(data, str):
try:
return numcodecs.get_codec({"id": data})
except TypeError as e:
codec_cls = numcodecs.registry.codec_registry.get(data)
msg = (

Check warning on line 322 in src/zarr/core/metadata/v2.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/core/metadata/v2.py#L317-L322

Added lines #L317 - L322 were not covered by tests
f'A string representation for compressor "{data}" was provided which specifies codec {codec_cls.__name__}. But that codec '
f"cannot be specified by a string because it takes a required configuration. Use either the dict "
f"representation of {data} codec, or pass in a concrete {codec_cls.__name__} instance instead"
)
raise TypeError(msg) from e
msg = f"For Zarr format 2 arrays, the `compressor` must be a single codec. Expected None, a numcodecs.abc.Codec, or a dict or str representation of a numcodecs.abc.Codec. Got {type(data)} instead."

Check warning on line 328 in src/zarr/core/metadata/v2.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/core/metadata/v2.py#L327-L328

Added lines #L327 - L328 were not covered by tests
raise ValueError(msg)


Expand Down
69 changes: 48 additions & 21 deletions src/zarr/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,63 +177,90 @@
return get_codec_class(data["name"]).from_dict(data) # type: ignore[arg-type]


def _parse_bytes_bytes_codec(data: dict[str, JSON] | Codec) -> BytesBytesCodec:
def _parse_bytes_bytes_codec(data: dict[str, JSON] | str | Codec) -> BytesBytesCodec:
"""
Normalize the input to a ``BytesBytesCodec`` instance.
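If the input is already a ``BytesBytesCodec``, it is returned as is. If the input is a dict, it
is converted to a ``BytesBytesCodec`` instance via the ``_resolve_codec`` function. If the input
is a string, it is treated as a codec name and resolved via ``_resolve_codec`` with an empty
configuration; a ``TypeError`` raised by that resolution is re-raised with a message asking for
a dict representation or a concrete codec instance instead.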
"""
from zarr.abc.codec import BytesBytesCodec

if isinstance(data, dict):
if isinstance(data, str):
try:
result = _resolve_codec({"name": data, "configuration": {}})
except TypeError as e:
codec_cls = get_codec_class(data)
msg = (

Check warning on line 193 in src/zarr/registry.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/registry.py#L189-L193

Added lines #L189 - L193 were not covered by tests
f'A string representation for compressor "{data}" was provided which specifies codec {codec_cls.__name__}. '
f"But that codec cannot be specified by a string because it takes a required configuration. Use either "
f"the dict representation of {data} codec, or pass in a concrete {codec_cls.__name__} instance instead"
)
raise TypeError(msg) from e

Check warning on line 198 in src/zarr/registry.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/registry.py#L198

Added line #L198 was not covered by tests
elif isinstance(data, dict):
result = _resolve_codec(data)
if not isinstance(result, BytesBytesCodec):
msg = f"Expected a dict representation of a BytesBytesCodec; got a dict representation of a {type(result)} instead."
raise TypeError(msg)
else:
if not isinstance(data, BytesBytesCodec):
raise TypeError(f"Expected a BytesBytesCodec. Got {type(data)} instead.")
result = data
if not isinstance(result, BytesBytesCodec):
msg = f"Expected a representation of a BytesBytesCodec; got a representation of a {type(result)} instead."
raise TypeError(msg)

Check warning on line 205 in src/zarr/registry.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/registry.py#L204-L205

Added lines #L204 - L205 were not covered by tests
return result


def _parse_array_bytes_codec(data: dict[str, JSON] | Codec) -> ArrayBytesCodec:
def _parse_array_bytes_codec(data: dict[str, JSON] | str | Codec) -> ArrayBytesCodec:
"""
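Normalize the input to an ``ArrayBytesCodec`` instance.
If the input is already an ``ArrayBytesCodec``, it is returned as is. If the input is a dict, it
is converted to an ``ArrayBytesCodec`` instance via the ``_resolve_codec`` function. If the input
is a string, it is treated as a codec name and resolved via ``_resolve_codec`` with an empty
configuration; a ``TypeError`` raised by that resolution is re-raised with a message asking for
a dict representation or a concrete codec instance instead.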
"""
from zarr.abc.codec import ArrayBytesCodec

if isinstance(data, dict):
if isinstance(data, str):
try:
result = _resolve_codec({"name": data, "configuration": {}})
except TypeError as e:
codec_cls = get_codec_class(data)
msg = (

Check warning on line 222 in src/zarr/registry.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/registry.py#L218-L222

Added lines #L218 - L222 were not covered by tests
f'A string representation for serializer "{data}" was provided which specifies codec {codec_cls.__name__}. '
f"But that codec cannot be specified by a string because it takes a required configuration. Use either "
f"the dict representation of {data} codec, or pass in a concrete {codec_cls.__name__} instance instead"
)
raise TypeError(msg) from e

Check warning on line 227 in src/zarr/registry.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/registry.py#L227

Added line #L227 was not covered by tests
elif isinstance(data, dict):
result = _resolve_codec(data)
if not isinstance(result, ArrayBytesCodec):
msg = f"Expected a dict representation of a ArrayBytesCodec; got a dict representation of a {type(result)} instead."
raise TypeError(msg)
else:
if not isinstance(data, ArrayBytesCodec):
raise TypeError(f"Expected a ArrayBytesCodec. Got {type(data)} instead.")
result = data
if not isinstance(result, ArrayBytesCodec):
msg = f"Expected a representation of a ArrayBytesCodec; got a representation of a {type(result)} instead."
raise TypeError(msg)

Check warning on line 234 in src/zarr/registry.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/registry.py#L233-L234

Added lines #L233 - L234 were not covered by tests
return result


def _parse_array_array_codec(data: dict[str, JSON] | Codec) -> ArrayArrayCodec:
def _parse_array_array_codec(data: dict[str, JSON] | str | Codec) -> ArrayArrayCodec:
"""
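Normalize the input to an ``ArrayArrayCodec`` instance.
If the input is already an ``ArrayArrayCodec``, it is returned as is. If the input is a dict, it
is converted to an ``ArrayArrayCodec`` instance via the ``_resolve_codec`` function. If the input
is a string, it is treated as a codec name and resolved via ``_resolve_codec`` with an empty
configuration; a ``TypeError`` raised by that resolution is re-raised with a message asking for
a dict representation or a concrete codec instance instead.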
"""
from zarr.abc.codec import ArrayArrayCodec

if isinstance(data, dict):
if isinstance(data, str):
try:
result = _resolve_codec({"name": data, "configuration": {}})
except TypeError as e:
codec_cls = get_codec_class(data)
msg = (

Check warning on line 251 in src/zarr/registry.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/registry.py#L247-L251

Added lines #L247 - L251 were not covered by tests
f'A string representation for filter "{data}" was provided which specifies codec {codec_cls.__name__}. '
f"But that codec cannot be specified by a string because it takes a required configuration. Use either "
f"the dict representation of {data} codec, or pass in a concrete {codec_cls.__name__} instance instead"
)
raise TypeError(msg) from e

Check warning on line 256 in src/zarr/registry.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/registry.py#L256

Added line #L256 was not covered by tests
elif isinstance(data, dict):
result = _resolve_codec(data)
if not isinstance(result, ArrayArrayCodec):
msg = f"Expected a dict representation of a ArrayArrayCodec; got a dict representation of a {type(result)} instead."
raise TypeError(msg)
else:
if not isinstance(data, ArrayArrayCodec):
raise TypeError(f"Expected a ArrayArrayCodec. Got {type(data)} instead.")
result = data
if not isinstance(result, ArrayArrayCodec):
msg = f"Expected a representation of a ArrayArrayCodec; got a representation of a {type(result)} instead."
raise TypeError(msg)

Check warning on line 263 in src/zarr/registry.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/registry.py#L262-L263

Added lines #L262 - L263 were not covered by tests
return result


Expand Down
Loading