Skip to content

Commit 95a37d9

Browse files
BrianMichellpre-commit-ci[bot]dcherian
authored
Adds chunk key encoding to kwargs passed to zarr (#10274)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Deepak Cherian <[email protected]>
1 parent 568f3c1 commit 95a37d9

File tree

4 files changed

+83
-0
lines changed

4 files changed

+83
-0
lines changed

doc/internals/zarr-encoding-spec.rst

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,3 +77,50 @@ re-open it directly with Zarr:
7777
import shutil
7878

7979
shutil.rmtree("rasm.zarr")
80+
81+
Chunk Key Encoding
82+
------------------
83+
84+
When writing data to Zarr stores, Xarray supports customizing how chunk keys are encoded
85+
through the ``chunk_key_encoding`` parameter in the variable's encoding dictionary. This
86+
is particularly useful when you work with Zarr V2 arrays and need to control the
87+
dimension separator in chunk keys.
88+
89+
For example, to specify a custom separator for chunk keys:
90+
91+
.. jupyter-execute::
92+
93+
import xarray as xr
94+
import numpy as np
95+
from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding
96+
97+
# Create a custom chunk key encoding with "/" as separator
98+
enc = V2ChunkKeyEncoding(separator="/").to_dict()
99+
100+
# Create and write a dataset with custom chunk key encoding
101+
arr = np.ones((42, 100))
102+
ds = xr.DataArray(arr, name="var1").to_dataset()
103+
ds.to_zarr(
104+
"example.zarr",
105+
zarr_format=2,
106+
mode="w",
107+
encoding={"var1": {"chunks": (42, 50), "chunk_key_encoding": enc}},
108+
)
109+
110+
The ``chunk_key_encoding`` option accepts a dictionary that specifies the encoding
111+
configuration. For Zarr V2 arrays, you can use the ``V2ChunkKeyEncoding`` class from
112+
``zarr.core.chunk_key_encodings`` to generate this configuration. This is particularly
113+
useful when you need to ensure compatibility with specific Zarr V2 storage layouts or
114+
when working with tools that expect a particular chunk key format.
115+
116+
.. note::
117+
The ``chunk_key_encoding`` option is only relevant when writing to Zarr stores.
118+
When reading Zarr arrays, Xarray automatically detects and uses the appropriate
119+
chunk key encoding based on the store's format and configuration.
120+
121+
.. jupyter-execute::
122+
:hide-code:
123+
124+
import shutil
125+
126+
shutil.rmtree("example.zarr")

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@ Bug fixes
164164
By `Mathias Hauser <https://github.com/mathause>`_.
165165
- Variables with no temporal dimension are left untouched by :py:meth:`~xarray.Dataset.convert_calendar`. (:issue:`10266`, :pull:`10268`)
166166
By `Pascal Bourgault <https://github.com/aulemahal>`_.
167+
- Enable ``chunk_key_encoding`` in :py:meth:`~xarray.Dataset.to_zarr` for Zarr V2 datasets (:pull:`10274`)
168+
By `BrianMichell <https://github.com/BrianMichell>`_.
167169

168170
Documentation
169171
~~~~~~~~~~~~~

xarray/backends/zarr.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,7 @@ def extract_zarr_variable_encoding(
395395
"serializer",
396396
"cache_metadata",
397397
"write_empty_chunks",
398+
"chunk_key_encoding",
398399
}
399400
if zarr_format == 3:
400401
valid_encodings.add("fill_value")

xarray/tests/test_backends.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3691,6 +3691,39 @@ def create_zarr_target(self):
36913691
else:
36923692
yield {}
36933693

3694+
def test_chunk_key_encoding_v2(self) -> None:
    """Write a variable with a V2 ``chunk_key_encoding`` using "/" and round-trip it.

    The variable is written with 2x2 chunks so that several chunk keys are
    produced. When the raw store keys are inspected (only when
    ``has_zarr_v3`` is false), every chunk key of ``var1`` must use "/" and
    never "." between chunk coordinates. The data is then read back and
    compared with the original, including the chunk encoding.
    """
    chunk_key_encoding = {"name": "v2", "configuration": {"separator": "/"}}

    # Create a small 2D dataset with a single variable
    data = np.ones((4, 4))
    original = Dataset({"var1": (("x", "y"), data)})

    # Per-variable encoding: slash-separated chunk keys and 2x2 chunks
    encoding = {
        "var1": {
            "chunk_key_encoding": chunk_key_encoding,
            "chunks": (2, 2),
        }
    }

    # Write to store with custom encoding
    with self.create_zarr_target() as store:
        original.to_zarr(store, encoding=encoding)

        # Verify the chunk keys in store use the slash separator
        if not has_zarr_v3:
            prefix = "var1/"
            # Array metadata keys (e.g. ".zarray", ".zattrs") share the
            # prefix and legitimately contain a dot, so skip any key whose
            # final component starts with ".".
            chunk_keys = [
                k
                for k in store.keys()
                if k.startswith(prefix)
                and not k.rsplit("/", 1)[-1].startswith(".")
            ]
            assert len(chunk_keys) > 0
            for key in chunk_keys:
                # Look only at the part after the variable name, so the
                # prefix's own "/" cannot satisfy the check.
                chunk_coords = key[len(prefix) :]
                assert "/" in chunk_coords
                assert "." not in chunk_coords  # No dots in chunk coordinates

        # Read back and verify data
        with xr.open_zarr(store) as actual:
            assert_identical(original, actual)
            # Verify chunks are preserved
            assert actual["var1"].encoding["chunks"] == (2, 2)
36943727

36953728
@requires_zarr
36963729
@pytest.mark.skipif(

0 commit comments

Comments
 (0)