Skip to content

Commit 6530895

Browse files
aurghs and alexamici authored
Move get_chunks from zarr.py to dataset.py (#4632)
* move get_chunks from zarr to dataset and removed maybe_chunks in zarr
* move get_chunks from zarr to dataset and removed maybe_chunks in zarr
* black
* removed unused import
* update warning message in get_chunks
* Reformat warning text to use f-strings

Co-authored-by: Alessandro Amici <[email protected]>
1 parent 9396605 commit 6530895

File tree

4 files changed

+41
-56
lines changed

4 files changed

+41
-56
lines changed

xarray/backends/api.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
combine_by_coords,
2727
)
2828
from ..core.dataarray import DataArray
29-
from ..core.dataset import Dataset, _maybe_chunk
29+
from ..core.dataset import Dataset, _get_chunk, _maybe_chunk
3030
from ..core.utils import close_on_error, is_grib_path, is_remote_uri
3131
from .common import AbstractDataStore, ArrayWriter
3232
from .locks import _get_scheduler
@@ -536,7 +536,7 @@ def maybe_decode_store(store, chunks):
536536
k: _maybe_chunk(
537537
k,
538538
v,
539-
store.get_chunk(k, v, chunks),
539+
_get_chunk(k, v, chunks),
540540
overwrite_encoded_chunks=overwrite_encoded_chunks,
541541
)
542542
for k, v in ds.variables.items()

xarray/backends/apiv2.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import os
22

3+
from ..core.dataset import _get_chunk, _maybe_chunk
34
from ..core.utils import is_remote_uri
4-
from . import plugins, zarr
5+
from . import plugins
56
from .api import (
67
_autodetect_engine,
78
_get_backend_cls,
@@ -54,10 +55,15 @@ def dataset_from_backend_dataset(
5455
if isinstance(chunks, int):
5556
chunks = dict.fromkeys(ds.dims, chunks)
5657

57-
variables = {
58-
k: zarr.ZarrStore.maybe_chunk(k, v, chunks, overwrite_encoded_chunks)
59-
for k, v in ds.variables.items()
60-
}
58+
variables = {}
59+
for k, v in ds.variables.items():
60+
var_chunks = _get_chunk(k, v, chunks)
61+
variables[k] = _maybe_chunk(
62+
k,
63+
v,
64+
var_chunks,
65+
overwrite_encoded_chunks=overwrite_encoded_chunks,
66+
)
6167
ds2 = ds._replace(variables)
6268

6369
else:

xarray/backends/zarr.py

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import warnings
2-
31
import numpy as np
42

53
from .. import coding, conventions
@@ -368,53 +366,6 @@ def encode_variable(self, variable):
368366
def encode_attribute(self, a):
369367
return encode_zarr_attr_value(a)
370368

371-
@staticmethod
372-
def get_chunk(name, var, chunks):
373-
chunk_spec = dict(zip(var.dims, var.encoding.get("chunks")))
374-
375-
# Coordinate labels aren't chunked
376-
if var.ndim == 1 and var.dims[0] == name:
377-
return chunk_spec
378-
379-
if chunks == "auto":
380-
return chunk_spec
381-
382-
for dim in var.dims:
383-
if dim in chunks:
384-
spec = chunks[dim]
385-
if isinstance(spec, int):
386-
spec = (spec,)
387-
if isinstance(spec, (tuple, list)) and chunk_spec[dim]:
388-
if any(s % chunk_spec[dim] for s in spec):
389-
warnings.warn(
390-
"Specified Dask chunks %r would "
391-
"separate Zarr chunk shape %r for "
392-
"dimension %r. This significantly "
393-
"degrades performance. Consider "
394-
"rechunking after loading instead."
395-
% (chunks[dim], chunk_spec[dim], dim),
396-
stacklevel=2,
397-
)
398-
chunk_spec[dim] = chunks[dim]
399-
return chunk_spec
400-
401-
@classmethod
402-
def maybe_chunk(cls, name, var, chunks, overwrite_encoded_chunks):
403-
chunk_spec = cls.get_chunk(name, var, chunks)
404-
405-
if (var.ndim > 0) and (chunk_spec is not None):
406-
from dask.base import tokenize
407-
408-
# does this cause any data to be read?
409-
token2 = tokenize(name, var._data, chunks)
410-
name2 = f"xarray-{name}-{token2}"
411-
var = var.chunk(chunk_spec, name=name2, lock=None)
412-
if overwrite_encoded_chunks and var.chunks is not None:
413-
var.encoding["chunks"] = tuple(x[0] for x in var.chunks)
414-
return var
415-
else:
416-
return var
417-
418369
def store(
419370
self,
420371
variables,

xarray/core/dataset.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,34 @@ def _assert_empty(args: tuple, msg: str = "%s") -> None:
359359
raise ValueError(msg % args)
360360

361361

362+
def _get_chunk(name, var, chunks):
363+
chunk_spec = dict(zip(var.dims, var.encoding.get("chunks")))
364+
365+
# Coordinate labels aren't chunked
366+
if var.ndim == 1 and var.dims[0] == name:
367+
return chunk_spec
368+
369+
if chunks == "auto":
370+
return chunk_spec
371+
372+
for dim in var.dims:
373+
if dim in chunks:
374+
spec = chunks[dim]
375+
if isinstance(spec, int):
376+
spec = (spec,)
377+
if isinstance(spec, (tuple, list)) and chunk_spec[dim]:
378+
if any(s % chunk_spec[dim] for s in spec):
379+
warnings.warn(
380+
f"Specified Dask chunks {chunks[dim]} would separate "
381+
f"on disks chunk shape {chunk_spec[dim]} for dimension {dim}. "
382+
"This could degrade performance. "
383+
"Consider rechunking after loading instead.",
384+
stacklevel=2,
385+
)
386+
chunk_spec[dim] = chunks[dim]
387+
return chunk_spec
388+
389+
362390
def _maybe_chunk(
363391
name,
364392
var,

0 commit comments

Comments
 (0)