93
93
QuantileMethods ,
94
94
Self ,
95
95
T_ChunkDim ,
96
- T_Chunks ,
96
+ T_ChunksFreq ,
97
97
T_DataArray ,
98
98
T_DataArrayOrSet ,
99
99
T_Dataset ,
162
162
QueryParserOptions ,
163
163
ReindexMethodOptions ,
164
164
SideOptions ,
165
+ T_ChunkDimFreq ,
165
166
T_Xarray ,
166
167
)
167
168
from xarray .core .weighted import DatasetWeighted
@@ -283,18 +284,17 @@ def _get_chunk(var: Variable, chunks, chunkmanager: ChunkManagerEntrypoint):
283
284
284
285
285
286
def _maybe_chunk (
286
- name ,
287
- var ,
288
- chunks ,
287
+ name : Hashable ,
288
+ var : Variable ,
289
+ chunks : Mapping [ Any , T_ChunkDim ] | None ,
289
290
token = None ,
290
291
lock = None ,
291
- name_prefix = "xarray-" ,
292
- overwrite_encoded_chunks = False ,
293
- inline_array = False ,
292
+ name_prefix : str = "xarray-" ,
293
+ overwrite_encoded_chunks : bool = False ,
294
+ inline_array : bool = False ,
294
295
chunked_array_type : str | ChunkManagerEntrypoint | None = None ,
295
296
from_array_kwargs = None ,
296
- ):
297
-
297
+ ) -> Variable :
298
298
from xarray .namedarray .daskmanager import DaskManager
299
299
300
300
if chunks is not None :
@@ -2648,14 +2648,14 @@ def chunksizes(self) -> Mapping[Hashable, tuple[int, ...]]:
2648
2648
2649
2649
def chunk (
2650
2650
self ,
2651
- chunks : T_Chunks = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667)
2651
+ chunks : T_ChunksFreq = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667)
2652
2652
name_prefix : str = "xarray-" ,
2653
2653
token : str | None = None ,
2654
2654
lock : bool = False ,
2655
2655
inline_array : bool = False ,
2656
2656
chunked_array_type : str | ChunkManagerEntrypoint | None = None ,
2657
2657
from_array_kwargs = None ,
2658
- ** chunks_kwargs : T_ChunkDim ,
2658
+ ** chunks_kwargs : T_ChunkDimFreq ,
2659
2659
) -> Self :
2660
2660
"""Coerce all arrays in this dataset into dask arrays with the given
2661
2661
chunks.
@@ -2667,11 +2667,13 @@ def chunk(
2667
2667
sizes along that dimension will not be updated; non-dask arrays will be
2668
2668
converted into dask arrays with a single block.
2669
2669
2670
+ Along datetime-like dimensions, a :py:class:`groupers.TimeResampler` object is also accepted.
2671
+
2670
2672
Parameters
2671
2673
----------
2672
- chunks : int, tuple of int, "auto" or mapping of hashable to int, optional
2674
+ chunks : int, tuple of int, "auto" or mapping of hashable to int or a TimeResampler , optional
2673
2675
Chunk sizes along each dimension, e.g., ``5``, ``"auto"``, or
2674
- ``{"x": 5, "y": 5}``.
2676
+ ``{"x": 5, "y": 5}`` or ``{"x": 5, "time": TimeResampler(freq="YE")}`` .
2675
2677
name_prefix : str, default: "xarray-"
2676
2678
Prefix for the name of any new dask arrays.
2677
2679
token : str, optional
@@ -2706,6 +2708,9 @@ def chunk(
2706
2708
xarray.unify_chunks
2707
2709
dask.array.from_array
2708
2710
"""
2711
+ from xarray .core .dataarray import DataArray
2712
+ from xarray .core .groupers import TimeResampler
2713
+
2709
2714
if chunks is None and not chunks_kwargs :
2710
2715
warnings .warn (
2711
2716
"None value for 'chunks' is deprecated. "
@@ -2731,6 +2736,42 @@ def chunk(
2731
2736
f"chunks keys { tuple (bad_dims )} not found in data dimensions { tuple (self .sizes .keys ())} "
2732
2737
)
2733
2738
2739
+ def _resolve_frequency (
2740
+ name : Hashable , resampler : TimeResampler
2741
+ ) -> tuple [int , ...]:
2742
+ variable = self ._variables .get (name , None )
2743
+ if variable is None :
2744
+ raise ValueError (
2745
+ f"Cannot chunk by resampler { resampler !r} for virtual variables."
2746
+ )
2747
+ elif not _contains_datetime_like_objects (variable ):
2748
+ raise ValueError (
2749
+ f"chunks={ resampler !r} only supported for datetime variables. "
2750
+ f"Received variable { name !r} with dtype { variable .dtype !r} instead."
2751
+ )
2752
+
2753
+ assert variable .ndim == 1
2754
+ chunks : tuple [int , ...] = tuple (
2755
+ DataArray (
2756
+ np .ones (variable .shape , dtype = int ),
2757
+ dims = (name ,),
2758
+ coords = {name : variable },
2759
+ )
2760
+ .resample ({name : resampler })
2761
+ .sum ()
2762
+ .data .tolist ()
2763
+ )
2764
+ return chunks
2765
+
2766
+ chunks_mapping_ints : Mapping [Any , T_ChunkDim ] = {
2767
+ name : (
2768
+ _resolve_frequency (name , chunks )
2769
+ if isinstance (chunks , TimeResampler )
2770
+ else chunks
2771
+ )
2772
+ for name , chunks in chunks_mapping .items ()
2773
+ }
2774
+
2734
2775
chunkmanager = guess_chunkmanager (chunked_array_type )
2735
2776
if from_array_kwargs is None :
2736
2777
from_array_kwargs = {}
@@ -2739,7 +2780,7 @@ def chunk(
2739
2780
k : _maybe_chunk (
2740
2781
k ,
2741
2782
v ,
2742
- chunks_mapping ,
2783
+ chunks_mapping_ints ,
2743
2784
token ,
2744
2785
lock ,
2745
2786
name_prefix ,
0 commit comments