 )
 
 
-def dataset_from_backend_dataset(
-    ds,
+def _get_mtime(filename_or_obj):
+    # if passed an actual file path, augment the token with
+    # the file modification time
+    if isinstance(filename_or_obj, str) and not is_remote_uri(filename_or_obj):
+        mtime = os.path.getmtime(filename_or_obj)
+    else:
+        mtime = None
+    return mtime
+
+
+def _chunk_ds(
+    backend_ds,
     filename_or_obj,
     engine,
     chunks,
-    cache,
     overwrite_encoded_chunks,
     **extra_tokens,
 ):
-    if not (isinstance(chunks, (int, dict)) or chunks is None):
-        if chunks != "auto":
-            raise ValueError(
-                "chunks must be an int, dict, 'auto', or None. "
-                "Instead found %s. " % chunks
-            )
-
-    _protect_dataset_variables_inplace(ds, cache)
-    if chunks is not None and engine != "zarr":
+    if engine != "zarr":
         from dask.base import tokenize
 
-        # if passed an actual file path, augment the token with
-        # the file modification time
-        if isinstance(filename_or_obj, str) and not is_remote_uri(filename_or_obj):
-            mtime = os.path.getmtime(filename_or_obj)
-        else:
-            mtime = None
+        mtime = _get_mtime(filename_or_obj)
         token = tokenize(filename_or_obj, mtime, engine, chunks, **extra_tokens)
         name_prefix = "open_dataset-%s" % token
-        ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token)
+        ds = backend_ds.chunk(chunks, name_prefix=name_prefix, token=token)
 
-    elif engine == "zarr":
+    else:
 
         if chunks == "auto":
             try:
@@ -50,35 +46,64 @@ def dataset_from_backend_dataset(
                 chunks = None
 
         if chunks is None:
-            return ds
+            return backend_ds
 
         if isinstance(chunks, int):
-            chunks = dict.fromkeys(ds.dims, chunks)
+            chunks = dict.fromkeys(backend_ds.dims, chunks)
 
         variables = {}
-        for k, v in ds.variables.items():
+        for k, v in backend_ds.variables.items():
             var_chunks = _get_chunk(k, v, chunks)
             variables[k] = _maybe_chunk(
                 k,
                 v,
                 var_chunks,
                 overwrite_encoded_chunks=overwrite_encoded_chunks,
             )
-        ds2 = ds._replace(variables)
+        ds = backend_ds._replace(variables)
+    return ds
+
+
+def _dataset_from_backend_dataset(
+    backend_ds,
+    filename_or_obj,
+    engine,
+    chunks,
+    cache,
+    overwrite_encoded_chunks,
+    **extra_tokens,
+):
+    if not (isinstance(chunks, (int, dict)) or chunks is None):
+        if chunks != "auto":
+            raise ValueError(
+                "chunks must be an int, dict, 'auto', or None. "
+                "Instead found %s. " % chunks
+            )
 
+    _protect_dataset_variables_inplace(backend_ds, cache)
+    if chunks is None:
+        ds = backend_ds
     else:
-        ds2 = ds
-    ds2._file_obj = ds._file_obj
+        ds = _chunk_ds(
+            backend_ds,
+            filename_or_obj,
+            engine,
+            chunks,
+            overwrite_encoded_chunks,
+            **extra_tokens,
+        )
+
+    ds._file_obj = backend_ds._file_obj
 
     # Ensure source filename always stored in dataset object (GH issue #2550)
     if "source" not in ds.encoding:
         if isinstance(filename_or_obj, str):
-            ds2.encoding["source"] = filename_or_obj
+            ds.encoding["source"] = filename_or_obj
 
-    return ds2
+    return ds
 
 
-def resolve_decoders_kwargs(decode_cf, engine, **decoders):
+def _resolve_decoders_kwargs(decode_cf, engine, **decoders):
     signature = plugins.ENGINES[engine]["signature"]
     if decode_cf is False:
         for d in decoders:
@@ -225,7 +250,7 @@ def open_dataset(
     if engine is None:
         engine = _autodetect_engine(filename_or_obj)
 
-    decoders = resolve_decoders_kwargs(
+    decoders = _resolve_decoders_kwargs(
        decode_cf,
        engine=engine,
        mask_and_scale=mask_and_scale,
@@ -249,7 +274,7 @@ def open_dataset(
         **backend_kwargs,
         **{k: v for k, v in kwargs.items() if v is not None},
     )
-    ds = dataset_from_backend_dataset(
+    ds = _dataset_from_backend_dataset(
         backend_ds,
         filename_or_obj,
         engine,
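
For context, a minimal sketch (not part of the diff) of the caching rationale behind _get_mtime: folding the local file's modification time into the dask token means that re-opening the same path after the file has been rewritten yields different graph keys, so chunked variables do not collide with stale cached tasks. The file name and the second mtime value below are purely illustrative.

import os
from dask.base import tokenize  # same helper imported inside _chunk_ds above

filename = "example.nc"  # hypothetical local path
mtime = os.path.getmtime(filename) if os.path.exists(filename) else None

# token as computed when the file is first opened
token_before = tokenize(filename, mtime, "netcdf4", {"time": 10})
# token after the file has been rewritten (illustrative new mtime value)
token_after = tokenize(filename, 1_700_000_000.0, "netcdf4", {"time": 10})

assert token_before != token_after  # new mtime -> new dask task names, no stale cache hits
name_prefix = "open_dataset-%s" % token_after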