
Commit 8ac3d86

Refactor apiv2.open_dataset (#4642)
* in apiv2: rename ds to backend_ds and ds2 to ds
* add function _chunk_ds to simplify dataset_from_backend_dataset
* add small function _get_mtime to simplify _chunk_ds
* make resolve_decoders_kwargs and dataset_from_backend_dataset private
1 parent 6530895 commit 8ac3d86
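For orientation, a minimal usage sketch of the call chain this refactor produces (open_dataset -> _resolve_decoders_kwargs -> backend open -> _dataset_from_backend_dataset -> _chunk_ds -> _get_mtime). The file name, engine, and chunk sizes below are illustrative assumptions, and apiv2.open_dataset is the experimental entry point this commit touches, not the stable xarray.open_dataset.

    # Hypothetical usage sketch (not part of the commit): opening a local,
    # non-zarr file with dask chunking exercises the refactored helpers.
    from xarray.backends import apiv2

    ds = apiv2.open_dataset(
        "example.nc",          # illustrative local path; _get_mtime() folds its mtime into the dask token
        engine="netcdf4",      # non-zarr engine, so _chunk_ds() goes through Dataset.chunk()
        chunks={"time": 100},  # chunks is not None, so _dataset_from_backend_dataset() calls _chunk_ds()
    )
    print(ds.chunks)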

File tree

1 file changed: +56 −31 lines changed

xarray/backends/apiv2.py

Lines changed: 56 additions & 31 deletions
@@ -11,37 +11,33 @@
 )


-def dataset_from_backend_dataset(
-    ds,
+def _get_mtime(filename_or_obj):
+    # if passed an actual file path, augment the token with
+    # the file modification time
+    if isinstance(filename_or_obj, str) and not is_remote_uri(filename_or_obj):
+        mtime = os.path.getmtime(filename_or_obj)
+    else:
+        mtime = None
+    return mtime
+
+
+def _chunk_ds(
+    backend_ds,
     filename_or_obj,
     engine,
     chunks,
-    cache,
     overwrite_encoded_chunks,
     **extra_tokens,
 ):
-    if not (isinstance(chunks, (int, dict)) or chunks is None):
-        if chunks != "auto":
-            raise ValueError(
-                "chunks must be an int, dict, 'auto', or None. "
-                "Instead found %s. " % chunks
-            )
-
-    _protect_dataset_variables_inplace(ds, cache)
-    if chunks is not None and engine != "zarr":
+    if engine != "zarr":
         from dask.base import tokenize

-        # if passed an actual file path, augment the token with
-        # the file modification time
-        if isinstance(filename_or_obj, str) and not is_remote_uri(filename_or_obj):
-            mtime = os.path.getmtime(filename_or_obj)
-        else:
-            mtime = None
+        mtime = _get_mtime(filename_or_obj)
         token = tokenize(filename_or_obj, mtime, engine, chunks, **extra_tokens)
         name_prefix = "open_dataset-%s" % token
-        ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token)
+        ds = backend_ds.chunk(chunks, name_prefix=name_prefix, token=token)

-    elif engine == "zarr":
+    else:

         if chunks == "auto":
             try:
@@ -50,35 +46,64 @@ def dataset_from_backend_dataset(
                 chunks = None

         if chunks is None:
-            return ds
+            return backend_ds

         if isinstance(chunks, int):
-            chunks = dict.fromkeys(ds.dims, chunks)
+            chunks = dict.fromkeys(backend_ds.dims, chunks)

         variables = {}
-        for k, v in ds.variables.items():
+        for k, v in backend_ds.variables.items():
             var_chunks = _get_chunk(k, v, chunks)
             variables[k] = _maybe_chunk(
                 k,
                 v,
                 var_chunks,
                 overwrite_encoded_chunks=overwrite_encoded_chunks,
             )
-        ds2 = ds._replace(variables)
+        ds = backend_ds._replace(variables)
+    return ds
+
+
+def _dataset_from_backend_dataset(
+    backend_ds,
+    filename_or_obj,
+    engine,
+    chunks,
+    cache,
+    overwrite_encoded_chunks,
+    **extra_tokens,
+):
+    if not (isinstance(chunks, (int, dict)) or chunks is None):
+        if chunks != "auto":
+            raise ValueError(
+                "chunks must be an int, dict, 'auto', or None. "
+                "Instead found %s. " % chunks
+            )

+    _protect_dataset_variables_inplace(backend_ds, cache)
+    if chunks is None:
+        ds = backend_ds
     else:
-        ds2 = ds
-    ds2._file_obj = ds._file_obj
+        ds = _chunk_ds(
+            backend_ds,
+            filename_or_obj,
+            engine,
+            chunks,
+            overwrite_encoded_chunks,
+            **extra_tokens,
+        )
+
+    ds._file_obj = backend_ds._file_obj

     # Ensure source filename always stored in dataset object (GH issue #2550)
     if "source" not in ds.encoding:
         if isinstance(filename_or_obj, str):
-            ds2.encoding["source"] = filename_or_obj
+            ds.encoding["source"] = filename_or_obj

-    return ds2
+    return ds


-def resolve_decoders_kwargs(decode_cf, engine, **decoders):
+def _resolve_decoders_kwargs(decode_cf, engine, **decoders):
     signature = plugins.ENGINES[engine]["signature"]
     if decode_cf is False:
         for d in decoders:
@@ -225,7 +250,7 @@ def open_dataset(
     if engine is None:
         engine = _autodetect_engine(filename_or_obj)

-    decoders = resolve_decoders_kwargs(
+    decoders = _resolve_decoders_kwargs(
         decode_cf,
         engine=engine,
         mask_and_scale=mask_and_scale,
@@ -249,7 +274,7 @@
         **backend_kwargs,
         **{k: v for k, v in kwargs.items() if v is not None},
     )
-    ds = dataset_from_backend_dataset(
+    ds = _dataset_from_backend_dataset(
         backend_ds,
         filename_or_obj,
         engine,
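A quick illustration of the new _get_mtime helper in isolation, as a sketch only: it assumes the module is importable as xarray.backends.apiv2, imports a private helper purely for demonstration, and uses a temporary file in place of a real dataset path.

    # Minimal sketch of what _get_mtime returns for different inputs.
    import tempfile

    from xarray.backends.apiv2 import _get_mtime

    with tempfile.NamedTemporaryFile(suffix=".nc") as tmp:
        print(_get_mtime(tmp.name))                     # local path -> os.path.getmtime(tmp.name)
    print(_get_mtime("https://example.com/remote.nc"))  # remote URI -> None
    print(_get_mtime(None))                             # not a str (e.g. a file object) -> None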
