|
8 | 8 | from ..core import indexing
|
9 | 9 | from ..core.pycompat import integer_types
|
10 | 10 | from ..core.utils import FrozenOrderedDict, HiddenKeyDict
|
11 |
| -from .common import AbstractWritableDataStore, BackendArray |
| 11 | +from .common import AbstractWritableDataStore, BackendArray, \ |
| 12 | + _encode_variable_name |
12 | 13 |
|
13 | 14 | # need some special secret attributes to tell us the dimensions
|
14 | 15 | _DIMENSION_KEY = '_ARRAY_DIMENSIONS'
|
@@ -212,7 +213,7 @@ def encode_zarr_variable(var, needs_copy=True, name=None):
|
212 | 213 | # zarr allows unicode, but not variable-length strings, so it's both
|
213 | 214 | # simpler and more compact to always encode as UTF-8 explicitly.
|
214 | 215 | # TODO: allow toggling this explicitly via dtype in encoding.
|
215 |
| - coder = coding.strings.EncodedStringCoder(allows_unicode=False) |
| 216 | + coder = coding.strings.EncodedStringCoder(allows_unicode=True) |
216 | 217 | var = coder.encode(var, name=name)
|
217 | 218 | var = coding.strings.ensure_fixed_length_bytes(var)
|
218 | 219 |
|
@@ -257,6 +258,7 @@ def __init__(self, zarr_group, consolidate_on_close=False):
|
257 | 258 | self._synchronizer = self.ds.synchronizer
|
258 | 259 | self._group = self.ds.path
|
259 | 260 | self._consolidate_on_close = consolidate_on_close
|
| 261 | + self.append_dim = None |
260 | 262 |
|
261 | 263 | def open_store_variable(self, name, zarr_array):
|
262 | 264 | data = indexing.LazilyOuterIndexedArray(ZarrArrayWrapper(name, self))
|
@@ -313,40 +315,122 @@ def encode_variable(self, variable):
|
313 | 315 | def encode_attribute(self, a):
|
314 | 316 | return _encode_zarr_attr_value(a)
|
315 | 317 |
|
316 |
| - def prepare_variable(self, name, variable, check_encoding=False, |
317 |
| - unlimited_dims=None): |
318 |
| - |
319 |
| - attrs = variable.attrs.copy() |
320 |
| - dims = variable.dims |
321 |
| - dtype = variable.dtype |
322 |
| - shape = variable.shape |
323 |
| - |
324 |
| - fill_value = attrs.pop('_FillValue', None) |
325 |
| - if variable.encoding == {'_FillValue': None} and fill_value is None: |
326 |
| - variable.encoding = {} |
327 |
| - |
328 |
| - encoding = _extract_zarr_variable_encoding( |
329 |
| - variable, raise_on_invalid=check_encoding) |
330 |
| - |
331 |
| - encoded_attrs = OrderedDict() |
332 |
| - # the magic for storing the hidden dimension data |
333 |
| - encoded_attrs[_DIMENSION_KEY] = dims |
334 |
| - for k, v in attrs.items(): |
335 |
| - encoded_attrs[k] = self.encode_attribute(v) |
336 |
| - |
337 |
| - zarr_array = self.ds.create(name, shape=shape, dtype=dtype, |
338 |
| - fill_value=fill_value, **encoding) |
339 |
| - zarr_array.attrs.put(encoded_attrs) |
340 |
| - |
341 |
| - return zarr_array, variable.data |
342 |
| - |
343 |
| - def store(self, variables, attributes, *args, **kwargs): |
344 |
| - AbstractWritableDataStore.store(self, variables, attributes, |
345 |
| - *args, **kwargs) |
| 318 | + def store(self, variables, attributes, check_encoding_set=frozenset(), |
| 319 | + writer=None, unlimited_dims=None): |
| 320 | + """ |
| 321 | + Top level method for putting data on this store. This method: |
| 322 | + - encodes variables/attributes |
| 323 | + - sets dimensions |
| 324 | + - sets variables |
| 325 | +
|
| 326 | + Parameters |
| 327 | + ---------- |
| 328 | + variables : dict-like |
| 329 | + Dictionary of key/value (variable name / xr.Variable) pairs |
| 330 | + attributes : dict-like |
| 331 | + Dictionary of key/value (attribute name / attribute) pairs |
| 332 | + check_encoding_set : list-like |
| 333 | + List of variables that should be checked for invalid encoding |
| 334 | + values |
| 335 | + writer : ArrayWriter |
| 336 | + unlimited_dims : list-like |
| 337 | + List of dimension names that should be treated as unlimited |
| 338 | + dimensions. |
| 339 | + The dimension on which the zarr array will be appended is read |
| 340 | + from self.append_dim; it is only needed in append mode. |
| 341 | + """ |
| 342 | + |
| 343 | + existing_variables = set([vn for vn in variables |
| 344 | + if _encode_variable_name(vn) in self.ds]) |
| 345 | + new_variables = set(variables) - existing_variables |
| 346 | + variables_without_encoding = OrderedDict([(vn, variables[vn]) |
| 347 | + for vn in new_variables]) |
| 348 | + variables_encoded, attributes = self.encode( |
| 349 | + variables_without_encoding, attributes) |
| 350 | + |
| 351 | + if len(existing_variables) > 0: |
| 352 | + # there are variables to append |
| 353 | + # their encoding must be the same as in the store |
| 354 | + ds = open_zarr(self.ds.store, chunks=None) |
| 355 | + variables_with_encoding = OrderedDict() |
| 356 | + for vn in existing_variables: |
| 357 | + variables_with_encoding[vn] = variables[vn].copy(deep=False) |
| 358 | + variables_with_encoding[vn].encoding = ds[vn].encoding |
| 359 | + variables_with_encoding, _ = self.encode(variables_with_encoding, |
| 360 | + {}) |
| 361 | + variables_encoded.update(variables_with_encoding) |
| 362 | + |
| 363 | + self.set_attributes(attributes) |
| 364 | + self.set_dimensions(variables_encoded, unlimited_dims=unlimited_dims) |
| 365 | + self.set_variables(variables_encoded, check_encoding_set, writer, |
| 366 | + unlimited_dims=unlimited_dims) |
346 | 367 |
|
347 | 368 | def sync(self):
|
348 | 369 | pass
|
349 | 370 |
|
| 371 | + def set_variables(self, variables, check_encoding_set, writer, |
| 372 | + unlimited_dims=None): |
| 373 | + """ |
| 374 | + This provides a centralized method to set the variables on the data |
| 375 | + store. |
| 376 | +
|
| 377 | + Parameters |
| 378 | + ---------- |
| 379 | + variables : dict-like |
| 380 | + Dictionary of key/value (variable name / xr.Variable) pairs |
| 381 | + check_encoding_set : list-like |
| 382 | + List of variables that should be checked for invalid encoding |
| 383 | + values |
| 384 | + writer : ArrayWriter |
| 385 | + unlimited_dims : list-like |
| 386 | + List of dimension names that should be treated as unlimited |
| 387 | + dimensions. |
| 388 | + """ |
| 389 | + |
| 390 | + for vn, v in variables.items(): |
| 391 | + name = _encode_variable_name(vn) |
| 392 | + check = vn in check_encoding_set |
| 393 | + attrs = v.attrs.copy() |
| 394 | + dims = v.dims |
| 395 | + dtype = v.dtype |
| 396 | + shape = v.shape |
| 397 | + |
| 398 | + fill_value = attrs.pop('_FillValue', None) |
| 399 | + if v.encoding == {'_FillValue': None} and fill_value is None: |
| 400 | + v.encoding = {} |
| 401 | + if name in self.ds: |
| 402 | + zarr_array = self.ds[name] |
| 403 | + if self.append_dim in dims: |
| 404 | + # this variable has append_dim as a dimension: grow the |
| 405 | + # existing zarr array along that axis |
| 406 | + append_axis = dims.index(self.append_dim) |
| 407 | + new_shape = list(zarr_array.shape) |
| 408 | + new_shape[append_axis] += v.shape[append_axis] |
| 409 | + new_region = [slice(None)] * len(new_shape) |
| 410 | + new_region[append_axis] = slice( |
| 411 | + zarr_array.shape[append_axis], |
| 412 | + None |
| 413 | + ) |
| 414 | + zarr_array.resize(new_shape) |
| 415 | + writer.add(v.data, zarr_array, |
| 416 | + region=tuple(new_region)) |
| 417 | + else: |
| 418 | + # new variable |
| 419 | + encoding = _extract_zarr_variable_encoding( |
| 420 | + v, raise_on_invalid=check) |
| 421 | + encoded_attrs = OrderedDict() |
| 422 | + # the magic for storing the hidden dimension data |
| 423 | + encoded_attrs[_DIMENSION_KEY] = dims |
| 424 | + for k2, v2 in attrs.items(): |
| 425 | + encoded_attrs[k2] = self.encode_attribute(v2) |
| 426 | + |
| 427 | + if coding.strings.check_vlen_dtype(dtype) == str: |
| 428 | + dtype = str |
| 429 | + zarr_array = self.ds.create(name, shape=shape, dtype=dtype, |
| 430 | + fill_value=fill_value, **encoding) |
| 431 | + zarr_array.attrs.put(encoded_attrs) |
| 432 | + writer.add(v.data, zarr_array) |
| 433 | + |
350 | 434 | def close(self):
|
351 | 435 | if self._consolidate_on_close:
|
352 | 436 | import zarr
|
|
0 commit comments