Skip to content

Commit f5ae623

Browse files
authored
Migration of datatree/ops.py -> datatree_ops.py (#8976)
* DAS-2065: direct migration of datatree/ops.py -> datatree_ops.py I considered wedging this into core/ops.py, but the datatree/ops.py stuff is kind of spread into core/ops.py and generated_aggregations.py. * DAS-2065: doc tweak * DAS-2065: Fix leading space in docstrings These are the only docstrings that have a leading space and that was causing problems injecting the map_over_subtree information in the Datatree doc strings. * DAS-2065: Puts the docstring addendum as second paragraph This works on most of the docstrings. The DatasetOpsMixin functions (round, argsort, conj and conjugate) have different format and this gets inserted after the name (which is non standard in most docs) but before the description. * DAS-2065: Change doc search to named captures just for clarity. * DAS-2065: Additional update to make the addendum a Note Just syntactic sugar to make that work * DAS-2065: Adds tests to doc_addendum * DAS-2065: Add credits * DAS-2065: Adds types
1 parent 748bb3a commit f5ae623

File tree

7 files changed

+151
-24
lines changed

7 files changed

+151
-24
lines changed

Diff for: doc/whats-new.rst

+2
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ Internal Changes
7373
``xarray/testing/assertions`` for ``DataTree``. (:pull:`8967`)
7474
By `Owen Littlejohns <https://github.com/owenlittlejohns>`_ and
7575
`Tom Nicholas <https://github.com/TomNicholas>`_.
76+
- Migrates ``ops.py`` functionality into ``xarray/core/datatree_ops.py`` (:pull:`8976`)
77+
By `Matt Savoie <https://github.com/flamingbear>`_ and `Tom Nicholas <https://github.com/TomNicholas>`_.
7678
- ``transpose``, ``set_dims``, ``stack`` & ``unstack`` now use a ``dim`` kwarg
7779
rather than ``dims`` or ``dimensions``. This is the final change to make xarray methods
7880
consistent with their use of ``dim``. Using the existing kwarg will raise a

Diff for: xarray/core/dataarray.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5269,7 +5269,7 @@ def differentiate(
52695269
edge_order: Literal[1, 2] = 1,
52705270
datetime_unit: DatetimeUnitOptions = None,
52715271
) -> Self:
5272-
""" Differentiate the array with the second order accurate central
5272+
"""Differentiate the array with the second order accurate central
52735273
differences.
52745274
52755275
.. note::

Diff for: xarray/core/dataset.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8354,7 +8354,7 @@ def differentiate(
83548354
edge_order: Literal[1, 2] = 1,
83558355
datetime_unit: DatetimeUnitOptions | None = None,
83568356
) -> Self:
8357-
""" Differentiate with the second order accurate central
8357+
"""Differentiate with the second order accurate central
83588358
differences.
83598359
83608360
.. note::

Diff for: xarray/core/datatree.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@
2323
check_isomorphic,
2424
map_over_subtree,
2525
)
26+
from xarray.core.datatree_ops import (
27+
DataTreeArithmeticMixin,
28+
MappedDatasetMethodsMixin,
29+
MappedDataWithCoords,
30+
)
2631
from xarray.core.datatree_render import RenderDataTree
2732
from xarray.core.formatting import datatree_repr
2833
from xarray.core.formatting_html import (
@@ -42,11 +47,6 @@
4247
)
4348
from xarray.core.variable import Variable
4449
from xarray.datatree_.datatree.common import TreeAttrAccessMixin
45-
from xarray.datatree_.datatree.ops import (
46-
DataTreeArithmeticMixin,
47-
MappedDatasetMethodsMixin,
48-
MappedDataWithCoords,
49-
)
5050

5151
try:
5252
from xarray.core.variable import calculate_dimensions

Diff for: xarray/core/datatree_mapping.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,10 @@ def map_over_subtree(func: Callable) -> Callable:
9898
Function will not be applied to any nodes without datasets.
9999
*args : tuple, optional
100100
Positional arguments passed on to `func`. If DataTrees any data-containing nodes will be converted to Datasets
101-
via .ds .
101+
via `.ds`.
102102
**kwargs : Any
103103
Keyword arguments passed on to `func`. If DataTrees any data-containing nodes will be converted to Datasets
104-
via .ds .
104+
via `.ds`.
105105
106106
Returns
107107
-------

Diff for: xarray/datatree_/datatree/ops.py renamed to xarray/core/datatree_ops.py

+62-15
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
from __future__ import annotations
2+
3+
import re
14
import textwrap
25

36
from xarray.core.dataset import Dataset
4-
57
from xarray.core.datatree_mapping import map_over_subtree
68

79
"""
@@ -12,11 +14,10 @@
1214
"""
1315

1416

15-
_MAPPED_DOCSTRING_ADDENDUM = textwrap.fill(
17+
_MAPPED_DOCSTRING_ADDENDUM = (
1618
"This method was copied from xarray.Dataset, but has been altered to "
1719
"call the method on the Datasets stored in every node of the subtree. "
18-
"See the `map_over_subtree` function for more details.",
19-
width=117,
20+
"See the `map_over_subtree` function for more details."
2021
)
2122

2223
# TODO equals, broadcast_equals etc.
@@ -173,7 +174,7 @@ def _wrap_then_attach_to_cls(
173174
target_cls_dict, source_cls, methods_to_set, wrap_func=None
174175
):
175176
"""
176-
Attach given methods on a class, and optionally wrap each method first. (i.e. with map_over_subtree)
177+
Attach given methods on a class, and optionally wrap each method first. (i.e. with map_over_subtree).
177178
178179
Result is like having written this in the classes' definition:
179180
```
@@ -208,16 +209,62 @@ def method_name(self, *args, **kwargs):
208209
if wrap_func is map_over_subtree:
209210
# Add a paragraph to the method's docstring explaining how it's been mapped
210211
orig_method_docstring = orig_method.__doc__
211-
# if orig_method_docstring is not None:
212-
# if "\n" in orig_method_docstring:
213-
# new_method_docstring = orig_method_docstring.replace(
214-
# "\n", _MAPPED_DOCSTRING_ADDENDUM, 1
215-
# )
216-
# else:
217-
# new_method_docstring = (
218-
# orig_method_docstring + f"\n\n{_MAPPED_DOCSTRING_ADDENDUM}"
219-
# )
220-
setattr(target_cls_dict[method_name], "__doc__", orig_method_docstring)
212+
213+
if orig_method_docstring is not None:
214+
new_method_docstring = insert_doc_addendum(
215+
orig_method_docstring, _MAPPED_DOCSTRING_ADDENDUM
216+
)
217+
setattr(target_cls_dict[method_name], "__doc__", new_method_docstring)
218+
219+
220+
def insert_doc_addendum(docstring: str | None, addendum: str) -> str | None:
221+
"""Insert addendum after first paragraph or at the end of the docstring.
222+
223+
There are a number of Dataset's functions that are wrapped. These come from
224+
Dataset directly as well as the mixins: DataWithCoords, DatasetAggregations, and DatasetOpsMixin.
225+
226+
The majority of the docstrings fall into a parseable pattern. Those that
227+
don't, just have the addendum appeneded after. None values are returned.
228+
229+
"""
230+
if docstring is None:
231+
return None
232+
233+
pattern = re.compile(
234+
r"^(?P<start>(\S+)?(.*?))(?P<paragraph_break>\n\s*\n)(?P<whitespace>[ ]*)(?P<rest>.*)",
235+
re.DOTALL,
236+
)
237+
capture = re.match(pattern, docstring)
238+
if capture is None:
239+
### single line docstring.
240+
return (
241+
docstring
242+
+ "\n\n"
243+
+ textwrap.fill(
244+
addendum,
245+
subsequent_indent=" ",
246+
width=79,
247+
)
248+
)
249+
250+
if len(capture.groups()) == 6:
251+
return (
252+
capture["start"]
253+
+ capture["paragraph_break"]
254+
+ capture["whitespace"]
255+
+ ".. note::\n"
256+
+ textwrap.fill(
257+
addendum,
258+
initial_indent=capture["whitespace"] + " ",
259+
subsequent_indent=capture["whitespace"] + " ",
260+
width=79,
261+
)
262+
+ capture["paragraph_break"]
263+
+ capture["whitespace"]
264+
+ capture["rest"]
265+
)
266+
else:
267+
return docstring
221268

222269

223270
class MappedDatasetMethodsMixin:

Diff for: xarray/tests/test_datatree.py

+78
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
from copy import copy, deepcopy
2+
from textwrap import dedent
23

34
import numpy as np
45
import pytest
56

67
import xarray as xr
78
from xarray.core.datatree import DataTree
9+
from xarray.core.datatree_ops import _MAPPED_DOCSTRING_ADDENDUM, insert_doc_addendum
810
from xarray.core.treenode import NotFoundInTreeError
911
from xarray.testing import assert_equal, assert_identical
1012
from xarray.tests import create_test_data, source_ndarray
@@ -824,3 +826,79 @@ def test_tree(self, create_test_datatree):
824826
expected = create_test_datatree(modify=lambda ds: np.sin(ds))
825827
result_tree = np.sin(dt)
826828
assert_equal(result_tree, expected)
829+
830+
831+
class TestDocInsertion:
    """Tests map_over_subtree docstring injection."""

    def test_standard_doc(self):
        """A multi-paragraph docstring gets the addendum as a note after paragraph one."""
        # The summary line sits at the base indent while every following line
        # is indented 8 further, mimicking a raw method ``__doc__`` (text on
        # the same line as the opening quotes, body at method indentation).
        dataset_doc = dedent(
            """\
            Manually trigger loading and/or computation of this dataset's data
                    from disk or a remote source into memory and return this dataset.
                    Unlike compute, the original dataset is modified and returned.

                    Normally, it should not be necessary to call this method in user code,
                    because all xarray functions should either work on deferred data or
                    load data automatically. However, this method can be necessary when
                    working with many file objects on disk.

                    Parameters
                    ----------
                    **kwargs : dict
                        Additional keyword arguments passed on to ``dask.compute``.

                    See Also
                    --------
                    dask.compute"""
        )

        # The note directive aligns with the second paragraph; its body is
        # four spaces deeper and wrapped at 79 columns.
        expected_doc = dedent(
            """\
            Manually trigger loading and/or computation of this dataset's data
                    from disk or a remote source into memory and return this dataset.
                    Unlike compute, the original dataset is modified and returned.

                    .. note::
                        This method was copied from xarray.Dataset, but has been altered to
                        call the method on the Datasets stored in every node of the
                        subtree. See the `map_over_subtree` function for more details.

                    Normally, it should not be necessary to call this method in user code,
                    because all xarray functions should either work on deferred data or
                    load data automatically. However, this method can be necessary when
                    working with many file objects on disk.

                    Parameters
                    ----------
                    **kwargs : dict
                        Additional keyword arguments passed on to ``dask.compute``.

                    See Also
                    --------
                    dask.compute"""
        )

        wrapped_doc = insert_doc_addendum(dataset_doc, _MAPPED_DOCSTRING_ADDENDUM)

        assert expected_doc == wrapped_doc

    def test_one_liner(self):
        """A docstring without a paragraph break gets the addendum appended."""
        mixin_doc = "Same as abs(a)."

        # Continuation lines of the appended paragraph carry a four-space
        # hanging indent.
        expected_doc = dedent(
            """\
            Same as abs(a).

            This method was copied from xarray.Dataset, but has been altered to call the
                method on the Datasets stored in every node of the subtree. See the
                `map_over_subtree` function for more details."""
        )

        actual_doc = insert_doc_addendum(mixin_doc, _MAPPED_DOCSTRING_ADDENDUM)
        assert expected_doc == actual_doc

    def test_none(self):
        """A missing docstring is passed through as None."""
        actual_doc = insert_doc_addendum(None, _MAPPED_DOCSTRING_ADDENDUM)
        assert actual_doc is None

0 commit comments

Comments
 (0)