diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 184168e551a..0f79b648187 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -73,6 +73,8 @@ Internal Changes ``xarray/testing/assertions`` for ``DataTree``. (:pull:`8967`) By `Owen Littlejohns `_ and `Tom Nicholas `_. +- Migrates ``ops.py`` functionality into ``xarray/core/datatree_ops.py`` (:pull:`8976`) + By `Matt Savoie `_ and `Tom Nicholas `_. - ``transpose``, ``set_dims``, ``stack`` & ``unstack`` now use a ``dim`` kwarg rather than ``dims`` or ``dimensions``. This is the final change to make xarray methods consistent with their use of ``dim``. Using the existing kwarg will raise a diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index d5aa4688ce1..c89dedf1215 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -5269,7 +5269,7 @@ def differentiate( edge_order: Literal[1, 2] = 1, datetime_unit: DatetimeUnitOptions = None, ) -> Self: - """ Differentiate the array with the second order accurate central + """Differentiate the array with the second order accurate central differences. .. note:: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 57ddcd9d39d..4fca874bf2c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -8354,7 +8354,7 @@ def differentiate( edge_order: Literal[1, 2] = 1, datetime_unit: DatetimeUnitOptions | None = None, ) -> Self: - """ Differentiate with the second order accurate central + """Differentiate with the second order accurate central differences. .. 
note:: diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 48c714b697c..e8c365a0ec7 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -23,6 +23,11 @@ check_isomorphic, map_over_subtree, ) +from xarray.core.datatree_ops import ( + DataTreeArithmeticMixin, + MappedDatasetMethodsMixin, + MappedDataWithCoords, +) from xarray.core.datatree_render import RenderDataTree from xarray.core.formatting import datatree_repr from xarray.core.formatting_html import ( @@ -42,11 +47,6 @@ ) from xarray.core.variable import Variable from xarray.datatree_.datatree.common import TreeAttrAccessMixin -from xarray.datatree_.datatree.ops import ( - DataTreeArithmeticMixin, - MappedDatasetMethodsMixin, - MappedDataWithCoords, -) try: from xarray.core.variable import calculate_dimensions diff --git a/xarray/core/datatree_mapping.py b/xarray/core/datatree_mapping.py index 4da934f2085..6e5aae15562 100644 --- a/xarray/core/datatree_mapping.py +++ b/xarray/core/datatree_mapping.py @@ -98,10 +98,10 @@ def map_over_subtree(func: Callable) -> Callable: Function will not be applied to any nodes without datasets. *args : tuple, optional Positional arguments passed on to `func`. If DataTrees any data-containing nodes will be converted to Datasets - via .ds . + via `.ds`. **kwargs : Any Keyword arguments passed on to `func`. If DataTrees any data-containing nodes will be converted to Datasets - via .ds . + via `.ds`. 
Returns ------- diff --git a/xarray/datatree_/datatree/ops.py b/xarray/core/datatree_ops.py similarity index 75% rename from xarray/datatree_/datatree/ops.py rename to xarray/core/datatree_ops.py index 1ca8a7c1e01..bc64b44ae1e 100644 --- a/xarray/datatree_/datatree/ops.py +++ b/xarray/core/datatree_ops.py @@ -1,7 +1,9 @@ +from __future__ import annotations + +import re import textwrap from xarray.core.dataset import Dataset - from xarray.core.datatree_mapping import map_over_subtree """ @@ -12,11 +14,10 @@ """ -_MAPPED_DOCSTRING_ADDENDUM = textwrap.fill( +_MAPPED_DOCSTRING_ADDENDUM = ( "This method was copied from xarray.Dataset, but has been altered to " "call the method on the Datasets stored in every node of the subtree. " - "See the `map_over_subtree` function for more details.", - width=117, + "See the `map_over_subtree` function for more details." ) # TODO equals, broadcast_equals etc. @@ -173,7 +174,7 @@ def _wrap_then_attach_to_cls( target_cls_dict, source_cls, methods_to_set, wrap_func=None ): """ - Attach given methods on a class, and optionally wrap each method first. (i.e. with map_over_subtree) + Attach given methods on a class, and optionally wrap each method first. (i.e. with map_over_subtree). 
Result is like having written this in the classes' definition: ``` @@ -208,16 +209,62 @@ def method_name(self, *args, **kwargs): if wrap_func is map_over_subtree: # Add a paragraph to the method's docstring explaining how it's been mapped orig_method_docstring = orig_method.__doc__ - # if orig_method_docstring is not None: - # if "\n" in orig_method_docstring: - # new_method_docstring = orig_method_docstring.replace( - # "\n", _MAPPED_DOCSTRING_ADDENDUM, 1 - # ) - # else: - # new_method_docstring = ( - # orig_method_docstring + f"\n\n{_MAPPED_DOCSTRING_ADDENDUM}" - # ) - setattr(target_cls_dict[method_name], "__doc__", orig_method_docstring) + + if orig_method_docstring is not None: + new_method_docstring = insert_doc_addendum( + orig_method_docstring, _MAPPED_DOCSTRING_ADDENDUM + ) + setattr(target_cls_dict[method_name], "__doc__", new_method_docstring) + + +def insert_doc_addendum(docstring: str | None, addendum: str) -> str | None: + """Insert addendum after first paragraph or at the end of the docstring. + + There are a number of Dataset's functions that are wrapped. These come from + Dataset directly as well as the mixins: DataWithCoords, DatasetAggregations, and DatasetOpsMixin. + + The majority of the docstrings fall into a parseable pattern. Those that + don't, just have the addendum appended after. None values are returned. + + """ + if docstring is None: + return None + + pattern = re.compile( + r"^(?P<start>(\S+)?(.*?))(?P<paragraph_break>\n\s*\n)(?P<whitespace>[ ]*)(?P<rest>.*)", + re.DOTALL, + ) + capture = re.match(pattern, docstring) + if capture is None: + ### single line docstring. + return ( + docstring + + "\n\n" + + textwrap.fill( + addendum, + subsequent_indent=" ", + width=79, + ) + ) + + if len(capture.groups()) == 6: + return ( + capture["start"] + + capture["paragraph_break"] + + capture["whitespace"] + + ".. 
note::\n" + + textwrap.fill( + addendum, + initial_indent=capture["whitespace"] + " ", + subsequent_indent=capture["whitespace"] + " ", + width=79, + ) + + capture["paragraph_break"] + + capture["whitespace"] + + capture["rest"] + ) + else: + return docstring class MappedDatasetMethodsMixin: diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index e667c8670c7..58fec20d4c6 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -1,10 +1,12 @@ from copy import copy, deepcopy +from textwrap import dedent import numpy as np import pytest import xarray as xr from xarray.core.datatree import DataTree +from xarray.core.datatree_ops import _MAPPED_DOCSTRING_ADDENDUM, insert_doc_addendum from xarray.core.treenode import NotFoundInTreeError from xarray.testing import assert_equal, assert_identical from xarray.tests import create_test_data, source_ndarray @@ -824,3 +826,79 @@ def test_tree(self, create_test_datatree): expected = create_test_datatree(modify=lambda ds: np.sin(ds)) result_tree = np.sin(dt) assert_equal(result_tree, expected) + + +class TestDocInsertion: + """Tests map_over_subtree docstring injection.""" + + def test_standard_doc(self): + + dataset_doc = dedent( + """\ + Manually trigger loading and/or computation of this dataset's data + from disk or a remote source into memory and return this dataset. + Unlike compute, the original dataset is modified and returned. + + Normally, it should not be necessary to call this method in user code, + because all xarray functions should either work on deferred data or + load data automatically. However, this method can be necessary when + working with many file objects on disk. + + Parameters + ---------- + **kwargs : dict + Additional keyword arguments passed on to ``dask.compute``. 
+ + See Also + -------- + dask.compute""" + ) + + expected_doc = dedent( + """\ + Manually trigger loading and/or computation of this dataset's data + from disk or a remote source into memory and return this dataset. + Unlike compute, the original dataset is modified and returned. + + .. note:: + This method was copied from xarray.Dataset, but has been altered to + call the method on the Datasets stored in every node of the + subtree. See the `map_over_subtree` function for more details. + + Normally, it should not be necessary to call this method in user code, + because all xarray functions should either work on deferred data or + load data automatically. However, this method can be necessary when + working with many file objects on disk. + + Parameters + ---------- + **kwargs : dict + Additional keyword arguments passed on to ``dask.compute``. + + See Also + -------- + dask.compute""" + ) + + wrapped_doc = insert_doc_addendum(dataset_doc, _MAPPED_DOCSTRING_ADDENDUM) + + assert expected_doc == wrapped_doc + + def test_one_liner(self): + mixin_doc = "Same as abs(a)." + + expected_doc = dedent( + """\ + Same as abs(a). + + This method was copied from xarray.Dataset, but has been altered to call the + method on the Datasets stored in every node of the subtree. See the + `map_over_subtree` function for more details.""" + ) + + actual_doc = insert_doc_addendum(mixin_doc, _MAPPED_DOCSTRING_ADDENDUM) + assert expected_doc == actual_doc + + def test_none(self): + actual_doc = insert_doc_addendum(None, _MAPPED_DOCSTRING_ADDENDUM) + assert actual_doc is None