From b22a56e5921f28eefc5ee958a91073cafe9e807d Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Wed, 24 Apr 2024 17:12:23 -0600 Subject: [PATCH 1/9] DAS-2065: direct migration of datatree/ops.py -> datatree_ops.py I considered wedging this into core/ops.py, but the datatree/ops.py stuff is kind of spread into core/ops.py and generated_aggregations.py. --- xarray/core/datatree.py | 10 +++++----- .../datatree/ops.py => core/datatree_ops.py} | 3 +-- 2 files changed, 6 insertions(+), 7 deletions(-) rename xarray/{datatree_/datatree/ops.py => core/datatree_ops.py} (99%) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 48c714b697c..e8c365a0ec7 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -23,6 +23,11 @@ check_isomorphic, map_over_subtree, ) +from xarray.core.datatree_ops import ( + DataTreeArithmeticMixin, + MappedDatasetMethodsMixin, + MappedDataWithCoords, +) from xarray.core.datatree_render import RenderDataTree from xarray.core.formatting import datatree_repr from xarray.core.formatting_html import ( @@ -42,11 +47,6 @@ ) from xarray.core.variable import Variable from xarray.datatree_.datatree.common import TreeAttrAccessMixin -from xarray.datatree_.datatree.ops import ( - DataTreeArithmeticMixin, - MappedDatasetMethodsMixin, - MappedDataWithCoords, -) try: from xarray.core.variable import calculate_dimensions diff --git a/xarray/datatree_/datatree/ops.py b/xarray/core/datatree_ops.py similarity index 99% rename from xarray/datatree_/datatree/ops.py rename to xarray/core/datatree_ops.py index 1ca8a7c1e01..f1f88922d7e 100644 --- a/xarray/datatree_/datatree/ops.py +++ b/xarray/core/datatree_ops.py @@ -1,7 +1,6 @@ import textwrap from xarray.core.dataset import Dataset - from xarray.core.datatree_mapping import map_over_subtree """ @@ -173,7 +172,7 @@ def _wrap_then_attach_to_cls( target_cls_dict, source_cls, methods_to_set, wrap_func=None ): """ - Attach given methods on a class, and optionally wrap each method first. (i.e. with map_over_subtree) + Attach given methods on a class, and optionally wrap each method first. (i.e. with map_over_subtree). Result is like having written this in the classes' definition: ``` From d072845166fd12238abcf7d7e7ae34428e83c620 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Thu, 25 Apr 2024 13:43:01 -0600 Subject: [PATCH 2/9] DAS-2065: doc tweak --- xarray/core/datatree_mapping.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/datatree_mapping.py b/xarray/core/datatree_mapping.py index 4da934f2085..6e5aae15562 100644 --- a/xarray/core/datatree_mapping.py +++ b/xarray/core/datatree_mapping.py @@ -98,10 +98,10 @@ def map_over_subtree(func: Callable) -> Callable: Function will not be applied to any nodes without datasets. *args : tuple, optional Positional arguments passed on to `func`. If DataTrees any data-containing nodes will be converted to Datasets - via .ds . + via `.ds`. **kwargs : Any Keyword arguments passed on to `func`. If DataTrees any data-containing nodes will be converted to Datasets - via .ds . + via `.ds`. Returns ------- From 7d59d3abfef1aef8f04267e51f3c7137c24c8664 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Thu, 25 Apr 2024 16:45:50 -0600 Subject: [PATCH 3/9] DAS-2065: Fix leading space in docstrings These are the only docstring that have a leading space and that was causing problems injecting the map_over_subtree information in the Datatree doc strings. --- xarray/core/dataarray.py | 2 +- xarray/core/dataset.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 509962ff80d..4177ca164ae 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -5260,7 +5260,7 @@ def differentiate( edge_order: Literal[1, 2] = 1, datetime_unit: DatetimeUnitOptions = None, ) -> Self: - """ Differentiate the array with the second order accurate central + """Differentiate the array with the second order accurate central differences. .. note:: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 96f3be00995..6d62fd4e3c2 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -8313,7 +8313,7 @@ def differentiate( edge_order: Literal[1, 2] = 1, datetime_unit: DatetimeUnitOptions | None = None, ) -> Self: - """ Differentiate with the second order accurate central + """Differentiate with the second order accurate central differences. .. note:: From 59540342b051a1d72e9cbd64020cc7b333df14c6 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Thu, 25 Apr 2024 19:28:38 -0600 Subject: [PATCH 4/9] DAS-2065: Puts the docstring addendum as second paragraph This works on most of the docstrings. The DatasetOpsMixin functions (round, argsorg, conj and conjugate) have different format and this gets inserted after the name (which is non standard in most docs) but before the description. --- xarray/core/datatree_ops.py | 51 +++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/xarray/core/datatree_ops.py b/xarray/core/datatree_ops.py index f1f88922d7e..37bc539c65d 100644 --- a/xarray/core/datatree_ops.py +++ b/xarray/core/datatree_ops.py @@ -1,3 +1,4 @@ +import re import textwrap from xarray.core.dataset import Dataset @@ -11,11 +12,10 @@ """ -_MAPPED_DOCSTRING_ADDENDUM = textwrap.fill( +_MAPPED_DOCSTRING_ADDENDUM = ( "This method was copied from xarray.Dataset, but has been altered to " "call the method on the Datasets stored in every node of the subtree. " - "See the `map_over_subtree` function for more details.", - width=117, + "See the `map_over_subtree` function for more details." ) # TODO equals, broadcast_equals etc. @@ -207,16 +207,41 @@ def method_name(self, *args, **kwargs): if wrap_func is map_over_subtree: # Add a paragraph to the method's docstring explaining how it's been mapped orig_method_docstring = orig_method.__doc__ - # if orig_method_docstring is not None: - # if "\n" in orig_method_docstring: - # new_method_docstring = orig_method_docstring.replace( - # "\n", _MAPPED_DOCSTRING_ADDENDUM, 1 - # ) - # else: - # new_method_docstring = ( - # orig_method_docstring + f"\n\n{_MAPPED_DOCSTRING_ADDENDUM}" - # ) - setattr(target_cls_dict[method_name], "__doc__", orig_method_docstring) + + if orig_method_docstring is not None: + new_method_docstring = insert_doc_addendum( + orig_method_docstring, _MAPPED_DOCSTRING_ADDENDUM + ) + setattr(target_cls_dict[method_name], "__doc__", new_method_docstring) + + +def insert_doc_addendum(docstring, addendum): + """Insert addendum after first paragraph or at the end of the docstring.""" + pattern = re.compile(r"^((\S+)?(.*?))(\n\s*\n)([ ]*)(.*)", re.DOTALL) + capture = re.match(pattern, docstring) + if capture is None: + ### single line docstring. + return docstring + "\n\n" + addendum + + g = capture.groups() + if len(g) == 6: + whitespace = g[4] + paragraph_break = g[3] + return ( + g[0] + + paragraph_break + + textwrap.fill( + addendum, + initial_indent=whitespace, + subsequent_indent=whitespace, + width=79, + ) + + paragraph_break + + whitespace + + g[5] + ) + else: + return docstring class MappedDatasetMethodsMixin: From 3a494ae53bb2cce407779e1e83256999d9fd99e4 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Fri, 26 Apr 2024 10:20:06 -0600 Subject: [PATCH 5/9] DAS-2065: Change doc search to named captures just for clarity. --- xarray/core/datatree_ops.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/xarray/core/datatree_ops.py b/xarray/core/datatree_ops.py index 37bc539c65d..4b02c92876c 100644 --- a/xarray/core/datatree_ops.py +++ b/xarray/core/datatree_ops.py @@ -217,28 +217,28 @@ def method_name(self, *args, **kwargs): def insert_doc_addendum(docstring, addendum): """Insert addendum after first paragraph or at the end of the docstring.""" - pattern = re.compile(r"^((\S+)?(.*?))(\n\s*\n)([ ]*)(.*)", re.DOTALL) + pattern = re.compile( + r"^(?P(\S+)?(.*?))(?P\n\s*\n)(?P[ ]*)(?P.*)", + re.DOTALL, + ) capture = re.match(pattern, docstring) if capture is None: ### single line docstring. return docstring + "\n\n" + addendum - g = capture.groups() - if len(g) == 6: - whitespace = g[4] - paragraph_break = g[3] + if len(capture.groups()) == 6: return ( - g[0] - + paragraph_break + capture["front"] + + capture["paragraph_break"] + textwrap.fill( addendum, - initial_indent=whitespace, - subsequent_indent=whitespace, + initial_indent=capture["whitespace"], + subsequent_indent=capture["whitespace"], width=79, ) - + paragraph_break - + whitespace - + g[5] + + capture["paragraph_break"] + + capture["whitespace"] + + capture["rest"] ) else: return docstring From 5859b5c1c46ec5fc43e67ef93a11c68f0cd797ba Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Fri, 26 Apr 2024 12:39:53 -0600 Subject: [PATCH 6/9] DAS-2065: Additonal update to make the addendum a Note Just syntactic sugar to make that work --- xarray/core/datatree_ops.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/xarray/core/datatree_ops.py b/xarray/core/datatree_ops.py index 4b02c92876c..136f1747b1f 100644 --- a/xarray/core/datatree_ops.py +++ b/xarray/core/datatree_ops.py @@ -218,7 +218,7 @@ def method_name(self, *args, **kwargs): def insert_doc_addendum(docstring, addendum): """Insert addendum after first paragraph or at the end of the docstring.""" pattern = re.compile( - r"^(?P(\S+)?(.*?))(?P\n\s*\n)(?P[ ]*)(?P.*)", + r"^(?P(\S+)?(.*?))(?P\n\s*\n)(?P[ ]*)(?P.*)", re.DOTALL, ) capture = re.match(pattern, docstring) @@ -228,12 +228,14 @@ def insert_doc_addendum(docstring, addendum): if len(capture.groups()) == 6: return ( - capture["front"] + capture["start"] + capture["paragraph_break"] + + capture["whitespace"] + + ".. note::\n" + textwrap.fill( addendum, - initial_indent=capture["whitespace"], - subsequent_indent=capture["whitespace"], + initial_indent=capture["whitespace"] + " ", + subsequent_indent=capture["whitespace"] + " ", width=79, ) + capture["paragraph_break"] From 3575257944af9f25324a8d0625715f6f83365513 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Fri, 26 Apr 2024 14:02:51 -0600 Subject: [PATCH 7/9] DAS-2065: Adds tests to doc_addendum --- xarray/core/datatree_ops.py | 23 ++++++++++- xarray/tests/test_datatree.py | 78 +++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/xarray/core/datatree_ops.py b/xarray/core/datatree_ops.py index 136f1747b1f..36948a28617 100644 --- a/xarray/core/datatree_ops.py +++ b/xarray/core/datatree_ops.py @@ -216,7 +216,18 @@ def method_name(self, *args, **kwargs): def insert_doc_addendum(docstring, addendum): - """Insert addendum after first paragraph or at the end of the docstring.""" + """Insert addendum after first paragraph or at the end of the docstring. + + There are a number of Dataset's functions that are wrapped. These come from + Dataset directly as well as the mixins: DataWithCoords, DatasetAggregations, and DatasetOpsMixin. + + The majority of the docstrings fall into a parseable pattern. Those that + don't, just have the addendum appeneded after. None values are returned. + + """ + if docstring is None: + return None + pattern = re.compile( r"^(?P(\S+)?(.*?))(?P\n\s*\n)(?P[ ]*)(?P.*)", re.DOTALL, @@ -224,7 +235,15 @@ def insert_doc_addendum(docstring, addendum): capture = re.match(pattern, docstring) if capture is None: ### single line docstring. - return docstring + "\n\n" + addendum + return ( + docstring + + "\n\n" + + textwrap.fill( + addendum, + subsequent_indent=" ", + width=79, + ) + ) if len(capture.groups()) == 6: return ( diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index e667c8670c7..58fec20d4c6 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -1,10 +1,12 @@ from copy import copy, deepcopy +from textwrap import dedent import numpy as np import pytest import xarray as xr from xarray.core.datatree import DataTree +from xarray.core.datatree_ops import _MAPPED_DOCSTRING_ADDENDUM, insert_doc_addendum from xarray.core.treenode import NotFoundInTreeError from xarray.testing import assert_equal, assert_identical from xarray.tests import create_test_data, source_ndarray @@ -824,3 +826,79 @@ def test_tree(self, create_test_datatree): expected = create_test_datatree(modify=lambda ds: np.sin(ds)) result_tree = np.sin(dt) assert_equal(result_tree, expected) + + +class TestDocInsertion: + """Tests map_over_subtree docstring injection.""" + + def test_standard_doc(self): + + dataset_doc = dedent( + """\ + Manually trigger loading and/or computation of this dataset's data + from disk or a remote source into memory and return this dataset. + Unlike compute, the original dataset is modified and returned. + + Normally, it should not be necessary to call this method in user code, + because all xarray functions should either work on deferred data or + load data automatically. However, this method can be necessary when + working with many file objects on disk. + + Parameters + ---------- + **kwargs : dict + Additional keyword arguments passed on to ``dask.compute``. + + See Also + -------- + dask.compute""" + ) + + expected_doc = dedent( + """\ + Manually trigger loading and/or computation of this dataset's data + from disk or a remote source into memory and return this dataset. + Unlike compute, the original dataset is modified and returned. + + .. note:: + This method was copied from xarray.Dataset, but has been altered to + call the method on the Datasets stored in every node of the + subtree. See the `map_over_subtree` function for more details. + + Normally, it should not be necessary to call this method in user code, + because all xarray functions should either work on deferred data or + load data automatically. However, this method can be necessary when + working with many file objects on disk. + + Parameters + ---------- + **kwargs : dict + Additional keyword arguments passed on to ``dask.compute``. + + See Also + -------- + dask.compute""" + ) + + wrapped_doc = insert_doc_addendum(dataset_doc, _MAPPED_DOCSTRING_ADDENDUM) + + assert expected_doc == wrapped_doc + + def test_one_liner(self): + mixin_doc = "Same as abs(a)." + + expected_doc = dedent( + """\ + Same as abs(a). + + This method was copied from xarray.Dataset, but has been altered to call the + method on the Datasets stored in every node of the subtree. See the + `map_over_subtree` function for more details.""" + ) + + actual_doc = insert_doc_addendum(mixin_doc, _MAPPED_DOCSTRING_ADDENDUM) + assert expected_doc == actual_doc + + def test_none(self): + actual_doc = insert_doc_addendum(None, _MAPPED_DOCSTRING_ADDENDUM) + assert actual_doc is None From f80a4668dc0719745f417345d8185e96e22444b4 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Fri, 26 Apr 2024 14:18:54 -0600 Subject: [PATCH 8/9] DAS-2065: Add credits --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8b05b729a31..c9c49374b20 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -53,6 +53,8 @@ Internal Changes ``xarray/testing/assertions`` for ``DataTree``. (:pull:`8967`) By `Owen Littlejohns `_ and `Tom Nicholas `_. +- Migrates ``ops.py`` functionality into ``xarray/core/datatree_ops.py`` (:pull:`8976`) + By `Matt Savoie `_ and `Tom Nicholas `_. .. _whats-new.2024.03.0: From 696cd3eb764c7a2ecf4be6bdffdf7839e19a509a Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Tue, 30 Apr 2024 16:51:55 -0600 Subject: [PATCH 9/9] DAS-2065: Adds types --- xarray/core/datatree_ops.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/core/datatree_ops.py b/xarray/core/datatree_ops.py index 36948a28617..bc64b44ae1e 100644 --- a/xarray/core/datatree_ops.py +++ b/xarray/core/datatree_ops.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import re import textwrap @@ -215,7 +217,7 @@ def method_name(self, *args, **kwargs): setattr(target_cls_dict[method_name], "__doc__", new_method_docstring) -def insert_doc_addendum(docstring, addendum): +def insert_doc_addendum(docstring: str | None, addendum: str) -> str | None: """Insert addendum after first paragraph or at the end of the docstring. There are a number of Dataset's functions that are wrapped. These come from