-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Migrate datatree assertions/extensions/formatting #8967
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b565c92
ace6188
2e703dd
0338eff
3f963cb
a55e499
3f92413
e4b7a42
63e2836
970b10d
ea7691a
372dcbe
6033f2d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,266 @@ | ||
""" | ||
String Tree Rendering. Copied from anytree. | ||
|
||
Minor changes to `RenderDataTree` include accessing `children.values()`, and | ||
type hints. | ||
|
||
""" | ||
|
||
from __future__ import annotations | ||
|
||
from collections import namedtuple | ||
from collections.abc import Iterable, Iterator | ||
from typing import TYPE_CHECKING | ||
|
||
if TYPE_CHECKING: | ||
from xarray.core.datatree import DataTree | ||
|
||
Row = namedtuple("Row", ("pre", "fill", "node")) | ||
|
||
|
||
class AbstractStyle:
    """Fixed-width prefix strings used to draw the branches of a tree."""

    def __init__(self, vertical: str, cont: str, end: str):
        """
        Tree Render Style.

        Args:
            vertical: Sign for vertical line.
            cont: Chars for a continued branch.
            end: Chars for the last branch.

        Raises:
            AssertionError: If the three strings do not have equal length.
        """
        super().__init__()
        self.vertical = vertical
        self.cont = cont
        self.end = end
        # Raised explicitly instead of via ``assert`` so the width check is
        # not stripped when Python runs with ``-O``; the exception type and
        # message are the same as before.
        if not len(cont) == len(vertical) == len(end):
            raise AssertionError(
                f"'{vertical}', '{cont}' and '{end}' need to have equal length"
            )

    @property
    def empty(self) -> str:
        """Empty string as placeholder (spaces, as wide as ``end``)."""
        return " " * len(self.end)

    def __repr__(self) -> str:
        # Only the class name is shown; the constructor arguments are omitted.
        return f"{self.__class__.__name__}()"
|
||
|
||
class ContStyle(AbstractStyle):
    def __init__(self):
        """
        Continued style, without gaps.

        >>> from xarray.core.datatree import DataTree
        >>> from xarray.core.datatree_render import RenderDataTree
        >>> root = DataTree(name="root")
        >>> s0 = DataTree(name="sub0", parent=root)
        >>> s0b = DataTree(name="sub0B", parent=s0)
        >>> s0a = DataTree(name="sub0A", parent=s0)
        >>> s1 = DataTree(name="sub1", parent=root)
        >>> print(RenderDataTree(root))
        DataTree('root', parent=None)
        ├── DataTree('sub0')
        │   ├── DataTree('sub0B')
        │   └── DataTree('sub0A')
        └── DataTree('sub1')
        """
        # All three strings must be the same width (four characters here):
        # AbstractStyle.__init__ rejects styles whose parts differ in length,
        # so the vertical bar is padded with three spaces.
        super().__init__("\u2502   ", "\u251c\u2500\u2500 ", "\u2514\u2500\u2500 ")
|
||
|
||
class RenderDataTree:
    """Iterable that walks a tree and yields one drawing row per node."""

    def __init__(
        self,
        node: DataTree,
        style=ContStyle(),
        childiter: type = list,
        maxlevel: int | None = None,
    ):
        """
        Render tree starting at `node`.

        Keyword Args:
            style (AbstractStyle): Render Style. Anything that is not an
                `AbstractStyle` instance is called with no arguments to
                obtain one (so a style class may be passed).
            childiter: Child iterator. Note, due to the use of node.children.values(),
                Iterables that change the order of children cannot be used
                (e.g., `reversed`).
            maxlevel: Limit rendering to this depth.

        :any:`RenderDataTree` is an iterator, returning a tuple with 3 items:

        `pre`
            tree prefix.
        `fill`
            filling for multiline entries.
        `node`
            the `DataTree` node for this row.

        It is up to the user to assemble these parts to a whole.

        Examples
        --------

        >>> from xarray import Dataset
        >>> from xarray.core.datatree import DataTree
        >>> from xarray.core.datatree_render import RenderDataTree
        >>> root = DataTree(name="root", data=Dataset({"a": 0, "b": 1}))
        >>> s0 = DataTree(name="sub0", parent=root, data=Dataset({"c": 2, "d": 3}))
        >>> s0b = DataTree(name="sub0B", parent=s0, data=Dataset({"e": 4}))
        >>> s0a = DataTree(name="sub0A", parent=s0, data=Dataset({"f": 5, "g": 6}))
        >>> s1 = DataTree(name="sub1", parent=root, data=Dataset({"h": 7}))

        # Simple one line:

        >>> for pre, _, node in RenderDataTree(root):
        ...     print(f"{pre}{node.name}")
        ...
        root
        ├── sub0
        │   ├── sub0B
        │   └── sub0A
        └── sub1

        # Multiline:

        >>> for pre, fill, node in RenderDataTree(root):
        ...     print(f"{pre}{node.name}")
        ...     for variable in node.variables:
        ...         print(f"{fill}{variable}")
        ...
        root
        a
        b
        ├── sub0
        │   c
        │   d
        │   ├── sub0B
        │   │   e
        │   └── sub0A
        │       f
        │       g
        └── sub1
            h

        :any:`by_attr` simplifies attribute rendering and supports multiline:

        >>> print(RenderDataTree(root).by_attr())
        root
        ├── sub0
        │   ├── sub0B
        │   └── sub0A
        └── sub1

        # `maxlevel` limits the depth of the tree:

        >>> print(RenderDataTree(root, maxlevel=2).by_attr("name"))
        root
        ├── sub0
        └── sub1
        """
        if not isinstance(style, AbstractStyle):
            # A style class (or other zero-argument factory) was passed.
            style = style()
        self.node = node
        self.style = style
        self.childiter = childiter
        self.maxlevel = maxlevel

    def __iter__(self) -> Iterator[Row]:
        """Yield a `Row` for the start node and then for each descendant."""
        return self.__next(self.node, tuple())

    def __next(
        self, node: DataTree, continues: tuple[bool, ...], level: int = 0
    ) -> Iterator[Row]:
        """
        Yield the row for `node`, then recurse into its children.

        `continues` holds one flag per ancestor level: True when more
        siblings follow at that level, so a vertical line must be drawn.
        """
        yield RenderDataTree.__item(node, continues, self.style)
        children = node.children.values()
        level += 1
        # Stop descending once the requested depth has been reached.
        if children and (self.maxlevel is None or level < self.maxlevel):
            children = self.childiter(children)
            for child, is_last in _is_last(children):
                yield from self.__next(child, continues + (not is_last,), level=level)

    @staticmethod
    def __item(
        node: DataTree, continues: tuple[bool, ...], style: AbstractStyle
    ) -> Row:
        """Build the `Row` (prefix, multiline fill, node) for one node."""
        if not continues:
            # The start node has neither prefix nor fill.
            return Row("", "", node)
        else:
            items = [style.vertical if cont else style.empty for cont in continues]
            # Ancestor columns come first, then this node's own branch marker.
            indent = "".join(items[:-1])
            branch = style.cont if continues[-1] else style.end
            pre = indent + branch
            fill = "".join(items)
            return Row(pre, fill, node)

    def __str__(self) -> str:
        """Return ``str()`` of the start node."""
        return str(self.node)

    def __repr__(self) -> str:
        classname = self.__class__.__name__
        args = [
            repr(self.node),
            f"style={repr(self.style)}",
            f"childiter={repr(self.childiter)}",
        ]
        return f"{classname}({', '.join(args)})"

    def by_attr(self, attrname: str = "name") -> str:
        """
        Return rendered tree with node attribute `attrname`.

        `attrname` is normally an attribute name (a missing attribute renders
        as an empty label); a callable is also accepted and is called with
        each node. A list or tuple value is rendered one item per line, any
        other value is converted with ``str`` and split on newlines. An empty
        list or tuple renders as a single blank label.

        Examples
        --------

        >>> from xarray import Dataset
        >>> from xarray.core.datatree import DataTree
        >>> from xarray.core.datatree_render import RenderDataTree
        >>> root = DataTree(name="root")
        >>> s0 = DataTree(name="sub0", parent=root)
        >>> s0b = DataTree(
        ...     name="sub0B", parent=s0, data=Dataset({"foo": 4, "bar": 109})
        ... )
        >>> s0a = DataTree(name="sub0A", parent=s0)
        >>> s1 = DataTree(name="sub1", parent=root)
        >>> s1a = DataTree(name="sub1A", parent=s1)
        >>> s1b = DataTree(name="sub1B", parent=s1, data=Dataset({"bar": 8}))
        >>> s1c = DataTree(name="sub1C", parent=s1)
        >>> s1ca = DataTree(name="sub1Ca", parent=s1c)
        >>> print(RenderDataTree(root).by_attr("name"))
        root
        ├── sub0
        │   ├── sub0B
        │   └── sub0A
        └── sub1
            ├── sub1A
            ├── sub1B
            └── sub1C
                └── sub1Ca
        """

        def get() -> Iterator[str]:
            for pre, fill, node in self:
                if callable(attrname):
                    attr = attrname(node)
                else:
                    attr = getattr(node, attrname, "")
                if isinstance(attr, (list, tuple)):
                    # An empty sequence still gets one blank row so the node
                    # keeps its place in the tree; indexing ``attr[0]``
                    # directly would raise IndexError.
                    lines = attr or [""]
                else:
                    lines = str(attr).split("\n")
                yield f"{pre}{lines[0]}"
                for line in lines[1:]:
                    yield f"{fill}{line}"

        return "\n".join(get())
|
||
|
||
def _is_last(iterable: Iterable) -> Iterator[tuple[DataTree, bool]]: | ||
iter_ = iter(iterable) | ||
try: | ||
nextitem = next(iter_) | ||
except StopIteration: | ||
pass | ||
else: | ||
item = nextitem | ||
while True: | ||
try: | ||
nextitem = next(iter_) | ||
yield item, False | ||
except StopIteration: | ||
yield nextitem, True | ||
break | ||
item = nextitem |
This file was deleted.
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
import textwrap | ||
|
||
from xarray import Dataset | ||
from xarray.core.dataset import Dataset | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was causing another circular dependency issue. @flamingbear - just FYI, for when you are tweaking |
||
|
||
from xarray.core.datatree_mapping import map_over_subtree | ||
|
||
|
This file was deleted.
This file was deleted.
This file was deleted.
This file was deleted.
This file was deleted.
This file was deleted.
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
# TODO: Add assert_isomorphic when making DataTree API public | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I assumed we didn't want to surface There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought there was an issue collecting things we need to do to put a final bow on things, but I'm not finding it. Should we add it to #8572? or is that overkill? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can't find a dedicated issue for that either. Yes, let's just make an explicit list under Expose datatree API publicly. on #8572 (I'll do that now) |
||
from xarray.testing.assertions import ( # noqa: F401 | ||
_assert_dataarray_invariants, | ||
_assert_dataset_invariants, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,7 @@ | |
import functools | ||
import warnings | ||
from collections.abc import Hashable | ||
from typing import Union | ||
from typing import Union, overload | ||
|
||
import numpy as np | ||
import pandas as pd | ||
|
@@ -12,6 +12,8 @@ | |
from xarray.core.coordinates import Coordinates | ||
from xarray.core.dataarray import DataArray | ||
from xarray.core.dataset import Dataset | ||
from xarray.core.datatree import DataTree | ||
from xarray.core.formatting import diff_datatree_repr | ||
from xarray.core.indexes import Index, PandasIndex, PandasMultiIndex, default_indexes | ||
from xarray.core.variable import IndexVariable, Variable | ||
|
||
|
@@ -50,7 +52,59 @@ def _data_allclose_or_equiv(arr1, arr2, rtol=1e-05, atol=1e-08, decode_bytes=Tru | |
|
||
|
||
@ensure_warnings | ||
def assert_equal(a, b): | ||
def assert_isomorphic(a: DataTree, b: DataTree, from_root: bool = False):
    """
    Assert that two DataTrees are isomorphic.

    Two DataTrees are considered isomorphic if every node has the same number
    of children. Nothing about the data or attrs in each node is checked.

    Isomorphism is a necessary condition for two trees to be used in a
    nodewise binary operation, such as tree1 + tree2.

    By default this function does not check any part of the tree above the
    given node. Therefore this function can be used as default to check that
    two subtrees are isomorphic.

    Parameters
    ----------
    a : DataTree
        The first object to compare.
    b : DataTree
        The second object to compare.
    from_root : bool, optional, default is False
        Whether or not to first traverse to the root of the trees before
        checking for isomorphism. If a & b have no parents then this has no
        effect.

    Raises
    ------
    AssertionError
        If ``a`` is not an instance of ``type(b)``, or the trees are not
        isomorphic.
    TypeError
        If ``a`` is not a DataTree.

    See Also
    --------
    DataTree.isomorphic
    assert_equal
    assert_identical
    """
    __tracebackhide__ = True
    assert isinstance(a, type(b))

    # Guard clause: anything other than a DataTree is rejected up front.
    if not isinstance(a, DataTree):
        raise TypeError(f"{type(a)} not of type DataTree")

    if from_root:
        a, b = a.root, b.root

    is_isomorphic = a.isomorphic(b, from_root=from_root)
    assert is_isomorphic, diff_datatree_repr(a, b, "isomorphic")
|
||
|
||
@overload | ||
def assert_equal(a, b): ... | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I initially tried to specify all the individual overloads (e.g., Hopefully this hits enough of the spot, though. |
||
|
||
|
||
@overload | ||
def assert_equal(a: DataTree, b: DataTree, from_root: bool = True): ... | ||
|
||
|
||
@ensure_warnings | ||
def assert_equal(a, b, from_root=True): | ||
"""Like :py:func:`numpy.testing.assert_array_equal`, but for xarray | ||
objects. | ||
|
@@ -59,12 +113,20 @@ def assert_equal(a, b): | |
(except for Dataset objects for which the variable names must match). | ||
Arrays with NaN in the same location are considered equal. | ||
For DataTree objects, assert_equal is mapped over all Datasets on each node, | ||
with the DataTrees being equal if both are isomorphic and the corresponding | ||
Datasets at each node are themselves equal. | ||
Parameters | ||
---------- | ||
a : xarray.Dataset, xarray.DataArray, xarray.Variable or xarray.Coordinates | ||
The first object to compare. | ||
b : xarray.Dataset, xarray.DataArray, xarray.Variable or xarray.Coordinates | ||
The second object to compare. | ||
a : xarray.Dataset, xarray.DataArray, xarray.Variable, xarray.Coordinates | ||
or xarray.core.datatree.DataTree. The first object to compare. | ||
b : xarray.Dataset, xarray.DataArray, xarray.Variable, xarray.Coordinates | ||
or xarray.core.datatree.DataTree. The second object to compare. | ||
from_root : bool, optional, default is True | ||
Only used when comparing DataTree objects. Indicates whether or not to | ||
first traverse to the root of the trees before checking for isomorphism. | ||
If a & b have no parents then this has no effect. | ||
See Also | ||
-------- | ||
|
@@ -81,23 +143,45 @@ def assert_equal(a, b): | |
assert a.equals(b), formatting.diff_dataset_repr(a, b, "equals") | ||
elif isinstance(a, Coordinates): | ||
assert a.equals(b), formatting.diff_coords_repr(a, b, "equals") | ||
elif isinstance(a, DataTree): | ||
if from_root: | ||
a = a.root | ||
b = b.root | ||
|
||
assert a.equals(b, from_root=from_root), diff_datatree_repr(a, b, "equals") | ||
else: | ||
raise TypeError(f"{type(a)} not supported by assertion comparison") | ||
|
||
|
||
@overload | ||
def assert_identical(a, b): ... | ||
|
||
|
||
@overload | ||
def assert_identical(a: DataTree, b: DataTree, from_root: bool = True): ... | ||
|
||
|
||
@ensure_warnings | ||
def assert_identical(a, b): | ||
def assert_identical(a, b, from_root=True): | ||
"""Like :py:func:`xarray.testing.assert_equal`, but also matches the | ||
objects' names and attributes. | ||
Raises an AssertionError if two objects are not identical. | ||
For DataTree objects, assert_identical is mapped over all Datasets on each | ||
node, with the DataTrees being identical if both are isomorphic and the | ||
corresponding Datasets at each node are themselves identical. | ||
Parameters | ||
---------- | ||
a : xarray.Dataset, xarray.DataArray, xarray.Variable or xarray.Coordinates | ||
The first object to compare. | ||
b : xarray.Dataset, xarray.DataArray, xarray.Variable or xarray.Coordinates | ||
The second object to compare. | ||
from_root : bool, optional, default is True | ||
Only used when comparing DataTree objects. Indicates whether or not to | ||
first traverse to the root of the trees before checking for isomorphism. | ||
If a & b have no parents then this has no effect. | ||
See Also | ||
-------- | ||
|
@@ -116,6 +200,14 @@ def assert_identical(a, b): | |
assert a.identical(b), formatting.diff_dataset_repr(a, b, "identical") | ||
elif isinstance(a, Coordinates): | ||
assert a.identical(b), formatting.diff_coords_repr(a, b, "identical") | ||
elif isinstance(a, DataTree): | ||
if from_root: | ||
a = a.root | ||
b = b.root | ||
|
||
assert a.identical(b, from_root=from_root), diff_datatree_repr( | ||
a, b, "identical" | ||
) | ||
else: | ||
raise TypeError(f"{type(a)} not supported by assertion comparison") | ||
|
||
|
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,9 +5,14 @@ | |
import pytest | ||
|
||
import xarray as xr | ||
|
||
# TODO: Remove imports in favour of xr.DataTree etc, once part of public API | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm also hoping that once we can use |
||
from xarray.core.datatree import DataTree | ||
from xarray.core.extensions import register_datatree_accessor | ||
from xarray.tests import assert_identical | ||
|
||
|
||
@register_datatree_accessor("example_accessor") | ||
@xr.register_dataset_accessor("example_accessor") | ||
@xr.register_dataarray_accessor("example_accessor") | ||
class ExampleAccessor: | ||
|
@@ -19,6 +24,7 @@ def __init__(self, xarray_obj): | |
|
||
class TestAccessor: | ||
def test_register(self) -> None: | ||
@register_datatree_accessor("demo") | ||
@xr.register_dataset_accessor("demo") | ||
@xr.register_dataarray_accessor("demo") | ||
class DemoAccessor: | ||
|
@@ -31,6 +37,9 @@ def __init__(self, xarray_obj): | |
def foo(self): | ||
return "bar" | ||
|
||
dt: DataTree = DataTree() | ||
assert dt.demo.foo == "bar" | ||
|
||
ds = xr.Dataset() | ||
assert ds.demo.foo == "bar" | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I moved this into
xarray/core/formatting.py
to avoid a circular dependency issue.