Skip to content

Commit a66565c

Browse files
authored
Add data catalog name to nodes to make representation unique. (#650)
1 parent 541cfc6 commit a66565c

14 files changed

+140
-12
lines changed

docs/source/changes.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@ chronological order. Releases follow [semantic versioning](https://semver.org/)
55
releases are available on [PyPI](https://pypi.org/project/pytask) and
66
[Anaconda.org](https://anaconda.org/conda-forge/pytask).
77

8+
## 0.5.3 - 2025-xx-xx
9+
10+
- {pull}`650` allows identifying which data catalog a node comes from. Thanks
11+
to {user}`felixschmitz` for the report! The feature is enabled by adding an
12+
`attributes` field on `PNode` and `PProvisionalNode` that will be mandatory on custom
13+
nodes in v0.6.0.
14+
815
## 0.5.2 - 2024-12-19
916

1017
- {pull}`633` adds support for Python 3.13 and drops support for 3.8.

docs_src/how_to_guides/writing_custom_nodes_example_3_py310.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,20 @@ class PickleNode:
1515
Name of the node which makes it identifiable in the DAG.
1616
path
1717
The path to the file.
18+
attributes
19+
Additional attributes that are stored in the node.
1820
1921
"""
2022

21-
def __init__(self, name: str = "", path: Path | None = None) -> None:
23+
def __init__(
24+
self,
25+
name: str = "",
26+
path: Path | None = None,
27+
attributes: dict[Any, Any] | None = None,
28+
) -> None:
2229
self.name = name
2330
self.path = path
31+
self.attributes = attributes or {}
2432

2533
@property
2634
def signature(self) -> str:

docs_src/how_to_guides/writing_custom_nodes_example_3_py38.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,20 @@ class PickleNode:
1616
Name of the node which makes it identifiable in the DAG.
1717
path
1818
The path to the file.
19+
attributes
20+
Additional attributes that are stored in the node.
1921
2022
"""
2123

22-
def __init__(self, name: str = "", path: Optional[Path] = None) -> None:
24+
def __init__(
25+
self,
26+
name: str = "",
27+
path: Optional[Path] = None,
28+
attributes: Optional[dict[Any, Any]] = None,
29+
) -> None:
2330
self.name = name
2431
self.path = path
32+
self.attributes = attributes or {}
2533

2634
@property
2735
def signature(self) -> str:

pyproject.toml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -84,12 +84,6 @@ Tracker = "https://github.com/pytask-dev/pytask/issues"
8484
[project.scripts]
8585
pytask = "pytask:cli"
8686

87-
[tool.uv.sources]
88-
pytask-parallel = { workspace = true }
89-
90-
[tool.uv.workspace]
91-
members = ["packages/*"]
92-
9387
[tool.uv]
9488
dev-dependencies = [
9589
"tox-uv>=1.7.0", "pygraphviz;platform_system=='Linux'",

src/_pytask/click.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
from _pytask import __version__ as version
2626
from _pytask.console import console
27+
from _pytask.console import create_panel_title
2728

2829
if TYPE_CHECKING:
2930
from collections.abc import Sequence
@@ -109,7 +110,7 @@ def format_help(
109110
console.print(
110111
Panel(
111112
commands_table,
112-
title="[bold #f2f2f2]Commands[/]",
113+
title=create_panel_title("Commands"),
113114
title_align="left",
114115
border_style="grey37",
115116
)
@@ -244,7 +245,7 @@ def _print_options(group_or_command: Command | DefaultGroup, ctx: Context) -> No
244245
console.print(
245246
Panel(
246247
options_table,
247-
title="[bold #f2f2f2]Options[/]",
248+
title=create_panel_title("Options"),
248249
title_align="left",
249250
border_style="grey37",
250251
)

src/_pytask/collect.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from _pytask.node_protocols import PPathNode
3434
from _pytask.node_protocols import PProvisionalNode
3535
from _pytask.node_protocols import PTask
36+
from _pytask.node_protocols import warn_about_upcoming_attributes_field_on_nodes
3637
from _pytask.nodes import DirectoryNode
3738
from _pytask.nodes import PathNode
3839
from _pytask.nodes import PythonNode
@@ -385,6 +386,9 @@ def pytask_collect_node( # noqa: C901, PLR0912
385386
"""
386387
node = node_info.value
387388

389+
if isinstance(node, (PNode, PProvisionalNode)) and not hasattr(node, "attributes"):
390+
warn_about_upcoming_attributes_field_on_nodes()
391+
388392
if isinstance(node, DirectoryNode):
389393
if node.root_dir is None:
390394
node.root_dir = path

src/_pytask/console.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from rich.theme import Theme
2525
from rich.tree import Tree
2626

27+
from _pytask.data_catalog_utils import DATA_CATALOG_NAME_FIELD
2728
from _pytask.node_protocols import PNode
2829
from _pytask.node_protocols import PPathNode
2930
from _pytask.node_protocols import PProvisionalNode
@@ -42,6 +43,7 @@
4243

4344
__all__ = [
4445
"console",
46+
"create_panel_title",
4547
"create_summary_panel",
4648
"create_url_style_for_path",
4749
"create_url_style_for_task",
@@ -146,6 +148,11 @@ def format_node_name(
146148
"""Format the name of a node."""
147149
if isinstance(node, PPathNode):
148150
if node.name != node.path.as_posix():
151+
# For example, any node added to a data catalog has its name set to the key.
152+
if data_catalog_name := getattr(node, "attributes", {}).get(
153+
DATA_CATALOG_NAME_FIELD
154+
):
155+
return Text(f"{data_catalog_name}::{node.name}")
149156
return Text(node.name)
150157
name = shorten_path(node.path, paths)
151158
return Text(name)
@@ -156,6 +163,11 @@ def format_node_name(
156163
reduced_name = shorten_path(Path(path), paths)
157164
return Text(f"{reduced_name}::{rest}")
158165

166+
# Python or other custom nodes that are not PathNodes.
167+
if data_catalog_name := getattr(node, "attributes", {}).get(
168+
DATA_CATALOG_NAME_FIELD
169+
):
170+
return Text(f"{data_catalog_name}::{node.name}")
159171
return Text(node.name)
160172

161173

@@ -293,10 +305,15 @@ def create_summary_panel(
293305

294306
return Panel(
295307
grid,
296-
title="[bold #f2f2f2]Summary[/]",
308+
title=create_panel_title("Summary"),
297309
expand=False,
298310
style="none",
299311
border_style=outcome_enum.FAIL.style
300312
if counts[outcome_enum.FAIL]
301313
else outcome_enum.SUCCESS.style,
302314
)
315+
316+
317+
def create_panel_title(title: str) -> Text:
    """Return ``title`` as rich text styled for use as a panel heading."""
    panel_title = Text(title, style="bold #f2f2f2")
    return panel_title

src/_pytask/data_catalog.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,13 @@
1717
from attrs import field
1818

1919
from _pytask.config_utils import find_project_root_and_config
20+
from _pytask.data_catalog_utils import DATA_CATALOG_NAME_FIELD
2021
from _pytask.exceptions import NodeNotCollectedError
2122
from _pytask.models import NodeInfo
2223
from _pytask.node_protocols import PNode
2324
from _pytask.node_protocols import PPathNode
2425
from _pytask.node_protocols import PProvisionalNode
26+
from _pytask.node_protocols import warn_about_upcoming_attributes_field_on_nodes
2527
from _pytask.nodes import PickleNode
2628
from _pytask.pluginmanager import storage
2729
from _pytask.session import Session
@@ -92,6 +94,10 @@ def __attrs_post_init__(self) -> None:
9294
# Initialize the data catalog with persisted nodes from previous runs.
9395
for path in self.path.glob("*-node.pkl"):
9496
node = pickle.loads(path.read_bytes()) # noqa: S301
97+
if not hasattr(node, "attributes"):
98+
warn_about_upcoming_attributes_field_on_nodes()
99+
else:
100+
node.attributes = {DATA_CATALOG_NAME_FIELD: self.name}
95101
self._entries[node.name] = node
96102

97103
def __getitem__(self, name: str) -> PNode | PProvisionalNode:
@@ -133,3 +139,9 @@ def add(self, name: str, node: PNode | PProvisionalNode | Any = None) -> None:
133139
msg = f"{node!r} cannot be parsed."
134140
raise NodeNotCollectedError(msg)
135141
self._entries[name] = collected_node
142+
143+
node = self._entries[name]
144+
if hasattr(node, "attributes"):
145+
node.attributes[DATA_CATALOG_NAME_FIELD] = self.name
146+
else:
147+
warn_about_upcoming_attributes_field_on_nodes()

src/_pytask/data_catalog_utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
"""Contains utilities for the data catalog."""
2+
3+
__all__ = ["DATA_CATALOG_NAME_FIELD"]
4+
5+
6+
DATA_CATALOG_NAME_FIELD = "catalog_name"

src/_pytask/node_protocols.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import warnings
34
from typing import TYPE_CHECKING
45
from typing import Any
56
from typing import Callable
@@ -138,3 +139,14 @@ def load(self, is_product: bool = False) -> Any: # pragma: no cover
138139

139140
def collect(self) -> list[Any]:
140141
"""Collect the objects that are defined by the provisional nodes."""
142+
143+
144+
def warn_about_upcoming_attributes_field_on_nodes() -> None:
    """Emit a ``FutureWarning`` about the upcoming ``attributes`` field on nodes.

    The warning announces that ``PNode`` and ``PProvisionalNode`` will require an
    ``attributes`` field starting with pytask v0.6.0.

    """
    warnings.warn(
        "PNode and PProvisionalNode will require an 'attributes' field starting "
        "with pytask v0.6.0. It is a dictionary with any type of key and values "
        "similar to PTask. See https://tinyurl.com/pytask-custom-nodes for more "
        "information about adjusting your custom nodes.",
        # Attribute the warning to the caller of this helper rather than to the
        # helper itself, so the reported location identifies the code path that
        # encountered the node without an ``attributes`` field.
        stacklevel=2,
        category=FutureWarning,
    )

0 commit comments

Comments
 (0)