Skip to content

Commit a66565c

Browse files
authored
Add data catalog name to nodes to make representation unique. (#650)
1 parent 541cfc6 commit a66565c

14 files changed

+140
-12
lines changed

docs/source/changes.md

+7
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@ chronological order. Releases follow [semantic versioning](https://semver.org/)
55
releases are available on [PyPI](https://pypi.org/project/pytask) and
66
[Anaconda.org](https://anaconda.org/conda-forge/pytask).
77

8+
## 0.5.3 - 2025-xx-xx
9+
10+
- {pull}`650` allows identifying which data catalog a node comes from. Thanks
11+
to {user}`felixschmitz` for the report! The feature is enabled by adding an
12+
`attributes` field on `PNode` and `PProvisionalNode` that will be mandatory on custom
13+
nodes in v0.6.0.
14+
815
## 0.5.2 - 2024-12-19
916

1017
- {pull}`633` adds support for Python 3.13 and drops support for 3.8.

docs_src/how_to_guides/writing_custom_nodes_example_3_py310.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,20 @@ class PickleNode:
1515
Name of the node which makes it identifiable in the DAG.
1616
path
1717
The path to the file.
18+
attributes
19+
Additional attributes that are stored in the node.
1820
1921
"""
2022

21-
def __init__(self, name: str = "", path: Path | None = None) -> None:
23+
def __init__(
24+
self,
25+
name: str = "",
26+
path: Path | None = None,
27+
attributes: dict[Any, Any] | None = None,
28+
) -> None:
2229
self.name = name
2330
self.path = path
31+
self.attributes = attributes or {}
2432

2533
@property
2634
def signature(self) -> str:

docs_src/how_to_guides/writing_custom_nodes_example_3_py38.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,20 @@ class PickleNode:
1616
Name of the node which makes it identifiable in the DAG.
1717
path
1818
The path to the file.
19+
attributes
20+
Additional attributes that are stored in the node.
1921
2022
"""
2123

22-
def __init__(self, name: str = "", path: Optional[Path] = None) -> None:
24+
def __init__(
25+
self,
26+
name: str = "",
27+
path: Optional[Path] = None,
28+
attributes: Optional[dict[Any, Any]] = None,
29+
) -> None:
2330
self.name = name
2431
self.path = path
32+
self.attributes = attributes or {}
2533

2634
@property
2735
def signature(self) -> str:

pyproject.toml

-6
Original file line numberDiff line numberDiff line change
@@ -84,12 +84,6 @@ Tracker = "https://github.com/pytask-dev/pytask/issues"
8484
[project.scripts]
8585
pytask = "pytask:cli"
8686

87-
[tool.uv.sources]
88-
pytask-parallel = { workspace = true }
89-
90-
[tool.uv.workspace]
91-
members = ["packages/*"]
92-
9387
[tool.uv]
9488
dev-dependencies = [
9589
"tox-uv>=1.7.0", "pygraphviz;platform_system=='Linux'",

src/_pytask/click.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
from _pytask import __version__ as version
2626
from _pytask.console import console
27+
from _pytask.console import create_panel_title
2728

2829
if TYPE_CHECKING:
2930
from collections.abc import Sequence
@@ -109,7 +110,7 @@ def format_help(
109110
console.print(
110111
Panel(
111112
commands_table,
112-
title="[bold #f2f2f2]Commands[/]",
113+
title=create_panel_title("Commands"),
113114
title_align="left",
114115
border_style="grey37",
115116
)
@@ -244,7 +245,7 @@ def _print_options(group_or_command: Command | DefaultGroup, ctx: Context) -> No
244245
console.print(
245246
Panel(
246247
options_table,
247-
title="[bold #f2f2f2]Options[/]",
248+
title=create_panel_title("Options"),
248249
title_align="left",
249250
border_style="grey37",
250251
)

src/_pytask/collect.py

+4
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from _pytask.node_protocols import PPathNode
3434
from _pytask.node_protocols import PProvisionalNode
3535
from _pytask.node_protocols import PTask
36+
from _pytask.node_protocols import warn_about_upcoming_attributes_field_on_nodes
3637
from _pytask.nodes import DirectoryNode
3738
from _pytask.nodes import PathNode
3839
from _pytask.nodes import PythonNode
@@ -385,6 +386,9 @@ def pytask_collect_node( # noqa: C901, PLR0912
385386
"""
386387
node = node_info.value
387388

389+
if isinstance(node, (PNode, PProvisionalNode)) and not hasattr(node, "attributes"):
390+
warn_about_upcoming_attributes_field_on_nodes()
391+
388392
if isinstance(node, DirectoryNode):
389393
if node.root_dir is None:
390394
node.root_dir = path

src/_pytask/console.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from rich.theme import Theme
2525
from rich.tree import Tree
2626

27+
from _pytask.data_catalog_utils import DATA_CATALOG_NAME_FIELD
2728
from _pytask.node_protocols import PNode
2829
from _pytask.node_protocols import PPathNode
2930
from _pytask.node_protocols import PProvisionalNode
@@ -42,6 +43,7 @@
4243

4344
__all__ = [
4445
"console",
46+
"create_panel_title",
4547
"create_summary_panel",
4648
"create_url_style_for_path",
4749
"create_url_style_for_task",
@@ -146,6 +148,11 @@ def format_node_name(
146148
"""Format the name of a node."""
147149
if isinstance(node, PPathNode):
148150
if node.name != node.path.as_posix():
151+
# For example, any node added to a data catalog has its name set to the key.
152+
if data_catalog_name := getattr(node, "attributes", {}).get(
153+
DATA_CATALOG_NAME_FIELD
154+
):
155+
return Text(f"{data_catalog_name}::{node.name}")
149156
return Text(node.name)
150157
name = shorten_path(node.path, paths)
151158
return Text(name)
@@ -156,6 +163,11 @@ def format_node_name(
156163
reduced_name = shorten_path(Path(path), paths)
157164
return Text(f"{reduced_name}::{rest}")
158165

166+
# Python or other custom nodes that are not PathNodes.
167+
if data_catalog_name := getattr(node, "attributes", {}).get(
168+
DATA_CATALOG_NAME_FIELD
169+
):
170+
return Text(f"{data_catalog_name}::{node.name}")
159171
return Text(node.name)
160172

161173

@@ -293,10 +305,15 @@ def create_summary_panel(
293305

294306
return Panel(
295307
grid,
296-
title="[bold #f2f2f2]Summary[/]",
308+
title=create_panel_title("Summary"),
297309
expand=False,
298310
style="none",
299311
border_style=outcome_enum.FAIL.style
300312
if counts[outcome_enum.FAIL]
301313
else outcome_enum.SUCCESS.style,
302314
)
315+
316+
317+
def create_panel_title(title: str) -> Text:
318+
"""Create a title for a panel."""
319+
return Text(title, style="bold #f2f2f2")

src/_pytask/data_catalog.py

+12
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,13 @@
1717
from attrs import field
1818

1919
from _pytask.config_utils import find_project_root_and_config
20+
from _pytask.data_catalog_utils import DATA_CATALOG_NAME_FIELD
2021
from _pytask.exceptions import NodeNotCollectedError
2122
from _pytask.models import NodeInfo
2223
from _pytask.node_protocols import PNode
2324
from _pytask.node_protocols import PPathNode
2425
from _pytask.node_protocols import PProvisionalNode
26+
from _pytask.node_protocols import warn_about_upcoming_attributes_field_on_nodes
2527
from _pytask.nodes import PickleNode
2628
from _pytask.pluginmanager import storage
2729
from _pytask.session import Session
@@ -92,6 +94,10 @@ def __attrs_post_init__(self) -> None:
9294
# Initialize the data catalog with persisted nodes from previous runs.
9395
for path in self.path.glob("*-node.pkl"):
9496
node = pickle.loads(path.read_bytes()) # noqa: S301
97+
if not hasattr(node, "attributes"):
98+
warn_about_upcoming_attributes_field_on_nodes()
99+
else:
100+
node.attributes = {DATA_CATALOG_NAME_FIELD: self.name}
95101
self._entries[node.name] = node
96102

97103
def __getitem__(self, name: str) -> PNode | PProvisionalNode:
@@ -133,3 +139,9 @@ def add(self, name: str, node: PNode | PProvisionalNode | Any = None) -> None:
133139
msg = f"{node!r} cannot be parsed."
134140
raise NodeNotCollectedError(msg)
135141
self._entries[name] = collected_node
142+
143+
node = self._entries[name]
144+
if hasattr(node, "attributes"):
145+
node.attributes[DATA_CATALOG_NAME_FIELD] = self.name
146+
else:
147+
warn_about_upcoming_attributes_field_on_nodes()

src/_pytask/data_catalog_utils.py

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
"""Contains utilities for the data catalog."""
2+
3+
__all__ = ["DATA_CATALOG_NAME_FIELD"]
4+
5+
6+
DATA_CATALOG_NAME_FIELD = "catalog_name"

src/_pytask/node_protocols.py

+12
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import warnings
34
from typing import TYPE_CHECKING
45
from typing import Any
56
from typing import Callable
@@ -138,3 +139,14 @@ def load(self, is_product: bool = False) -> Any: # pragma: no cover
138139

139140
def collect(self) -> list[Any]:
140141
"""Collect the objects that are defined by the provisional nodes."""
142+
143+
144+
def warn_about_upcoming_attributes_field_on_nodes() -> None:
145+
warnings.warn(
146+
"PNode and PProvisionalNode will require an 'attributes' field starting "
147+
"with pytask v0.6.0. It is a dictionary with any type of key and values "
148+
"similar to PTask. See https://tinyurl.com/pytask-custom-nodes for more "
149+
"information about adjusting your custom nodes.",
150+
stacklevel=1,
151+
category=FutureWarning,
152+
)

src/_pytask/nodes.py

+12
Original file line numberDiff line numberDiff line change
@@ -162,11 +162,14 @@ class PathNode(PPathNode):
162162
Name of the node which makes it identifiable in the DAG.
163163
path
164164
The path to the file.
165+
attributes: dict[Any, Any]
166+
A dictionary to store additional information of the node.
165167
166168
"""
167169

168170
path: Path
169171
name: str = ""
172+
attributes: dict[Any, Any] = field(factory=dict)
170173

171174
@property
172175
def signature(self) -> str:
@@ -219,6 +222,8 @@ class PythonNode(PNode):
219222
objects. The function should return either an integer or a string.
220223
node_info
221224
The infos acquired while collecting the node.
225+
attributes: dict[Any, Any]
226+
A dictionary to store additional information of the node.
222227
223228
Examples
224229
--------
@@ -237,6 +242,7 @@ class PythonNode(PNode):
237242
value: Any | NoDefault = no_default
238243
hash: bool | Callable[[Any], int | str] = False
239244
node_info: NodeInfo | None = None
245+
attributes: dict[Any, Any] = field(factory=dict)
240246

241247
@property
242248
def signature(self) -> str:
@@ -302,11 +308,14 @@ class PickleNode(PPathNode):
302308
Name of the node which makes it identifiable in the DAG.
303309
path
304310
The path to the file.
311+
attributes: dict[Any, Any]
312+
A dictionary to store additional information of the node.
305313
306314
"""
307315

308316
path: Path
309317
name: str = ""
318+
attributes: dict[Any, Any] = field(factory=dict)
310319

311320
@property
312321
def signature(self) -> str:
@@ -350,12 +359,15 @@ class DirectoryNode(PProvisionalNode):
350359
root_dir
351360
The pattern is interpreted relative to the path given by ``root_dir``. If
352361
``root_dir = None``, it is the directory where the path is defined.
362+
attributes: dict[Any, Any]
363+
A dictionary to store additional information of the node.
353364
354365
"""
355366

356367
name: str = ""
357368
pattern: str = "*"
358369
root_dir: Path | None = None
370+
attributes: dict[Any, Any] = field(factory=dict)
359371

360372
@property
361373
def signature(self) -> str:

src/_pytask/warnings.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from rich.panel import Panel
1313

1414
from _pytask.console import console
15+
from _pytask.console import create_panel_title
1516
from _pytask.pluginmanager import hookimpl
1617
from _pytask.warnings_utils import WarningReport
1718
from _pytask.warnings_utils import catch_warnings_for_item
@@ -82,7 +83,9 @@ def pytask_log_session_footer(session: Session) -> None:
8283
"""Log warnings at the end of a session."""
8384
if session.warnings:
8485
renderable = _WarningsRenderable(session.warnings)
85-
panel = Panel(renderable, title="Warnings", style="warning")
86+
panel = Panel(
87+
renderable, title=create_panel_title("Warnings"), style="warning"
88+
)
8689
console.print(panel)
8790

8891

tests/test_collect_command.py

+2
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,7 @@ def task_example(
517517
def test_node_protocol_for_custom_nodes_with_paths(runner, tmp_path):
518518
source = """
519519
from typing import Annotated
520+
from typing import Any
520521
from pytask import Product
521522
from pathlib import Path
522523
from attrs import define
@@ -527,6 +528,7 @@ class PickleFile:
527528
name: str
528529
path: Path
529530
signature: str = "id"
531+
attributes: dict[Any, Any] = {}
530532
531533
def state(self):
532534
return str(self.path.stat().st_mtime)

0 commit comments

Comments
 (0)