Skip to content

Commit 9479410

Browse files
authored
Merge branch 'main' into uv
2 parents bf915fa + d0f6542 commit 9479410

24 files changed

+102
-161
lines changed

.github/workflows/main.yml

-3
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,6 @@ concurrency:
55
group: ${{ github.head_ref || github.run_id }}
66
cancel-in-progress: true
77

8-
env:
9-
CONDA_EXE: mamba
10-
118
on:
129
push:
1310
branches:

.github/workflows/update-plugin-list.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ jobs:
3737
run: python scripts/update_plugin_list.py
3838

3939
- name: Create Pull Request
40-
uses: peter-evans/create-pull-request@b1ddad2c994a25fbc81a28b3ec0e368bb2021c50
40+
uses: peter-evans/create-pull-request@a4f52f8033a6168103c2538976c07b467e8163bc
4141
with:
4242
commit-message: '[automated] Update plugin list'
4343
author: 'Tobias Raabe <[email protected]>'

.pre-commit-config.yaml

+3-5
Original file line numberDiff line numberDiff line change
@@ -33,18 +33,18 @@ repos:
3333
hooks:
3434
- id: sort-all
3535
- repo: https://github.com/astral-sh/ruff-pre-commit
36-
rev: v0.3.1
36+
rev: v0.3.2
3737
hooks:
3838
- id: ruff-format
3939
- id: ruff
4040
args: [--unsafe-fixes]
4141
- repo: https://github.com/dosisod/refurb
42-
rev: v1.28.0
42+
rev: v2.0.0
4343
hooks:
4444
- id: refurb
4545
args: [--ignore, FURB126]
4646
- repo: https://github.com/pre-commit/mirrors-mypy
47-
rev: v1.8.0
47+
rev: v1.9.0
4848
hooks:
4949
- id: mypy
5050
additional_dependencies: [
@@ -80,8 +80,6 @@ repos:
8080
- repo: https://github.com/nbQA-dev/nbQA
8181
rev: 1.8.4
8282
hooks:
83-
- id: nbqa-black
84-
- id: nbqa-isort
8583
- id: nbqa-mypy
8684
args: [--ignore-missing-imports]
8785
- repo: https://github.com/kynan/nbstripout

docs/source/changes.md

+5
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and
1919
- {pull}`557` fixes an issue with `@task(after=...)` in notebooks and terminals.
2020
- {pull}`566` makes universal-pathlib an official dependency.
2121
- {pull}`567` adds uv to the CI workflow for faster installation.
22+
- {pull}`568` restricts `task_files` to a list of patterns and raises a better error.
23+
- {pull}`569` removes the hooks related to the creation of the DAG.
24+
- {pull}`571` removes redundant calls to `PNode.state()` which cause a high penalty for
25+
remote files.
26+
- {pull}`573` removes the `pytask_execute_create_scheduler` hook.
2227

2328
## 0.4.5 - 2024-01-09
2429

docs/source/reference_guides/configuration.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ strict_markers = true
242242
Change the patterns which identify task files.
243243
244244
```toml
245-
task_files = "task_*.py" # default
245+
task_files = ["task_*.py"] # default
246246
247247
task_files = ["task_*.py", "tasks_*.py"]
248248
```

docs/source/reference_guides/hookspecs.md

-20
Original file line numberDiff line numberDiff line change
@@ -62,33 +62,13 @@ The following hooks traverse directories and collect tasks from files.
6262
.. autofunction:: pytask_collect_log
6363
```
6464

65-
## Resolving Dependencies
66-
67-
The following hooks are designed to build a DAG from tasks and dependencies and check
68-
which files have changed and need to be re-run.
69-
70-
```{warning}
71-
This step is still experimental and likely to change in the future. If you are planning
72-
to write a plugin which extends pytask in this dimension, please, start a discussion
73-
before writing a plugin. It may make your life easier if changes in pytask anticipate
74-
your plugin.
75-
```
76-
77-
```{eval-rst}
78-
.. autofunction:: pytask_dag
79-
.. autofunction:: pytask_dag_create_dag
80-
.. autofunction:: pytask_dag_log
81-
82-
```
83-
8465
## Execution
8566

8667
The following hooks execute the tasks and log information on the result in the terminal.
8768

8869
```{eval-rst}
8970
.. autofunction:: pytask_execute
9071
.. autofunction:: pytask_execute_log_start
91-
.. autofunction:: pytask_execute_create_scheduler
9272
.. autofunction:: pytask_execute_build
9373
.. autofunction:: pytask_execute_task_protocol
9474
.. autofunction:: pytask_execute_task_log_start

docs/source/tutorials/defining_dependencies_products.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ task has finished, pytask will check whether the file exists.
7272
7373
````
7474
75-
````{tab-item} prodouces
75+
````{tab-item} produces
7676
:sync: produces
7777
7878
```{literalinclude} ../../../docs_src/tutorials/defining_dependencies_products_products_produces.py
@@ -132,7 +132,7 @@ annotation are dependencies of the task.
132132
133133
````
134134
135-
````{tab-item} prodouces
135+
````{tab-item} produces
136136
:sync: produces
137137
138138
To specify that the task relies on the data set `data.pkl`, you can add the path to the
@@ -178,7 +178,7 @@ are assumed to point to a location relative to the task module.
178178
179179
````
180180
181-
````{tab-item} prodouces
181+
````{tab-item} produces
182182
:sync: produces
183183
184184
```{literalinclude} ../../../docs_src/tutorials/defining_dependencies_products_relative_produces.py

src/_pytask/_hashlib.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -232,5 +232,5 @@ def hash_value(value: Any) -> int | str:
232232
if isinstance(value, str):
233233
value = value.encode()
234234
if isinstance(value, bytes):
235-
return str(hashlib.sha256(value).hexdigest())
235+
return hashlib.sha256(value).hexdigest()
236236
return hash(value)

src/_pytask/build.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from _pytask.config_utils import find_project_root_and_config
2222
from _pytask.config_utils import read_config
2323
from _pytask.console import console
24+
from _pytask.dag import create_dag
2425
from _pytask.exceptions import CollectionError
2526
from _pytask.exceptions import ConfigurationError
2627
from _pytask.exceptions import ExecutionError
@@ -95,7 +96,7 @@ def build( # noqa: C901, PLR0912, PLR0913, PLR0915
9596
stop_after_first_failure: bool = False,
9697
strict_markers: bool = False,
9798
tasks: Callable[..., Any] | PTask | Iterable[Callable[..., Any] | PTask] = (),
98-
task_files: str | Iterable[str] = "task_*.py",
99+
task_files: Iterable[str] = ("task_*.py",),
99100
trace: bool = False,
100101
verbose: int = 1,
101102
**kwargs: Any,
@@ -265,7 +266,7 @@ def build( # noqa: C901, PLR0912, PLR0913, PLR0915
265266
try:
266267
session.hook.pytask_log_session_header(session=session)
267268
session.hook.pytask_collect(session=session)
268-
session.hook.pytask_dag(session=session)
269+
session.dag = create_dag(session=session)
269270
session.hook.pytask_execute(session=session)
270271

271272
except CollectionError:

src/_pytask/collect_command.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from _pytask.console import create_url_style_for_path
2020
from _pytask.console import format_node_name
2121
from _pytask.console import format_task_name
22+
from _pytask.dag import create_dag
2223
from _pytask.exceptions import CollectionError
2324
from _pytask.exceptions import ConfigurationError
2425
from _pytask.exceptions import ResolvingDependenciesError
@@ -70,7 +71,7 @@ def collect(**raw_config: Any | None) -> NoReturn:
7071
try:
7172
session.hook.pytask_log_session_header(session=session)
7273
session.hook.pytask_collect(session=session)
73-
session.hook.pytask_dag(session=session)
74+
session.dag = create_dag(session=session)
7475

7576
tasks = _select_tasks_by_expressions_and_marker(session)
7677
task_with_path = [t for t in tasks if isinstance(t, PTaskWithPath)]

src/_pytask/config.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,13 @@ def pytask_parse_config(config: dict[str, Any]) -> None:
9292
+ IGNORED_TEMPORARY_FILES_AND_FOLDERS
9393
)
9494

95-
config["task_files"] = to_list(config.get("task_files", "task_*.py"))
95+
value = config.get("task_files", ["task_*.py"])
96+
if not isinstance(value, (list, tuple)) or not all(
97+
isinstance(p, str) for p in value
98+
):
99+
msg = "'task_files' must be a list of patterns."
100+
raise ValueError(msg)
101+
config["task_files"] = value
96102

97103
if config["stop_after_first_failure"]:
98104
config["max_failures"] = 1

src/_pytask/dag.py

+15-21
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@
1919
from _pytask.console import render_to_string
2020
from _pytask.exceptions import ResolvingDependenciesError
2121
from _pytask.mark import select_by_after_keyword
22+
from _pytask.mark import select_tasks_by_marks_and_expressions
2223
from _pytask.node_protocols import PNode
2324
from _pytask.node_protocols import PTask
2425
from _pytask.nodes import PythonNode
25-
from _pytask.pluginmanager import hookimpl
2626
from _pytask.reports import DagReport
2727
from _pytask.shared import reduce_names_of_multiple_nodes
2828
from _pytask.tree_util import tree_map
@@ -33,28 +33,28 @@
3333
from _pytask.session import Session
3434

3535

36-
@hookimpl
37-
def pytask_dag(session: Session) -> bool | None:
36+
__all__ = ["create_dag"]
37+
38+
39+
def create_dag(session: Session) -> nx.DiGraph:
3840
"""Create a directed acyclic graph (DAG) for the workflow."""
3941
try:
40-
session.dag = session.hook.pytask_dag_create_dag(
41-
session=session, tasks=session.tasks
42-
)
43-
session.hook.pytask_dag_modify_dag(session=session, dag=session.dag)
42+
dag = _create_dag(tasks=session.tasks)
43+
_check_if_dag_has_cycles(dag)
44+
_check_if_tasks_have_the_same_products(dag, session.config["paths"])
45+
_modify_dag(session=session, dag=dag)
46+
select_tasks_by_marks_and_expressions(session=session, dag=dag)
4447

4548
except Exception: # noqa: BLE001
4649
report = DagReport.from_exception(sys.exc_info())
47-
session.hook.pytask_dag_log(session=session, report=report)
50+
_log_dag(report=report)
4851
session.dag_report = report
4952

5053
raise ResolvingDependenciesError from None
51-
52-
else:
53-
return True
54+
return dag
5455

5556

56-
@hookimpl
57-
def pytask_dag_create_dag(session: Session, tasks: list[PTask]) -> nx.DiGraph:
57+
def _create_dag(tasks: list[PTask]) -> nx.DiGraph:
5858
"""Create the DAG from tasks, dependencies and products."""
5959

6060
def _add_dependency(dag: nx.DiGraph, task: PTask, node: PNode) -> None:
@@ -90,15 +90,10 @@ def _add_product(dag: nx.DiGraph, task: PTask, node: PNode) -> None:
9090
else None,
9191
task.depends_on,
9292
)
93-
94-
_check_if_dag_has_cycles(dag)
95-
_check_if_tasks_have_the_same_products(dag, session.config["paths"])
96-
9793
return dag
9894

9995

100-
@hookimpl
101-
def pytask_dag_modify_dag(session: Session, dag: nx.DiGraph) -> None:
96+
def _modify_dag(session: Session, dag: nx.DiGraph) -> None:
10297
"""Create dependencies between tasks when using ``@task(after=...)``."""
10398
temporary_id_to_task = {
10499
task.attributes["collection_id"]: task
@@ -194,8 +189,7 @@ def _check_if_tasks_have_the_same_products(dag: nx.DiGraph, paths: list[Path]) -
194189
raise ResolvingDependenciesError(msg)
195190

196191

197-
@hookimpl
198-
def pytask_dag_log(report: DagReport) -> None:
192+
def _log_dag(report: DagReport) -> None:
199193
"""Log errors which happened while resolving dependencies."""
200194
console.print()
201195
console.rule(

src/_pytask/dag_command.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from _pytask.config_utils import find_project_root_and_config
2020
from _pytask.config_utils import read_config
2121
from _pytask.console import console
22+
from _pytask.dag import create_dag
2223
from _pytask.exceptions import CollectionError
2324
from _pytask.exceptions import ConfigurationError
2425
from _pytask.exceptions import ResolvingDependenciesError
@@ -101,7 +102,7 @@ def dag(**raw_config: Any) -> int:
101102
"can install with conda.",
102103
)
103104
session.hook.pytask_collect(session=session)
104-
session.hook.pytask_dag(session=session)
105+
session.dag = create_dag(session=session)
105106
dag = _refine_dag(session)
106107
_write_graph(dag, session.config["output_path"], session.config["layout"])
107108

@@ -198,7 +199,7 @@ def build_dag(raw_config: dict[str, Any]) -> nx.DiGraph:
198199
"can install with conda.",
199200
)
200201
session.hook.pytask_collect(session=session)
201-
session.hook.pytask_dag(session=session)
202+
session.dag = create_dag(session=session)
202203
session.hook.pytask_unconfigure(session=session)
203204
dag = _refine_dag(session)
204205

src/_pytask/data_catalog.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ def add(self, name: str, node: PNode | None = None) -> None:
9999
raise TypeError(msg)
100100

101101
if node is None:
102-
filename = str(hashlib.sha256(name.encode()).hexdigest())
102+
filename = hashlib.sha256(name.encode()).hexdigest()
103103
if isinstance(self.default_node, PPathNode):
104104
self.entries[name] = self.default_node(
105105
name=name, path=self.path / f"{filename}.pkl"

src/_pytask/database_utils.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,10 @@ def update_states_in_database(session: Session, task_signature: str) -> None:
6969
_create_or_update_state(task_signature, node.signature, hash_)
7070

7171

72-
def has_node_changed(task: PTask, node: PTask | PNode) -> bool:
72+
def has_node_changed(task: PTask, node: PTask | PNode, state: str | None) -> bool:
7373
"""Indicate whether a single dependency or product has changed."""
7474
# If node does not exist, we receive None.
75-
node_state = node.state()
76-
if node_state is None:
75+
if state is None:
7776
return True
7877

7978
with DatabaseSession() as session:
@@ -83,4 +82,4 @@ def has_node_changed(task: PTask, node: PTask | PNode) -> bool:
8382
if db_state is None:
8483
return True
8584

86-
return node_state != db_state.hash_
85+
return state != db_state.hash_

src/_pytask/execute.py

+4-9
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def pytask_post_parse(config: dict[str, Any]) -> None:
5757
def pytask_execute(session: Session) -> None:
5858
"""Execute tasks."""
5959
session.hook.pytask_execute_log_start(session=session)
60-
session.scheduler = session.hook.pytask_execute_create_scheduler(session=session)
60+
session.scheduler = TopologicalSorter.from_dag(session.dag)
6161
session.hook.pytask_execute_build(session=session)
6262
session.hook.pytask_execute_log_end(
6363
session=session, reports=session.execution_reports
@@ -73,12 +73,6 @@ def pytask_execute_log_start(session: Session) -> None:
7373
console.print()
7474

7575

76-
@hookimpl(trylast=True)
77-
def pytask_execute_create_scheduler(session: Session) -> TopologicalSorter:
78-
"""Create a scheduler based on topological sorting."""
79-
return TopologicalSorter.from_dag(session.dag)
80-
81-
8276
@hookimpl
8377
def pytask_execute_build(session: Session) -> bool | None:
8478
"""Execute tasks."""
@@ -140,7 +134,8 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None:
140134
node = dag.nodes[node_signature].get("task") or dag.nodes[
141135
node_signature
142136
].get("node")
143-
if node_signature in predecessors and not node.state():
137+
node_state = node.state()
138+
if node_signature in predecessors and not node_state:
144139
msg = f"{task.name!r} requires missing node {node.name!r}."
145140
if IS_FILE_SYSTEM_CASE_SENSITIVE:
146141
msg += (
@@ -149,7 +144,7 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None:
149144
)
150145
raise NodeNotFoundError(msg)
151146

152-
has_changed = has_node_changed(task=task, node=node)
147+
has_changed = has_node_changed(task=task, node=node, state=node_state)
153148
if has_changed:
154149
needs_to_be_executed = True
155150
break

0 commit comments

Comments
 (0)