Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 696e931

Browse files
committedMar 15, 2024
Merge branch 'main' into delayed-nodes
2 parents c3b5596 + a5ac789 commit 696e931

13 files changed

+43
-57
lines changed
 

‎.github/workflows/main.yml

-3
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,6 @@ concurrency:
55
group: ${{ github.head_ref || github.run_id }}
66
cancel-in-progress: true
77

8-
env:
9-
CONDA_EXE: mamba
10-
118
on:
129
push:
1310
branches:

‎.pre-commit-config.yaml

+3-5
Original file line numberDiff line numberDiff line change
@@ -33,18 +33,18 @@ repos:
3333
hooks:
3434
- id: sort-all
3535
- repo: https://github.com/astral-sh/ruff-pre-commit
36-
rev: v0.3.1
36+
rev: v0.3.2
3737
hooks:
3838
- id: ruff-format
3939
- id: ruff
4040
args: [--unsafe-fixes]
4141
- repo: https://github.com/dosisod/refurb
42-
rev: v1.28.0
42+
rev: v2.0.0
4343
hooks:
4444
- id: refurb
4545
args: [--ignore, FURB126]
4646
- repo: https://github.com/pre-commit/mirrors-mypy
47-
rev: v1.8.0
47+
rev: v1.9.0
4848
hooks:
4949
- id: mypy
5050
additional_dependencies: [
@@ -80,8 +80,6 @@ repos:
8080
- repo: https://github.com/nbQA-dev/nbQA
8181
rev: 1.8.4
8282
hooks:
83-
- id: nbqa-black
84-
- id: nbqa-isort
8583
- id: nbqa-mypy
8684
args: [--ignore-missing-imports]
8785
- repo: https://github.com/kynan/nbstripout

‎docs/source/changes.md

+8
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,14 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and
2020
- {pull}`566` makes universal-pathlib an official dependency.
2121
- {pull}`568` restricts `task_files` to a list of patterns and raises a better error.
2222
- {pull}`569` removes the hooks related to the creation of the DAG.
23+
- {pull}`571` removes redundant calls to `PNode.state()` which causes a high penalty for
24+
remote files.
25+
- {pull}`573` removes the `pytask_execute_create_scheduler` hook.
26+
27+
## 0.4.6 - 2024-03-13
28+
29+
- {pull}`576` fixes accidentally collecting `pytask.MarkGenerator` when using
30+
`from pytask import mark`.
2331

2432
## 0.4.5 - 2024-01-09
2533

‎docs/source/reference_guides/hookspecs.md

-1
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@ The following hooks execute the tasks and log information on the result in the t
6969
```{eval-rst}
7070
.. autofunction:: pytask_execute
7171
.. autofunction:: pytask_execute_log_start
72-
.. autofunction:: pytask_execute_create_scheduler
7372
.. autofunction:: pytask_execute_build
7473
.. autofunction:: pytask_execute_task_protocol
7574
.. autofunction:: pytask_execute_task_log_start

‎src/_pytask/_hashlib.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -232,5 +232,5 @@ def hash_value(value: Any) -> int | str:
232232
if isinstance(value, str):
233233
value = value.encode()
234234
if isinstance(value, bytes):
235-
return str(hashlib.sha256(value).hexdigest())
235+
return hashlib.sha256(value).hexdigest()
236236
return hash(value)

‎src/_pytask/data_catalog.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def add(self, name: str, node: PNode | PProvisionalNode | None = None) -> None:
100100
raise TypeError(msg)
101101

102102
if node is None:
103-
filename = str(hashlib.sha256(name.encode()).hexdigest())
103+
filename = hashlib.sha256(name.encode()).hexdigest()
104104
if isinstance(self.default_node, PPathNode):
105105
self.entries[name] = self.default_node(
106106
name=name, path=self.path / f"{filename}.pkl"

‎src/_pytask/database_utils.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,10 @@ def update_states_in_database(session: Session, task_signature: str) -> None:
6969
_create_or_update_state(task_signature, node.signature, hash_)
7070

7171

72-
def has_node_changed(task: PTask, node: PTask | PNode) -> bool:
72+
def has_node_changed(task: PTask, node: PTask | PNode, state: str | None) -> bool:
7373
"""Indicate whether a single dependency or product has changed."""
7474
# If node does not exist, we receive None.
75-
node_state = node.state()
76-
if node_state is None:
75+
if state is None:
7776
return True
7877

7978
with DatabaseSession() as session:
@@ -83,4 +82,4 @@ def has_node_changed(task: PTask, node: PTask | PNode) -> bool:
8382
if db_state is None:
8483
return True
8584

86-
return node_state != db_state.hash_
85+
return state != db_state.hash_

‎src/_pytask/execute.py

+4-10
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def pytask_post_parse(config: dict[str, Any]) -> None:
6060
def pytask_execute(session: Session) -> None:
6161
"""Execute tasks."""
6262
session.hook.pytask_execute_log_start(session=session)
63-
session.scheduler = session.hook.pytask_execute_create_scheduler(session=session)
63+
session.scheduler = TopologicalSorter.from_dag(session.dag)
6464
session.hook.pytask_execute_build(session=session)
6565
session.hook.pytask_execute_log_end(
6666
session=session, reports=session.execution_reports
@@ -76,12 +76,6 @@ def pytask_execute_log_start(session: Session) -> None:
7676
console.print()
7777

7878

79-
@hookimpl(trylast=True)
80-
def pytask_execute_create_scheduler(session: Session) -> TopologicalSorter:
81-
"""Create a scheduler based on topological sorting."""
82-
return TopologicalSorter.from_dag(session.dag)
83-
84-
8579
@hookimpl
8680
def pytask_execute_build(session: Session) -> bool | None:
8781
"""Execute tasks."""
@@ -146,8 +140,8 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None: # noqa: C
146140
node = dag.nodes[node_signature].get("task") or dag.nodes[
147141
node_signature
148142
].get("node")
149-
150-
if node_signature in predecessors and not node.state():
143+
node_state = node.state()
144+
if node_signature in predecessors and not node_state:
151145
msg = f"{task.name!r} requires missing node {node.name!r}."
152146
if IS_FILE_SYSTEM_CASE_SENSITIVE:
153147
msg += (
@@ -162,7 +156,7 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None: # noqa: C
162156
):
163157
continue
164158

165-
has_changed = has_node_changed(task=task, node=node)
159+
has_changed = has_node_changed(task=task, node=node, state=node_state)
166160
if has_changed:
167161
needs_to_be_executed = True
168162
break

‎src/_pytask/hookspecs.py

-10
Original file line numberDiff line numberDiff line change
@@ -233,16 +233,6 @@ def pytask_execute_log_start(session: Session) -> None:
233233
"""
234234

235235

236-
@hookspec(firstresult=True)
237-
def pytask_execute_create_scheduler(session: Session) -> Any:
238-
"""Create a scheduler for the execution.
239-
240-
The scheduler provides information on which tasks are able to be executed. Its
241-
foundation is likely a topological ordering of the tasks based on the DAG.
242-
243-
"""
244-
245-
246236
@hookspec(firstresult=True)
247237
def pytask_execute_build(session: Session) -> Any:
248238
"""Execute the build.

‎src/_pytask/nodes.py

+13-17
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import hashlib
66
import inspect
77
import pickle
8+
from contextlib import suppress
89
from os import stat_result
910
from pathlib import Path # noqa: TCH003
1011
from typing import TYPE_CHECKING
@@ -83,12 +84,10 @@ def signature(self) -> str:
8384

8485
def state(self) -> str | None:
8586
"""Return the state of the node."""
86-
try:
87+
with suppress(OSError):
8788
source = inspect.getsource(self.function)
88-
except OSError:
89-
return None
90-
else:
9189
return hashlib.sha256(source.encode()).hexdigest()
90+
return None
9291

9392
def execute(self, **kwargs: Any) -> Any:
9493
"""Execute the task."""
@@ -145,10 +144,7 @@ def signature(self) -> str:
145144

146145
def state(self) -> str | None:
147146
"""Return the state of the node."""
148-
if self.path.exists():
149-
modification_time = self.path.stat().st_mtime
150-
return hash_path(self.path, modification_time)
151-
return None
147+
return _get_state(self.path)
152148

153149
def execute(self, **kwargs: Any) -> Any:
154150
"""Execute the task."""
@@ -188,9 +184,7 @@ def state(self) -> str | None:
188184
The state is given by the modification timestamp.
189185
190186
"""
191-
if self.path.exists():
192-
return _get_state(self.path)
193-
return None
187+
return _get_state(self.path)
194188

195189
def load(self, is_product: bool = False) -> Path: # noqa: ARG002
196190
"""Load the value."""
@@ -324,9 +318,7 @@ def from_path(cls, path: Path) -> PickleNode:
324318
return cls(name=path.as_posix(), path=path)
325319

326320
def state(self) -> str | None:
327-
if self.path.exists():
328-
return _get_state(self.path)
329-
return None
321+
return _get_state(self.path)
330322

331323
def load(self, is_product: bool = False) -> Any:
332324
if is_product:
@@ -377,15 +369,19 @@ def collect(self) -> list[Path]:
377369
return list(self.root_dir.glob(self.pattern)) # type: ignore[union-attr]
378370

379371

380-
def _get_state(path: Path) -> str:
372+
def _get_state(path: Path) -> str | None:
381373
"""Get state of a path.
382374
383375
A simple function to handle local and remote files.
384376
385377
"""
386-
stat = path.stat()
378+
try:
379+
stat = path.stat()
380+
except FileNotFoundError:
381+
return None
382+
387383
if isinstance(stat, stat_result):
388-
modification_time = path.stat().st_mtime
384+
modification_time = stat.st_mtime
389385
return hash_path(path, modification_time)
390386
if isinstance(stat, UPathStatResult):
391387
return stat.as_info().get("ETag", "0")

‎src/_pytask/persist.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -40,21 +40,25 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None:
4040
4141
"""
4242
if has_mark(task, "persist"):
43-
all_nodes_exist = all(
43+
all_states = [
4444
(
4545
session.dag.nodes[name].get("task") or session.dag.nodes[name]["node"]
4646
).state()
4747
for name in node_and_neighbors(session.dag, task.signature)
48-
)
48+
]
49+
all_nodes_exist = all(all_states)
4950

5051
if all_nodes_exist:
5152
any_node_changed = any(
5253
has_node_changed(
5354
task=task,
5455
node=session.dag.nodes[name].get("task")
5556
or session.dag.nodes[name]["node"],
57+
state=state,
58+
)
59+
for name, state in zip(
60+
node_and_neighbors(session.dag, task.signature), all_states
5661
)
57-
for name in node_and_neighbors(session.dag, task.signature)
5862
)
5963
if any_node_changed:
6064
collect_provisional_products(session, task)

‎tests/conftest.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ class SysPathsSnapshot:
5454
"""A snapshot for sys.path."""
5555

5656
def __init__(self) -> None:
57-
self.__saved = list(sys.path), list(sys.meta_path)
57+
self.__saved = sys.path.copy(), sys.meta_path.copy()
5858

5959
def restore(self) -> None:
6060
sys.path[:], sys.meta_path[:] = self.__saved
@@ -64,7 +64,7 @@ class SysModulesSnapshot:
6464
"""A snapshot for sys.modules."""
6565

6666
def __init__(self) -> None:
67-
self.__saved = dict(sys.modules)
67+
self.__saved = sys.modules.copy()
6868

6969
def restore(self) -> None:
7070
sys.modules.clear()

‎tests/test_collect.py

+1
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,7 @@ def __getattr__(self, name):
543543

544544
result = runner.invoke(cli, [tmp_path.as_posix()])
545545
assert result.exit_code == ExitCode.OK
546+
assert "attr_that_definitely_does_not_exist" not in result.output
546547

547548

548549
@pytest.mark.end_to_end()

0 commit comments

Comments
 (0)