Skip to content

Commit d218e5a

Browse files
authored
Merge branch 'main' into delayed-nodes
2 parents c3b5596 + fe0c91c commit d218e5a

File tree

10 files changed

+35
-39
lines changed

10 files changed

+35
-39
lines changed

.github/workflows/main.yml

-3
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,6 @@ concurrency:
55
group: ${{ github.head_ref || github.run_id }}
66
cancel-in-progress: true
77

8-
env:
9-
CONDA_EXE: mamba
10-
118
on:
129
push:
1310
branches:

.pre-commit-config.yaml

+3-5
Original file line numberDiff line numberDiff line change
@@ -33,18 +33,18 @@ repos:
3333
hooks:
3434
- id: sort-all
3535
- repo: https://github.com/astral-sh/ruff-pre-commit
36-
rev: v0.3.1
36+
rev: v0.3.2
3737
hooks:
3838
- id: ruff-format
3939
- id: ruff
4040
args: [--unsafe-fixes]
4141
- repo: https://github.com/dosisod/refurb
42-
rev: v1.28.0
42+
rev: v2.0.0
4343
hooks:
4444
- id: refurb
4545
args: [--ignore, FURB126]
4646
- repo: https://github.com/pre-commit/mirrors-mypy
47-
rev: v1.8.0
47+
rev: v1.9.0
4848
hooks:
4949
- id: mypy
5050
additional_dependencies: [
@@ -80,8 +80,6 @@ repos:
8080
- repo: https://github.com/nbQA-dev/nbQA
8181
rev: 1.8.4
8282
hooks:
83-
- id: nbqa-black
84-
- id: nbqa-isort
8583
- id: nbqa-mypy
8684
args: [--ignore-missing-imports]
8785
- repo: https://github.com/kynan/nbstripout

docs/source/changes.md

+2
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and
2020
- {pull}`566` makes universal-pathlib an official dependency.
2121
- {pull}`568` restricts `task_files` to a list of patterns and raises a better error.
2222
- {pull}`569` removes the hooks related to the creation of the DAG.
23+
- {pull}`571` removes redundant calls to `PNode.state()` which causes a high penalty for
24+
remote files.
2325

2426
## 0.4.5 - 2024-01-09
2527

src/_pytask/_hashlib.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -232,5 +232,5 @@ def hash_value(value: Any) -> int | str:
232232
if isinstance(value, str):
233233
value = value.encode()
234234
if isinstance(value, bytes):
235-
return str(hashlib.sha256(value).hexdigest())
235+
return hashlib.sha256(value).hexdigest()
236236
return hash(value)

src/_pytask/data_catalog.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def add(self, name: str, node: PNode | PProvisionalNode | None = None) -> None:
100100
raise TypeError(msg)
101101

102102
if node is None:
103-
filename = str(hashlib.sha256(name.encode()).hexdigest())
103+
filename = hashlib.sha256(name.encode()).hexdigest()
104104
if isinstance(self.default_node, PPathNode):
105105
self.entries[name] = self.default_node(
106106
name=name, path=self.path / f"{filename}.pkl"

src/_pytask/database_utils.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,10 @@ def update_states_in_database(session: Session, task_signature: str) -> None:
6969
_create_or_update_state(task_signature, node.signature, hash_)
7070

7171

72-
def has_node_changed(task: PTask, node: PTask | PNode) -> bool:
72+
def has_node_changed(task: PTask, node: PTask | PNode, state: str | None) -> bool:
7373
"""Indicate whether a single dependency or product has changed."""
7474
# If node does not exist, we receive None.
75-
node_state = node.state()
76-
if node_state is None:
75+
if state is None:
7776
return True
7877

7978
with DatabaseSession() as session:
@@ -83,4 +82,4 @@ def has_node_changed(task: PTask, node: PTask | PNode) -> bool:
8382
if db_state is None:
8483
return True
8584

86-
return node_state != db_state.hash_
85+
return state != db_state.hash_

src/_pytask/execute.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -146,8 +146,8 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None: # noqa: C
146146
node = dag.nodes[node_signature].get("task") or dag.nodes[
147147
node_signature
148148
].get("node")
149-
150-
if node_signature in predecessors and not node.state():
149+
node_state = node.state()
150+
if node_signature in predecessors and not node_state:
151151
msg = f"{task.name!r} requires missing node {node.name!r}."
152152
if IS_FILE_SYSTEM_CASE_SENSITIVE:
153153
msg += (
@@ -162,7 +162,7 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None: # noqa: C
162162
):
163163
continue
164164

165-
has_changed = has_node_changed(task=task, node=node)
165+
has_changed = has_node_changed(task=task, node=node, state=node_state)
166166
if has_changed:
167167
needs_to_be_executed = True
168168
break

src/_pytask/nodes.py

+13-17
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import hashlib
66
import inspect
77
import pickle
8+
from contextlib import suppress
89
from os import stat_result
910
from pathlib import Path # noqa: TCH003
1011
from typing import TYPE_CHECKING
@@ -83,12 +84,10 @@ def signature(self) -> str:
8384

8485
def state(self) -> str | None:
8586
"""Return the state of the node."""
86-
try:
87+
with suppress(OSError):
8788
source = inspect.getsource(self.function)
88-
except OSError:
89-
return None
90-
else:
9189
return hashlib.sha256(source.encode()).hexdigest()
90+
return None
9291

9392
def execute(self, **kwargs: Any) -> Any:
9493
"""Execute the task."""
@@ -145,10 +144,7 @@ def signature(self) -> str:
145144

146145
def state(self) -> str | None:
147146
"""Return the state of the node."""
148-
if self.path.exists():
149-
modification_time = self.path.stat().st_mtime
150-
return hash_path(self.path, modification_time)
151-
return None
147+
return _get_state(self.path)
152148

153149
def execute(self, **kwargs: Any) -> Any:
154150
"""Execute the task."""
@@ -188,9 +184,7 @@ def state(self) -> str | None:
188184
The state is given by the modification timestamp.
189185
190186
"""
191-
if self.path.exists():
192-
return _get_state(self.path)
193-
return None
187+
return _get_state(self.path)
194188

195189
def load(self, is_product: bool = False) -> Path: # noqa: ARG002
196190
"""Load the value."""
@@ -324,9 +318,7 @@ def from_path(cls, path: Path) -> PickleNode:
324318
return cls(name=path.as_posix(), path=path)
325319

326320
def state(self) -> str | None:
327-
if self.path.exists():
328-
return _get_state(self.path)
329-
return None
321+
return _get_state(self.path)
330322

331323
def load(self, is_product: bool = False) -> Any:
332324
if is_product:
@@ -377,15 +369,19 @@ def collect(self) -> list[Path]:
377369
return list(self.root_dir.glob(self.pattern)) # type: ignore[union-attr]
378370

379371

380-
def _get_state(path: Path) -> str:
372+
def _get_state(path: Path) -> str | None:
381373
"""Get state of a path.
382374
383375
A simple function to handle local and remote files.
384376
385377
"""
386-
stat = path.stat()
378+
try:
379+
stat = path.stat()
380+
except FileNotFoundError:
381+
return None
382+
387383
if isinstance(stat, stat_result):
388-
modification_time = path.stat().st_mtime
384+
modification_time = stat.st_mtime
389385
return hash_path(path, modification_time)
390386
if isinstance(stat, UPathStatResult):
391387
return stat.as_info().get("ETag", "0")

src/_pytask/persist.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -40,21 +40,25 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None:
4040
4141
"""
4242
if has_mark(task, "persist"):
43-
all_nodes_exist = all(
43+
all_states = [
4444
(
4545
session.dag.nodes[name].get("task") or session.dag.nodes[name]["node"]
4646
).state()
4747
for name in node_and_neighbors(session.dag, task.signature)
48-
)
48+
]
49+
all_nodes_exist = all(all_states)
4950

5051
if all_nodes_exist:
5152
any_node_changed = any(
5253
has_node_changed(
5354
task=task,
5455
node=session.dag.nodes[name].get("task")
5556
or session.dag.nodes[name]["node"],
57+
state=state,
58+
)
59+
for name, state in zip(
60+
node_and_neighbors(session.dag, task.signature), all_states
5661
)
57-
for name in node_and_neighbors(session.dag, task.signature)
5862
)
5963
if any_node_changed:
6064
collect_provisional_products(session, task)

tests/conftest.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ class SysPathsSnapshot:
5454
"""A snapshot for sys.path."""
5555

5656
def __init__(self) -> None:
57-
self.__saved = list(sys.path), list(sys.meta_path)
57+
self.__saved = sys.path.copy(), sys.meta_path.copy()
5858

5959
def restore(self) -> None:
6060
sys.path[:], sys.meta_path[:] = self.__saved
@@ -64,7 +64,7 @@ class SysModulesSnapshot:
6464
"""A snapshot for sys.modules."""
6565

6666
def __init__(self) -> None:
67-
self.__saved = dict(sys.modules)
67+
self.__saved = sys.modules.copy()
6868

6969
def restore(self) -> None:
7070
sys.modules.clear()

0 commit comments

Comments
 (0)