From bee6d42a02a19bfd4a41f71cba680bc492def52a Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 17 Dec 2023 22:39:32 +0100 Subject: [PATCH 1/4] Collect all tasks in COLLECTED_TASKS. --- docs/source/changes.md | 2 ++ src/_pytask/collect.py | 59 ++++++++++++++++++--------------------- src/_pytask/task.py | 32 ++------------------- src/_pytask/task_utils.py | 28 +++++++++++++++++++ tests/test_task.py | 5 ++-- 5 files changed, 62 insertions(+), 64 deletions(-) diff --git a/docs/source/changes.md b/docs/source/changes.md index 017e5970..f0623972 100644 --- a/docs/source/changes.md +++ b/docs/source/changes.md @@ -21,6 +21,8 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and - {pull}`524` improves some linting and formatting rules. - {pull}`525` enables pytask to work with remote files using universal_pathlib. - {pull}`528` improves the codecov setup and coverage. +- {pull}`529` adds a collection for all tasks cached in + {obj}`~_pytask.task_utils.COLLECTED_TASKS` regardless of location. 
## 0.4.4 - 2023-12-04 diff --git a/src/_pytask/collect.py b/src/_pytask/collect.py index 04ac49c4..7bafcd5b 100644 --- a/src/_pytask/collect.py +++ b/src/_pytask/collect.py @@ -21,7 +21,6 @@ from _pytask.console import create_summary_panel from _pytask.console import get_file from _pytask.exceptions import CollectionError -from _pytask.exceptions import NodeNotCollectedError from _pytask.mark_utils import get_all_marks from _pytask.mark_utils import has_mark from _pytask.node_protocols import PNode @@ -39,6 +38,8 @@ from _pytask.reports import CollectionReport from _pytask.shared import find_duplicates from _pytask.task_utils import COLLECTED_TASKS +from _pytask.task_utils import parse_collected_tasks_with_task_marker +from _pytask.task_utils import raise_error_when_task_functions_are_duplicated from _pytask.task_utils import task as task_decorator from _pytask.typing import is_task_function from rich.text import Text @@ -132,43 +133,37 @@ def _collect_from_tasks(session: Session) -> None: session.collection_reports.append(report) -_FAILED_COLLECTING_TASK = """\ -Failed to collect task '{name}'{path_desc}. +def _collect_not_collected_tasks(session: Session) -> None: + """Collect tasks that are not collected yet. -This can happen when the task function is defined in another module, imported to a \ -task module and wrapped with the '@task' decorator. + If task functions are imported from another module and then wrapped with ``@task``, + they would usually not be collected since their module is the imported module and + not the task module. This function collects these tasks and all others cached in + ``COLLECTED_TASKS``. -To collect this task correctly, wrap the imported function in a lambda expression like + """ + for path in list(COLLECTED_TASKS): + tasks = COLLECTED_TASKS.pop(path) -task(...)(lambda **x: imported_function(**x)). -""" + # Remove tasks from the global to avoid re-collection if programmatic interface + # is used. 
+ raise_error_when_task_functions_are_duplicated(tasks) + name_to_function = parse_collected_tasks_with_task_marker(tasks) -def _collect_not_collected_tasks(session: Session) -> None: - """Collect tasks that are not collected yet and create failed reports.""" - for path in list(COLLECTED_TASKS): - tasks = COLLECTED_TASKS.pop(path) - for task in tasks: - name = task.pytask_meta.name # type: ignore[attr-defined] - node: PTask - if path: - node = Task(base_name=name, path=path, function=task) - path_desc = f" in '{path}'" - else: - node = TaskWithoutPath(name=name, function=task) - path_desc = "" - report = CollectionReport( - outcome=CollectionOutcome.FAIL, - node=node, - exc_info=( - NodeNotCollectedError, - NodeNotCollectedError( - _FAILED_COLLECTING_TASK.format(name=name, path_desc=path_desc) - ), - None, - ), + collected_reports = [] + for name, function in name_to_function.items(): + report = session.hook.pytask_collect_task_protocol( + session=session, + reports=session.collection_reports, + path=path, + name=name, + obj=function, ) - session.collection_reports.append(report) + if report is not None: + collected_reports.append(report) + + session.collection_reports.extend(collected_reports) @hookimpl diff --git a/src/_pytask/task.py b/src/_pytask/task.py index 2309695b..5b103b63 100644 --- a/src/_pytask/task.py +++ b/src/_pytask/task.py @@ -2,14 +2,12 @@ from __future__ import annotations from typing import Any -from typing import Callable from typing import TYPE_CHECKING from _pytask.config import hookimpl -from _pytask.console import format_strings_as_flat_tree -from _pytask.shared import find_duplicates from _pytask.task_utils import COLLECTED_TASKS from _pytask.task_utils import parse_collected_tasks_with_task_marker +from _pytask.task_utils import raise_error_when_task_functions_are_duplicated if TYPE_CHECKING: from _pytask.reports import CollectionReport @@ -40,7 +38,7 @@ def pytask_collect_file( # is used. 
tasks = COLLECTED_TASKS.pop(path) - _raise_error_when_task_functions_are_duplicated(tasks) + raise_error_when_task_functions_are_duplicated(tasks) name_to_function = parse_collected_tasks_with_task_marker(tasks) @@ -54,29 +52,3 @@ def pytask_collect_file( return collected_reports return None - - -def _raise_error_when_task_functions_are_duplicated( - tasks: list[Callable[..., Any]], -) -> None: - """Raise error when task functions are duplicated. - - When task functions are created outside the loop body, every wrapped version of the - - """ - duplicates = find_duplicates(tasks) - if not duplicates: - return - - strings = [ - f"function_name={func.pytask_meta.name}, id={func.pytask_meta.id_}" - for func in duplicates - ] - flat_tree = format_strings_as_flat_tree(strings, "Duplicated tasks") - msg = ( - "There are some duplicates among the repeated tasks. It happens when you define" - "the task function outside the loop body and merely wrap in the loop body with " - "the 'task(...)(func)' decorator. 
As a workaround, wrap the task function in " - f"a lambda expression like 'task(...)(lambda **x: func(**x))'.\n\n{flat_tree}" - ) - raise ValueError(msg) diff --git a/src/_pytask/task_utils.py b/src/_pytask/task_utils.py index dd4fd269..4c3124ed 100644 --- a/src/_pytask/task_utils.py +++ b/src/_pytask/task_utils.py @@ -9,6 +9,7 @@ from typing import TYPE_CHECKING import attrs +from _pytask.console import format_strings_as_flat_tree from _pytask.console import get_file from _pytask.mark import Mark from _pytask.models import CollectionMetadata @@ -23,6 +24,7 @@ "COLLECTED_TASKS", "parse_collected_tasks_with_task_marker", "parse_keyword_arguments_from_signature_defaults", + "raise_error_when_task_functions_are_duplicated", "task", ] @@ -332,3 +334,29 @@ def _arg_value_to_id_component( else: id_component = arg_name + str(i) return id_component + + +def raise_error_when_task_functions_are_duplicated( + tasks: list[Callable[..., Any]], +) -> None: + """Raise error when task functions are duplicated. + + Repeatedly wrapping a function defined outside the loop body duplicates the task. + + """ + duplicates = find_duplicates(tasks) + if not duplicates: + return + + strings = [ + f"function_name={func.pytask_meta.name}, id={func.pytask_meta.id_}" + for func in duplicates + ] + flat_tree = format_strings_as_flat_tree(strings, "Duplicated tasks") + msg = ( + "There are some duplicates among the repeated tasks. It happens when you define " + "the task function outside the loop body and merely wrap in the loop body with " + "the 'task(...)(func)' decorator. 
As a workaround, wrap the task function in " + f"a lambda expression like 'task(...)(lambda **x: func(**x))'.\n\n{flat_tree}" + ) + raise ValueError(msg) diff --git a/tests/test_task.py b/tests/test_task.py index 3f427ce9..e8e2cf45 100644 --- a/tests/test_task.py +++ b/tests/test_task.py @@ -720,5 +720,6 @@ def func(): tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) result = runner.invoke(cli, [tmp_path.as_posix()]) - assert result.exit_code == ExitCode.COLLECTION_FAILED - assert "1 Failed" in result.output + assert result.exit_code == ExitCode.OK + assert "1 Succeeded" in result.output + assert tmp_path.joinpath("out.txt").read_text() == "Hello, World!" From 9805272675412fbd588961cd2883e9ff2d296823 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Mon, 18 Dec 2023 00:06:14 +0100 Subject: [PATCH 2/4] Add module argument to @task for cosmetic change. --- src/_pytask/task_utils.py | 41 ++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/src/_pytask/task_utils.py b/src/_pytask/task_utils.py index 4c3124ed..651cea57 100644 --- a/src/_pytask/task_utils.py +++ b/src/_pytask/task_utils.py @@ -3,10 +3,10 @@ import inspect from collections import defaultdict +from pathlib import Path from types import BuiltinFunctionType from typing import Any from typing import Callable -from typing import TYPE_CHECKING import attrs from _pytask.console import format_strings_as_flat_tree @@ -16,9 +16,6 @@ from _pytask.shared import find_duplicates from _pytask.typing import is_task_function -if TYPE_CHECKING: - from pathlib import Path - __all__ = [ "COLLECTED_TASKS", @@ -32,19 +29,20 @@ COLLECTED_TASKS: dict[Path | None, list[Callable[..., Any]]] = defaultdict(list) """A container for collecting tasks. -Tasks marked by the ``@pytask.mark.task`` decorator can be generated in a loop where one -iteration overwrites the previous task. 
To retrieve the tasks later, use this dictionary -mapping from paths of modules to a list of tasks per module. +Tasks marked by the ``@task`` decorator can be generated in a loop where one iteration +overwrites the previous task. To retrieve the tasks later, use this dictionary mapping +from paths of modules to a list of tasks per module. """ -def task( +def task( # noqa: PLR0913 name: str | None = None, *, after: str | Callable[..., Any] | list[Callable[..., Any]] | None = None, id: str | None = None, # noqa: A002 kwargs: dict[Any, Any] | None = None, + module: Path | str | None = None, produces: Any | None = None, ) -> Callable[..., Callable[..., Any]]: """Decorate a task function. @@ -71,6 +69,25 @@ def task( Use a dictionary to pass any keyword arguments to the task function which can be dependencies or products of the task. Read :ref:`task-kwargs` for more information. + module + An experimental and cosmetic feature. + + By default, the module is the location where the task function is defined. When + a task function is imported in a task module and wrapped with + :func:`@task <pytask.task>`, this argument allows setting the path to the task + module instead of the imported module. + + .. code-block:: python + + from pytask import task + from module import function + + # Location will be 'module.py'. + task()(function) + + # Location will be this module, e.g., 'task_module.py'. + task(module=__file__)(function) + produces Use this argument if you want to parse the return of the task function as a product, but you cannot annotate the return of the function. See :doc:`this @@ -83,9 +100,11 @@ def task( .. code-block:: python - from typing import Annotated from pytask import task + from typing import Annotated + from pytask import task - @task def create_text_file() -> Annotated[str, Path("file.txt")]: + @task + def create_text_file() -> Annotated[str, Path("file.txt")]: return "Hello, World!" 
""" @@ -114,7 +133,7 @@ def wrapper(func: Callable[..., Any]) -> Callable[..., Any]: ) raise NotImplementedError(msg) - path = get_file(unwrapped) + path = Path(module).resolve() if module else get_file(unwrapped) parsed_kwargs = {} if kwargs is None else kwargs parsed_name = name if isinstance(name, str) else func.__name__ From c4901dd3a653538fb81e385c3238b677010f01ee Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Mon, 18 Dec 2023 00:33:17 +0100 Subject: [PATCH 3/4] Test module option. --- src/_pytask/task_utils.py | 13 ++++++++++++- tests/test_task.py | 25 +++++++++++++++++++------ 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/_pytask/task_utils.py b/src/_pytask/task_utils.py index 651cea57..7e0f1ee3 100644 --- a/src/_pytask/task_utils.py +++ b/src/_pytask/task_utils.py @@ -77,6 +77,8 @@ def task( # noqa: PLR0913 :func:`@task <pytask.task>`, this argument allows setting the path to the task module instead of the imported module. + Relative paths are resolved relative to the current working directory. + .. code-block:: python from pytask import task @@ -133,7 +135,16 @@ def wrapper(func: Callable[..., Any]) -> Callable[..., Any]: ) raise NotImplementedError(msg) - path = Path(module).resolve() if module else get_file(unwrapped) + if not module: + path = get_file(unwrapped) + else: + path = Path(module).resolve() + if not path.exists(): + msg = ( + f"Module '{path}' does not exist, but is set as " + "'@task(module=...)'." 
+ ) + raise ValueError(msg) parsed_kwargs = {} if kwargs is None else kwargs parsed_name = name if isinstance(name, str) else func.__name__ diff --git a/tests/test_task.py b/tests/test_task.py index e8e2cf45..95fa8d50 100644 --- a/tests/test_task.py +++ b/tests/test_task.py @@ -697,14 +697,15 @@ def test_raise_error_with_builtin_function_as_task(runner, tmp_path): assert "Builtin functions cannot be wrapped" in result.output -def test_task_function_in_another_module(runner, tmp_path): +@pytest.mark.parametrize("module", [None, "__file__", "'a'"]) +def test_task_function_in_another_module(runner, tmp_path, module): source = """ def func(): return "Hello, World!" """ tmp_path.joinpath("module.py").write_text(textwrap.dedent(source)) - source = """ + source = f""" from pytask import task from pathlib import Path from _pytask.path import import_path @@ -715,11 +716,23 @@ def func(): module = import_path(_ROOT_PATH / "module.py", _ROOT_PATH) name_to_obj = dict(inspect.getmembers(module)) - task(produces=Path("out.txt"))(name_to_obj["func"]) + task(produces=Path("out.txt"), module={module})(name_to_obj["func"]) """ tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) result = runner.invoke(cli, [tmp_path.as_posix()]) - assert result.exit_code == ExitCode.OK - assert "1 Succeeded" in result.output - assert tmp_path.joinpath("out.txt").read_text() == "Hello, World!" + + if module == "'a'": + assert result.exit_code == ExitCode.COLLECTION_FAILED + assert "ValueError: Module" in result.output + + else: + assert result.exit_code == ExitCode.OK + assert "1 Succeeded" in result.output + assert tmp_path.joinpath("out.txt").read_text() == "Hello, World!" + + # Check whether the module is overwritten or not. + if module: + assert "task_example.py" in result.output + else: + assert "module.py" in result.output From c21a6ec403e0a2ca452b75e5722c9fdeb12eae0a Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Mon, 18 Dec 2023 00:50:38 +0100 Subject: [PATCH 4/4] Fix. 
--- src/_pytask/nodes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py index b9c2f8ac..52a3d350 100644 --- a/src/_pytask/nodes.py +++ b/src/_pytask/nodes.py @@ -94,7 +94,8 @@ class Task(PTaskWithPath): base_name The base name of the task. path - Path to the file where the task was defined. + Path to the file where the task was defined. It is used to collect the task and + for displaying information. function The task function. name