diff --git a/docs/source/changes.md b/docs/source/changes.md index e9c9038f..5762765c 100644 --- a/docs/source/changes.md +++ b/docs/source/changes.md @@ -29,11 +29,17 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and - {pull}`586` improves linting. - {pull}`587` improves typing of `capture.py`. - {pull}`588` resets class variables of `ExecutionReport` and `Traceback`. +- {pull}`589` enables `import_path` to resolve the root path and module name of an + imported file. - {pull}`590` fixes an error introduced in {pull}`588`. - {pull}`591` invalidates the cache of fsspec when checking whether a remote file exists. Otherwise, a remote file might be reported as missing although it was just created. See https://github.com/fsspec/s3fs/issues/851 for more info. +## 0.4.7 - 2024-03-19 + +- {pull}`580` is a backport of {pull}`579`. + ## 0.4.6 - 2024-03-13 - {pull}`576` fixes accidentally collecting `pytask.MarkGenerator` when using diff --git a/environment.yml b/environment.yml index 1cd3b6fa..04062711 100644 --- a/environment.yml +++ b/environment.yml @@ -29,7 +29,6 @@ dependencies: - jupyterlab - matplotlib - nbmake - - pre-commit - pygraphviz - pytest - pytest-cov @@ -47,8 +46,8 @@ dependencies: - sphinx-click - sphinx-copybutton - sphinx-design >=0.3.0 + - sphinx-toolbox - sphinxext-opengraph - pip: - - sphinx-toolbox - -e . diff --git a/src/_pytask/path.py b/src/_pytask/path.py index d0bf1f58..aa4dc575 100644 --- a/src/_pytask/path.py +++ b/src/_pytask/path.py @@ -5,6 +5,7 @@ import contextlib import functools import importlib.util +import itertools import os import sys from pathlib import Path @@ -125,11 +126,25 @@ def find_case_sensitive_path(path: Path, platform: str) -> Path: def import_path(path: Path, root: Path) -> ModuleType: """Import and return a module from the given path. - The function is taken from pytest when the import mode is set to ``importlib``. It - pytest's recommended import mode for new projects although the default is set to - ``prepend``. More discussion and information can be found in :issue:`373`. + The functions are taken from pytest when the import mode is set to ``importlib``. It + was assumed to be the new default import mode but insurmountable tradeoffs caused + the default to be set to ``prepend``. More discussion and information can be found + in :issue:`373`. """ + try: + pkg_root, module_name = _resolve_pkg_root_and_module_name(path) + except CouldNotResolvePathError: + pass + else: + # If the given module name is already in sys.modules, do not import it again. + with contextlib.suppress(KeyError): + return sys.modules[module_name] + + mod = _import_module_using_spec(module_name, path, pkg_root) + if mod is not None: + return mod + module_name = _module_name_from_path(path, root) with contextlib.suppress(KeyError): return sys.modules[module_name] @@ -147,42 +162,134 @@ def import_path(path: Path, root: Path) -> ModuleType: return mod +def _resolve_package_path(path: Path) -> Path | None: + """Resolve package path. + + Return the Python package path by looking for the last directory upwards which still + contains an ``__init__.py``. + + Returns None if it can not be determined. + + """ + result = None + for parent in itertools.chain((path,), path.parents): + if parent.is_dir(): + if not (parent / "__init__.py").is_file(): + break + if not parent.name.isidentifier(): + break + result = parent + return result + + +def _resolve_pkg_root_and_module_name(path: Path) -> tuple[Path, str]: + """Resolve the root package directory and module name for the given Python file. + + Return the path to the directory of the root package that contains the given Python + file, and its module name: + + .. code-block:: text + + src/ + app/ + __init__.py core/ + __init__.py models.py + + Passing the full path to `models.py` will yield Path("src") and "app.core.models". + + Raises CouldNotResolvePathError if the given path does not belong to a package + (missing any __init__.py files). + + """ + pkg_path = _resolve_package_path(path) + if pkg_path is not None: + pkg_root = pkg_path.parent + + names = list(path.with_suffix("").relative_to(pkg_root).parts) + if names[-1] == "__init__": + names.pop() + module_name = ".".join(names) + return pkg_root, module_name + + msg = f"Could not resolve for {path}" + raise CouldNotResolvePathError(msg) + + +class CouldNotResolvePathError(Exception): + """Custom exception raised by _resolve_pkg_root_and_module_name.""" + + +def _import_module_using_spec( + module_name: str, module_path: Path, module_location: Path +) -> ModuleType | None: + """Import a module using its specification. + + Tries to import a module by its canonical name, path to the .py file, and its parent + location. + + """ + # Checking with sys.meta_path first in case one of its hooks can import this module, + # such as our own assertion-rewrite hook. + for meta_importer in sys.meta_path: + spec = meta_importer.find_spec(module_name, [str(module_location)]) + if spec is not None: + break + else: + spec = importlib.util.spec_from_file_location(module_name, str(module_path)) + if spec is not None: + mod = importlib.util.module_from_spec(spec) + sys.modules[module_name] = mod + spec.loader.exec_module(mod) # type: ignore[union-attr] + return mod + + return None + + def _module_name_from_path(path: Path, root: Path) -> str: """Return a dotted module name based on the given path, anchored on root. - For example: path="projects/src/project/task_foo.py" and root="/projects", the - resulting module name will be "src.project.task_foo". + For example: path="projects/src/tasks/task_foo.py" and root="/projects", the + resulting module name will be "src.tasks.task_foo". """ path = path.with_suffix("") try: relative_path = path.relative_to(root) except ValueError: - # If we can't get a relative path to root, use the full path, except for the - # first part ("d:\\" or "/" depending on the platform, for example). + # If we can't get a relative path to root, use the full path, except + # for the first part ("d:\\" or "/" depending on the platform, for example). path_parts = path.parts[1:] else: # Use the parts for the relative path to the root path. path_parts = relative_path.parts - # Module name for packages do not contain the __init__ file, unless the - # `__init__.py` file is at the root. + # Module name for packages do not contain the __init__ file, unless + # the `__init__.py` file is at the root. if len(path_parts) >= 2 and path_parts[-1] == "__init__": # noqa: PLR2004 path_parts = path_parts[:-1] + # Module names cannot contain ".", normalize them to "_". This prevents a directory + # having a "." in the name (".env.310" for example) causing extra intermediate + # modules. Also, important to replace "." at the start of paths, as those are + # considered relative imports. + path_parts = tuple(x.replace(".", "_") for x in path_parts) + return ".".join(path_parts) def _insert_missing_modules(modules: dict[str, ModuleType], module_name: str) -> None: - """Insert missing modules when importing modules with :func:`import_path`. + """Insert missing modules in sys.modules. - When we want to import a module as ``src.project.task_foo`` for example, we need to - create empty modules ``src`` and ``src.project`` after inserting - ``src.project.task_foo``, otherwise ``src.project.task_foo`` is not importable by - ``__import__``. + Used by ``import_path`` to create intermediate modules when using mode=importlib. + When we want to import a module as "src.tasks.task_foo" for example, we need to + create empty modules "src" and "src.tasks" after inserting "src.tasks.task_foo", + otherwise "src.tasks.task_foo" is not importable by ``__import__``. """ module_parts = module_name.split(".") + child_module: ModuleType | None = None + module: ModuleType | None = None + child_name: str = "" while module_name: if module_name not in modules: try: @@ -192,13 +299,20 @@ def _insert_missing_modules(modules: dict[str, ModuleType], module_name: str) -> # creating a dummy module. if not sys.meta_path: raise ModuleNotFoundError # noqa: TRY301 - importlib.import_module(module_name) + module = importlib.import_module(module_name) except ModuleNotFoundError: module = ModuleType( module_name, - doc="Empty module created by pytask.", + doc="Empty module created by pytest's importmode=importlib.", ) - modules[module_name] = module + else: + module = modules[module_name] + # Add child attribute to the parent that can reference the child modules. + if child_module and not hasattr(module, child_name): + setattr(module, child_name, child_module) + modules[module_name] = module + # Keep track of the child module while moving up the tree. + child_module, child_name = module, module_name.rpartition(".")[-1] module_parts.pop(-1) module_name = ".".join(module_parts) diff --git a/tests/test_path.py b/tests/test_path.py index d79f4ffa..286b512d 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -179,182 +179,214 @@ def test_no_meta_path_found( @pytest.mark.unit() -def test_importmode_importlib_with_dataclass(tmp_path: Path) -> None: - """ - Ensure that importlib mode works with a module containing dataclasses (#373, - pytest#7856). - """ - fn = tmp_path.joinpath("_src/project/task_dataclass.py") - fn.parent.mkdir(parents=True) - fn.write_text( - textwrap.dedent( - """ - from dataclasses import dataclass - - @dataclass - class Data: - value: str - """ +class TestImportLibMode: + def test_importmode_importlib_with_dataclass(self, tmp_path: Path) -> None: + """ + Ensure that importlib mode works with a module containing dataclasses (#373, + pytest#7856). + """ + fn = tmp_path.joinpath("_src/project/task_dataclass.py") + fn.parent.mkdir(parents=True) + fn.write_text( + textwrap.dedent( + """ + from dataclasses import dataclass + + @dataclass + class Data: + value: str + """ + ) ) - ) - - module = import_path(fn, root=tmp_path) - Data: Any = module.Data # noqa: N806 - data = Data(value="foo") - assert data.value == "foo" - assert data.__module__ == "_src.project.task_dataclass" - -@pytest.mark.unit() -def test_importmode_importlib_with_pickle(tmp_path: Path) -> None: - """Ensure that importlib mode works with pickle (#373, pytest#7859).""" - fn = tmp_path.joinpath("_src/project/task_pickle.py") - fn.parent.mkdir(parents=True) - fn.write_text( - textwrap.dedent( - """ - import pickle - - def _action(): - return 42 - - def round_trip(): - s = pickle.dumps(_action) - return pickle.loads(s) - """ + module = import_path(fn, root=tmp_path) + Data: Any = module.Data # noqa: N806 + data = Data(value="foo") + assert data.value == "foo" + assert data.__module__ == "_src.project.task_dataclass" + + # Ensure we do not import the same module again (pytest#11475). + module2 = import_path(fn, root=tmp_path) + assert module is module2 + + def test_importmode_importlib_with_pickle(self, tmp_path: Path) -> None: + """Ensure that importlib mode works with pickle (#373, pytest#7859).""" + fn = tmp_path.joinpath("_src/project/task_pickle.py") + fn.parent.mkdir(parents=True) + fn.write_text( + textwrap.dedent( + """ + import pickle + + def _action(): + return 42 + + def round_trip(): + s = pickle.dumps(_action) + return pickle.loads(s) + """ + ) ) - ) - module = import_path(fn, root=tmp_path) - round_trip = module.round_trip - action = round_trip() - assert action() == 42 - - -@pytest.mark.unit() -def test_importmode_importlib_with_pickle_separate_modules(tmp_path: Path) -> None: - """ - Ensure that importlib mode works can load pickles that look similar but are - defined in separate modules. - """ - fn1 = tmp_path.joinpath("_src/m1/project/task.py") - fn1.parent.mkdir(parents=True) - fn1.write_text( - textwrap.dedent( - """ - import dataclasses - import pickle - - @dataclasses.dataclass - class Data: - x: int = 42 - """ + module = import_path(fn, root=tmp_path) + round_trip = module.round_trip + action = round_trip() + assert action() == 42 + + def test_importmode_importlib_with_pickle_separate_modules( + self, tmp_path: Path + ) -> None: + """ + Ensure that importlib mode works can load pickles that look similar but are + defined in separate modules. + """ + fn1 = tmp_path.joinpath("_src/m1/project/task.py") + fn1.parent.mkdir(parents=True) + fn1.write_text( + textwrap.dedent( + """ + import dataclasses + import pickle + + @dataclasses.dataclass + class Data: + x: int = 42 + """ + ) ) - ) - fn2 = tmp_path.joinpath("_src/m2/project/task.py") - fn2.parent.mkdir(parents=True) - fn2.write_text( - textwrap.dedent( - """ - import dataclasses - import pickle - - @dataclasses.dataclass - class Data: - x: str = "" - """ + fn2 = tmp_path.joinpath("_src/m2/project/task.py") + fn2.parent.mkdir(parents=True) + fn2.write_text( + textwrap.dedent( + """ + import dataclasses + import pickle + + @dataclasses.dataclass + class Data: + x: str = "" + """ + ) ) - ) - import pickle + import pickle - def round_trip(obj): - s = pickle.dumps(obj) - return pickle.loads(s) # noqa: S301 + def round_trip(obj): + s = pickle.dumps(obj) + return pickle.loads(s) # noqa: S301 - module = import_path(fn1, root=tmp_path) - Data1 = module.Data # noqa: N806 + module = import_path(fn1, root=tmp_path) + Data1 = module.Data # noqa: N806 - module = import_path(fn2, root=tmp_path) - Data2 = module.Data # noqa: N806 + module = import_path(fn2, root=tmp_path) + Data2 = module.Data # noqa: N806 - assert round_trip(Data1(20)) == Data1(20) - assert round_trip(Data2("hello")) == Data2("hello") - assert Data1.__module__ == "_src.m1.project.task" - assert Data2.__module__ == "_src.m2.project.task" + assert round_trip(Data1(20)) == Data1(20) + assert round_trip(Data2("hello")) == Data2("hello") + assert Data1.__module__ == "_src.m1.project.task" + assert Data2.__module__ == "_src.m2.project.task" + def test_module_name_from_path(self, tmp_path: Path) -> None: + result = _module_name_from_path(tmp_path / "src/project/task_foo.py", tmp_path) + assert result == "src.project.task_foo" -@pytest.mark.unit() -def test_module_name_from_path(tmp_path: Path) -> None: - result = _module_name_from_path(tmp_path / "src/project/task_foo.py", tmp_path) - assert result == "src.project.task_foo" + # Path is not relative to root dir: use the full path to obtain the module name. + result = _module_name_from_path(Path("/home/foo/task_foo.py"), Path("/bar")) + assert result == "home.foo.task_foo" - # Path is not relative to root dir: use the full path to obtain the module name. - result = _module_name_from_path(Path("/home/foo/task_foo.py"), Path("/bar")) - assert result == "home.foo.task_foo" + # Importing __init__.py files should return the package as module name. + result = _module_name_from_path(tmp_path / "src/app/__init__.py", tmp_path) + assert result == "src.app" - # Importing __init__.py files should return the package as module name. - result = _module_name_from_path(tmp_path / "src/app/__init__.py", tmp_path) - assert result == "src.app" + # Unless __init__.py file is at the root, in which case we cannot have an empty + # module name. + result = _module_name_from_path(tmp_path / "__init__.py", tmp_path) + assert result == "__init__" + # Modules which start with "." are considered relative and will not be imported + # unless part of a package, so we replace it with a "_" when generating the fake + # module name. + result = _module_name_from_path(tmp_path / ".env/tasks/task_foo.py", tmp_path) + assert result == "_env.tasks.task_foo" -@pytest.mark.unit() -def test_insert_missing_modules( - monkeypatch: pytest.MonkeyPatch, tmp_path: Path -) -> None: - monkeypatch.chdir(tmp_path) - # Use 'xxx' and 'xxy' as parent names as they are unlikely to exist and - # don't end up being imported. - modules = {"xxx.project.foo": ModuleType("xxx.project.foo")} - _insert_missing_modules(modules, "xxx.project.foo") - assert sorted(modules) == ["xxx", "xxx.project", "xxx.project.foo"] - - mod = ModuleType("mod", doc="My Module") - modules = {"xxy": mod} - _insert_missing_modules(modules, "xxy") - assert modules == {"xxy": mod} - - modules = {} - _insert_missing_modules(modules, "") - assert not modules - - -@pytest.mark.unit() -def test_importlib_package(monkeypatch: pytest.MonkeyPatch, tmp_path: Path): - """ - Importing a package using --importmode=importlib should not import the - package's __init__.py file more than once (#11306). - """ - monkeypatch.chdir(tmp_path) - monkeypatch.syspath_prepend(tmp_path) - - package_name = "importlib_import_package" - tmp_path.joinpath(package_name).mkdir() - init = tmp_path.joinpath(f"{package_name}/__init__.py") - init.write_text( - textwrap.dedent( - """ - from .singleton import Singleton - instance = Singleton() - """ - ), - encoding="ascii", - ) - singleton = tmp_path.joinpath(f"{package_name}/singleton.py") - singleton.write_text( - textwrap.dedent( - """ - class Singleton: - INSTANCES = [] - def __init__(self) -> None: - self.INSTANCES.append(self) - if len(self.INSTANCES) > 1: - raise RuntimeError("Already initialized") - """ - ), - encoding="ascii", - ) + # We want to avoid generating extra intermediate modules if some directory just + # happens to contain a "." in the name. + result = _module_name_from_path( + tmp_path / ".env.310/tasks/task_foo.py", tmp_path + ) + assert result == "_env_310.tasks.task_foo" + + def test_insert_missing_modules( + self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path + ) -> None: + monkeypatch.chdir(tmp_path) + # Use 'xxx' and 'xxy' as parent names as they are unlikely to exist and + # don't end up being imported. + modules = {"xxx.project.foo": ModuleType("xxx.project.foo")} + _insert_missing_modules(modules, "xxx.project.foo") + assert sorted(modules) == ["xxx", "xxx.project", "xxx.project.foo"] + + mod = ModuleType("mod", doc="My Module") + modules = {"xxy": mod} + _insert_missing_modules(modules, "xxy") + assert modules == {"xxy": mod} + + modules = {} + _insert_missing_modules(modules, "") + assert not modules + + def test_parent_contains_child_module_attribute( + self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path + ): + monkeypatch.chdir(tmp_path) + # Use 'xxx' and 'xxy' as parent names as they are unlikely to exist and + # don't end up being imported. + modules = {"xxx.tasks.foo": ModuleType("xxx.tasks.foo")} + _insert_missing_modules(modules, "xxx.tasks.foo") + assert sorted(modules) == ["xxx", "xxx.tasks", "xxx.tasks.foo"] + assert modules["xxx"].tasks is modules["xxx.tasks"] + assert modules["xxx.tasks"].foo is modules["xxx.tasks.foo"] + + def test_importlib_package( + self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path + ) -> None: + """ + Importing a package using --importmode=importlib should not import the + package's __init__.py file more than once (#11306). + """ + monkeypatch.chdir(tmp_path) + monkeypatch.syspath_prepend(tmp_path) + + package_name = "importlib_import_package" + tmp_path.joinpath(package_name).mkdir() + init = tmp_path.joinpath(f"{package_name}/__init__.py") + init.write_text( + textwrap.dedent( + """ + from .singleton import Singleton + instance = Singleton() + """ + ), + encoding="ascii", + ) + singleton = tmp_path.joinpath(f"{package_name}/singleton.py") + singleton.write_text( + textwrap.dedent( + """ + class Singleton: + INSTANCES = [] + def __init__(self) -> None: + self.INSTANCES.append(self) + if len(self.INSTANCES) > 1: + raise RuntimeError("Already initialized") + """ + ), + encoding="ascii", + ) - mod = import_path(init, root=tmp_path) - assert len(mod.instance.INSTANCES) == 1 + mod = import_path(init, root=tmp_path) + assert len(mod.instance.INSTANCES) == 1 + # Ensure we do not import the same module again (#11475). + mod2 = import_path(init, root=tmp_path) + assert mod is mod2