Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resolve root paths and module names for imported files. #589

Merged
merged 16 commits into from
Apr 4, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/source/changes.md
Original file line number Diff line number Diff line change
@@ -29,11 +29,17 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and
- {pull}`586` improves linting.
- {pull}`587` improves typing of `capture.py`.
- {pull}`588` resets class variables of `ExecutionReport` and `Traceback`.
- {pull}`589` enables `import_path` to resolve the root path and module name of an
imported file.
- {pull}`590` fixes an error introduced in {pull}`588`.
- {pull}`591` invalidates the cache of fsspec when checking whether a remote file
exists. Otherwise, a remote file might be reported as missing although it was just
created. See https://github.com/fsspec/s3fs/issues/851 for more info.

## 0.4.7 - 2024-03-19

- {pull}`580` is a backport of {pull}`579`.

## 0.4.6 - 2024-03-13

- {pull}`576` fixes accidentally collecting `pytask.MarkGenerator` when using
3 changes: 1 addition & 2 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -29,7 +29,6 @@ dependencies:
- jupyterlab
- matplotlib
- nbmake
- pre-commit
- pygraphviz
- pytest
- pytest-cov
@@ -47,8 +46,8 @@ dependencies:
- sphinx-click
- sphinx-copybutton
- sphinx-design >=0.3.0
- sphinx-toolbox
- sphinxext-opengraph

- pip:
- sphinx-toolbox
- -e .
148 changes: 131 additions & 17 deletions src/_pytask/path.py
Original file line number Diff line number Diff line change
@@ -5,6 +5,7 @@
import contextlib
import functools
import importlib.util
import itertools
import os
import sys
from pathlib import Path
@@ -125,11 +126,25 @@ def find_case_sensitive_path(path: Path, platform: str) -> Path:
def import_path(path: Path, root: Path) -> ModuleType:
"""Import and return a module from the given path.

The function is taken from pytest when the import mode is set to ``importlib``. It
pytest's recommended import mode for new projects although the default is set to
``prepend``. More discussion and information can be found in :issue:`373`.
The functions are taken from pytest when the import mode is set to ``importlib``. It
was assumed to be the new default import mode but insurmountable tradeoffs caused
the default to be set to ``prepend``. More discussion and information can be found
in :issue:`373`.

"""
try:
pkg_root, module_name = _resolve_pkg_root_and_module_name(path)
except CouldNotResolvePathError:
pass
else:
# If the given module name is already in sys.modules, do not import it again.
with contextlib.suppress(KeyError):
return sys.modules[module_name]

mod = _import_module_using_spec(module_name, path, pkg_root)
if mod is not None:
return mod

module_name = _module_name_from_path(path, root)
with contextlib.suppress(KeyError):
return sys.modules[module_name]
@@ -147,42 +162,134 @@ def import_path(path: Path, root: Path) -> ModuleType:
return mod


def _resolve_package_path(path: Path) -> Path | None:
"""Resolve package path.

Return the Python package path by looking for the last directory upwards which still
contains an ``__init__.py``.

Returns None if it can not be determined.

"""
result = None
for parent in itertools.chain((path,), path.parents):
if parent.is_dir():
if not (parent / "__init__.py").is_file():
break
if not parent.name.isidentifier():
break
result = parent
return result


def _resolve_pkg_root_and_module_name(path: Path) -> tuple[Path, str]:
"""Resolve the root package directory and module name for the given Python file.

Return the path to the directory of the root package that contains the given Python
file, and its module name:

.. code-block:: text

src/
app/
__init__.py core/
__init__.py models.py

Passing the full path to `models.py` will yield Path("src") and "app.core.models".

Raises CouldNotResolvePathError if the given path does not belong to a package
(missing any __init__.py files).

"""
pkg_path = _resolve_package_path(path)
if pkg_path is not None:
pkg_root = pkg_path.parent

names = list(path.with_suffix("").relative_to(pkg_root).parts)
if names[-1] == "__init__":
names.pop()
module_name = ".".join(names)
return pkg_root, module_name

msg = f"Could not resolve for {path}"
raise CouldNotResolvePathError(msg)


class CouldNotResolvePathError(Exception):
"""Custom exception raised by _resolve_pkg_root_and_module_name."""


def _import_module_using_spec(
module_name: str, module_path: Path, module_location: Path
) -> ModuleType | None:
"""Import a module using its specification.

Tries to import a module by its canonical name, path to the .py file, and its parent
location.

"""
# Checking with sys.meta_path first in case one of its hooks can import this module,
# such as our own assertion-rewrite hook.
for meta_importer in sys.meta_path:
spec = meta_importer.find_spec(module_name, [str(module_location)])
if spec is not None:
break
else:
spec = importlib.util.spec_from_file_location(module_name, str(module_path))
if spec is not None:
mod = importlib.util.module_from_spec(spec)
sys.modules[module_name] = mod
spec.loader.exec_module(mod) # type: ignore[union-attr]
return mod

return None


def _module_name_from_path(path: Path, root: Path) -> str:
"""Return a dotted module name based on the given path, anchored on root.

For example: path="projects/src/project/task_foo.py" and root="/projects", the
resulting module name will be "src.project.task_foo".
For example: path="projects/src/tasks/task_foo.py" and root="/projects", the
resulting module name will be "src.tasks.task_foo".

"""
path = path.with_suffix("")
try:
relative_path = path.relative_to(root)
except ValueError:
# If we can't get a relative path to root, use the full path, except for the
# first part ("d:\\" or "/" depending on the platform, for example).
# If we can't get a relative path to root, use the full path, except
# for the first part ("d:\\" or "/" depending on the platform, for example).
path_parts = path.parts[1:]
else:
# Use the parts for the relative path to the root path.
path_parts = relative_path.parts

# Module name for packages do not contain the __init__ file, unless the
# `__init__.py` file is at the root.
# Module name for packages do not contain the __init__ file, unless
# the `__init__.py` file is at the root.
if len(path_parts) >= 2 and path_parts[-1] == "__init__": # noqa: PLR2004
path_parts = path_parts[:-1]

# Module names cannot contain ".", normalize them to "_". This prevents a directory
# having a "." in the name (".env.310" for example) causing extra intermediate
# modules. Also, important to replace "." at the start of paths, as those are
# considered relative imports.
path_parts = tuple(x.replace(".", "_") for x in path_parts)

return ".".join(path_parts)


def _insert_missing_modules(modules: dict[str, ModuleType], module_name: str) -> None:
"""Insert missing modules when importing modules with :func:`import_path`.
"""Insert missing modules in sys.modules.

When we want to import a module as ``src.project.task_foo`` for example, we need to
create empty modules ``src`` and ``src.project`` after inserting
``src.project.task_foo``, otherwise ``src.project.task_foo`` is not importable by
``__import__``.
Used by ``import_path`` to create intermediate modules when using mode=importlib.
When we want to import a module as "src.tasks.task_foo" for example, we need to
create empty modules "src" and "src.tasks" after inserting "src.tasks.task_foo",
otherwise "src.tasks.task_foo" is not importable by ``__import__``.

"""
module_parts = module_name.split(".")
child_module: ModuleType | None = None
module: ModuleType | None = None
child_name: str = ""
while module_name:
if module_name not in modules:
try:
@@ -192,13 +299,20 @@ def _insert_missing_modules(modules: dict[str, ModuleType], module_name: str) ->
# creating a dummy module.
if not sys.meta_path:
raise ModuleNotFoundError # noqa: TRY301
importlib.import_module(module_name)
module = importlib.import_module(module_name)
except ModuleNotFoundError:
module = ModuleType(
module_name,
doc="Empty module created by pytask.",
doc="Empty module created by pytest's importmode=importlib.",
)
modules[module_name] = module
else:
module = modules[module_name]
# Add child attribute to the parent that can reference the child modules.
if child_module and not hasattr(module, child_name):
setattr(module, child_name, child_module)
modules[module_name] = module
# Keep track of the child module while moving up the tree.
child_module, child_name = module, module_name.rpartition(".")[-1]
module_parts.pop(-1)
module_name = ".".join(module_parts)

352 changes: 192 additions & 160 deletions tests/test_path.py
Original file line number Diff line number Diff line change
@@ -179,182 +179,214 @@ def test_no_meta_path_found(


@pytest.mark.unit()
def test_importmode_importlib_with_dataclass(tmp_path: Path) -> None:
"""
Ensure that importlib mode works with a module containing dataclasses (#373,
pytest#7856).
"""
fn = tmp_path.joinpath("_src/project/task_dataclass.py")
fn.parent.mkdir(parents=True)
fn.write_text(
textwrap.dedent(
"""
from dataclasses import dataclass
@dataclass
class Data:
value: str
"""
class TestImportLibMode:
def test_importmode_importlib_with_dataclass(self, tmp_path: Path) -> None:
"""
Ensure that importlib mode works with a module containing dataclasses (#373,
pytest#7856).
"""
fn = tmp_path.joinpath("_src/project/task_dataclass.py")
fn.parent.mkdir(parents=True)
fn.write_text(
textwrap.dedent(
"""
from dataclasses import dataclass
@dataclass
class Data:
value: str
"""
)
)
)

module = import_path(fn, root=tmp_path)
Data: Any = module.Data # noqa: N806
data = Data(value="foo")
assert data.value == "foo"
assert data.__module__ == "_src.project.task_dataclass"


@pytest.mark.unit()
def test_importmode_importlib_with_pickle(tmp_path: Path) -> None:
"""Ensure that importlib mode works with pickle (#373, pytest#7859)."""
fn = tmp_path.joinpath("_src/project/task_pickle.py")
fn.parent.mkdir(parents=True)
fn.write_text(
textwrap.dedent(
"""
import pickle
def _action():
return 42
def round_trip():
s = pickle.dumps(_action)
return pickle.loads(s)
"""
module = import_path(fn, root=tmp_path)
Data: Any = module.Data # noqa: N806
data = Data(value="foo")
assert data.value == "foo"
assert data.__module__ == "_src.project.task_dataclass"

# Ensure we do not import the same module again (pytest#11475).
module2 = import_path(fn, root=tmp_path)
assert module is module2

def test_importmode_importlib_with_pickle(self, tmp_path: Path) -> None:
"""Ensure that importlib mode works with pickle (#373, pytest#7859)."""
fn = tmp_path.joinpath("_src/project/task_pickle.py")
fn.parent.mkdir(parents=True)
fn.write_text(
textwrap.dedent(
"""
import pickle
def _action():
return 42
def round_trip():
s = pickle.dumps(_action)
return pickle.loads(s)
"""
)
)
)

module = import_path(fn, root=tmp_path)
round_trip = module.round_trip
action = round_trip()
assert action() == 42


@pytest.mark.unit()
def test_importmode_importlib_with_pickle_separate_modules(tmp_path: Path) -> None:
"""
Ensure that importlib mode works can load pickles that look similar but are
defined in separate modules.
"""
fn1 = tmp_path.joinpath("_src/m1/project/task.py")
fn1.parent.mkdir(parents=True)
fn1.write_text(
textwrap.dedent(
"""
import dataclasses
import pickle
@dataclasses.dataclass
class Data:
x: int = 42
"""
module = import_path(fn, root=tmp_path)
round_trip = module.round_trip
action = round_trip()
assert action() == 42

def test_importmode_importlib_with_pickle_separate_modules(
self, tmp_path: Path
) -> None:
"""
Ensure that importlib mode works can load pickles that look similar but are
defined in separate modules.
"""
fn1 = tmp_path.joinpath("_src/m1/project/task.py")
fn1.parent.mkdir(parents=True)
fn1.write_text(
textwrap.dedent(
"""
import dataclasses
import pickle
@dataclasses.dataclass
class Data:
x: int = 42
"""
)
)
)

fn2 = tmp_path.joinpath("_src/m2/project/task.py")
fn2.parent.mkdir(parents=True)
fn2.write_text(
textwrap.dedent(
"""
import dataclasses
import pickle
@dataclasses.dataclass
class Data:
x: str = ""
"""
fn2 = tmp_path.joinpath("_src/m2/project/task.py")
fn2.parent.mkdir(parents=True)
fn2.write_text(
textwrap.dedent(
"""
import dataclasses
import pickle
@dataclasses.dataclass
class Data:
x: str = ""
"""
)
)
)

import pickle
import pickle

def round_trip(obj):
s = pickle.dumps(obj)
return pickle.loads(s) # noqa: S301
def round_trip(obj):
s = pickle.dumps(obj)
return pickle.loads(s) # noqa: S301

module = import_path(fn1, root=tmp_path)
Data1 = module.Data # noqa: N806
module = import_path(fn1, root=tmp_path)
Data1 = module.Data # noqa: N806

module = import_path(fn2, root=tmp_path)
Data2 = module.Data # noqa: N806
module = import_path(fn2, root=tmp_path)
Data2 = module.Data # noqa: N806

assert round_trip(Data1(20)) == Data1(20)
assert round_trip(Data2("hello")) == Data2("hello")
assert Data1.__module__ == "_src.m1.project.task"
assert Data2.__module__ == "_src.m2.project.task"
assert round_trip(Data1(20)) == Data1(20)
assert round_trip(Data2("hello")) == Data2("hello")
assert Data1.__module__ == "_src.m1.project.task"
assert Data2.__module__ == "_src.m2.project.task"

def test_module_name_from_path(self, tmp_path: Path) -> None:
result = _module_name_from_path(tmp_path / "src/project/task_foo.py", tmp_path)
assert result == "src.project.task_foo"

@pytest.mark.unit()
def test_module_name_from_path(tmp_path: Path) -> None:
result = _module_name_from_path(tmp_path / "src/project/task_foo.py", tmp_path)
assert result == "src.project.task_foo"
# Path is not relative to root dir: use the full path to obtain the module name.
result = _module_name_from_path(Path("/home/foo/task_foo.py"), Path("/bar"))
assert result == "home.foo.task_foo"

# Path is not relative to root dir: use the full path to obtain the module name.
result = _module_name_from_path(Path("/home/foo/task_foo.py"), Path("/bar"))
assert result == "home.foo.task_foo"
# Importing __init__.py files should return the package as module name.
result = _module_name_from_path(tmp_path / "src/app/__init__.py", tmp_path)
assert result == "src.app"

# Importing __init__.py files should return the package as module name.
result = _module_name_from_path(tmp_path / "src/app/__init__.py", tmp_path)
assert result == "src.app"
# Unless __init__.py file is at the root, in which case we cannot have an empty
# module name.
result = _module_name_from_path(tmp_path / "__init__.py", tmp_path)
assert result == "__init__"

# Modules which start with "." are considered relative and will not be imported
# unless part of a package, so we replace it with a "_" when generating the fake
# module name.
result = _module_name_from_path(tmp_path / ".env/tasks/task_foo.py", tmp_path)
assert result == "_env.tasks.task_foo"

@pytest.mark.unit()
def test_insert_missing_modules(
monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
monkeypatch.chdir(tmp_path)
# Use 'xxx' and 'xxy' as parent names as they are unlikely to exist and
# don't end up being imported.
modules = {"xxx.project.foo": ModuleType("xxx.project.foo")}
_insert_missing_modules(modules, "xxx.project.foo")
assert sorted(modules) == ["xxx", "xxx.project", "xxx.project.foo"]

mod = ModuleType("mod", doc="My Module")
modules = {"xxy": mod}
_insert_missing_modules(modules, "xxy")
assert modules == {"xxy": mod}

modules = {}
_insert_missing_modules(modules, "")
assert not modules


@pytest.mark.unit()
def test_importlib_package(monkeypatch: pytest.MonkeyPatch, tmp_path: Path):
"""
Importing a package using --importmode=importlib should not import the
package's __init__.py file more than once (#11306).
"""
monkeypatch.chdir(tmp_path)
monkeypatch.syspath_prepend(tmp_path)

package_name = "importlib_import_package"
tmp_path.joinpath(package_name).mkdir()
init = tmp_path.joinpath(f"{package_name}/__init__.py")
init.write_text(
textwrap.dedent(
"""
from .singleton import Singleton
instance = Singleton()
"""
),
encoding="ascii",
)
singleton = tmp_path.joinpath(f"{package_name}/singleton.py")
singleton.write_text(
textwrap.dedent(
"""
class Singleton:
INSTANCES = []
def __init__(self) -> None:
self.INSTANCES.append(self)
if len(self.INSTANCES) > 1:
raise RuntimeError("Already initialized")
"""
),
encoding="ascii",
)
# We want to avoid generating extra intermediate modules if some directory just
# happens to contain a "." in the name.
result = _module_name_from_path(
tmp_path / ".env.310/tasks/task_foo.py", tmp_path
)
assert result == "_env_310.tasks.task_foo"

def test_insert_missing_modules(
self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
monkeypatch.chdir(tmp_path)
# Use 'xxx' and 'xxy' as parent names as they are unlikely to exist and
# don't end up being imported.
modules = {"xxx.project.foo": ModuleType("xxx.project.foo")}
_insert_missing_modules(modules, "xxx.project.foo")
assert sorted(modules) == ["xxx", "xxx.project", "xxx.project.foo"]

mod = ModuleType("mod", doc="My Module")
modules = {"xxy": mod}
_insert_missing_modules(modules, "xxy")
assert modules == {"xxy": mod}

modules = {}
_insert_missing_modules(modules, "")
assert not modules

def test_parent_contains_child_module_attribute(
self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
):
monkeypatch.chdir(tmp_path)
# Use 'xxx' and 'xxy' as parent names as they are unlikely to exist and
# don't end up being imported.
modules = {"xxx.tasks.foo": ModuleType("xxx.tasks.foo")}
_insert_missing_modules(modules, "xxx.tasks.foo")
assert sorted(modules) == ["xxx", "xxx.tasks", "xxx.tasks.foo"]
assert modules["xxx"].tasks is modules["xxx.tasks"]
assert modules["xxx.tasks"].foo is modules["xxx.tasks.foo"]

def test_importlib_package(
self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
"""
Importing a package using --importmode=importlib should not import the
package's __init__.py file more than once (#11306).
"""
monkeypatch.chdir(tmp_path)
monkeypatch.syspath_prepend(tmp_path)

package_name = "importlib_import_package"
tmp_path.joinpath(package_name).mkdir()
init = tmp_path.joinpath(f"{package_name}/__init__.py")
init.write_text(
textwrap.dedent(
"""
from .singleton import Singleton
instance = Singleton()
"""
),
encoding="ascii",
)
singleton = tmp_path.joinpath(f"{package_name}/singleton.py")
singleton.write_text(
textwrap.dedent(
"""
class Singleton:
INSTANCES = []
def __init__(self) -> None:
self.INSTANCES.append(self)
if len(self.INSTANCES) > 1:
raise RuntimeError("Already initialized")
"""
),
encoding="ascii",
)

mod = import_path(init, root=tmp_path)
assert len(mod.instance.INSTANCES) == 1
mod = import_path(init, root=tmp_path)
assert len(mod.instance.INSTANCES) == 1
# Ensure we do not import the same module again (#11475).
mod2 = import_path(init, root=tmp_path)
assert mod is mod2