diff --git a/.importlinter b/.importlinter index e5f8394ef2..4055e67936 100644 --- a/.importlinter +++ b/.importlinter @@ -90,50 +90,9 @@ layers = afno | dlwp | dlwp_healpix | domino | dpot | fengwu | figconvnet | fno | graphcast | meshgraphnet | pangu | pix2pix | rnn | srrn | swinvrnn | topodiff | transolver | vfgn unet | diffusion | dlwp_healpix_layers -[importlinter:contract:physicsnemo-core-external-imports] -name = Prevent Non-listed external imports in physicsnemo core +[importlinter:contract:physicsnemo-external-imports] +name = Prevent Non-listed external imports in physicsnemo type = forbidden_import -container = physicsnemo.core -dependency_group = core - -[importlinter:contract:physicsnemo-distributed-external-imports] -name = Prevent Non-listed external imports in physicsnemo distributed -type = forbidden_import -container = physicsnemo.distributed -dependency_group = distributed - -[importlinter:contract:physicsnemo-utils-external-imports] -name = Prevent Non-listed external imports in physicsnemo utils -type = forbidden_import -container = physicsnemo.utils -dependency_group = utils - -[importlinter:contract:physicsnemo-nn-external-imports] -name = Prevent Non-listed external imports in physicsnemo nn -type = forbidden_import -container = physicsnemo.nn -dependency_group = nn - -[importlinter:contract:physicsnemo-models-external-imports] -name = Prevent Non-listed external imports in physicsnemo models -type = forbidden_import -container = physicsnemo.models -dependency_group = models - -[importlinter:contract:physicsnemo-metrics-external-imports] -name = Prevent Non-listed external imports in physicsnemo metrics -type = forbidden_import -container = physicsnemo.metrics -dependency_group = metrics - -; [importlinter:contract:physicsnemo-datapipes-external-imports] -; name = Prevent Non-listed external imports in physicsnemo datapipes -; type = forbidden_import -; container = physicsnemo.datapipes -; dependency_group = datapipes - 
-[importlinter:contract:physicsnemo-domain_parallel-external-imports] -name = Prevent Non-listed external imports in physicsnemo domain_parallel -type = forbidden_import -container = physicsnemo.domain_parallel -dependency_group = domain_parallel \ No newline at end of file +container = physicsnemo +exclude = + datapipes diff --git a/pyproject.toml b/pyproject.toml index cb0178f3b0..b22a2d1eab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,6 @@ +##################################################################### +# Core Project metadata +##################################################################### [project] name = "nvidia-physicsnemo" authors = [ @@ -10,31 +13,32 @@ readme = "README.md" requires-python = ">=3.10" license = "Apache-2.0" -# NOTE: this is meant to move to `uv` off of dependency-groups. -# This is just informational here: -#_dependencies = [ -# "certifi>=2023.7.22", -# "einops>=0.8.0", -# "fsspec>=2023.1.0", -# "numpy>=1.22.4", -# "onnx>=1.14.0", -# "packaging>=24.2", -# "requests>=2.32.2", -# "s3fs>=2023.5.0", -# "setuptools>=77.0.3", -# "timm>=1.0.0", -# "torch>=2.4.0", -# "tqdm>=4.60.0", -# "treelib>=1.2.5", -# "xarray>=2023.1.0", -# "zarr>=2.14.2", -# ] + classifiers = [ "Programming Language :: Python :: 3", "Operating System :: OS Independent", ] dynamic = ["version", "optional_dependencies"] +dependencies = [ + "termcolor", + "onnx>=1.14.0", + "warp-lang", + "pandas", + "nvtx", + "treelib>=1.2.5", + "numpy>=1.22.4", + "torch>=2.4.0", + "tqdm>=4.60.0", + "requests>=2.32.2", + "GitPython", + "s3fs>=2023.5.0", + "packaging>=24.2", + "h5py", + "jaxtyping", + "cftime" +] + [project.urls] Homepage = "https://github.com/NVIDIA/physicsnemo" Documentation = "https://docs.nvidia.com/physicsnemo/index.html#core" @@ -43,17 +47,34 @@ Changelog = "https://github.com/NVIDIA/physicsnemo/blob/main/CHANGELOG.md" -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" 
+##################################################################### +# Flags for UV compatibility +##################################################################### [tool.uv] no-build-isolation-package = [ "torch_scatter", "torch_cluster", + "earth2grid", ] managed = true -default-groups = ["physicsnemo"] + +# torch-sparse requires us to tell uv about its dependency on torch. +[tool.uv.extra-build-dependencies] +torch-sparse = ["torch"] +earth2grid = ["setuptools", "torch"] + +# earth2grid is not on PyPI, so it is fetched directly from GitHub. +[tool.uv.sources] +earth2grid = { url = "https://github.com/NVlabs/earth2grid/archive/main.tar.gz" } + +##################################################################### +# Flags Controlling the local build of physicsnemo +##################################################################### +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + [tool.hatch.version] path = "physicsnemo/__init__.py" @@ -71,129 +92,76 @@ exclude = [ ] -# The dependency-group tree is critically important for physicsnemo. -# Here, we list_dependencies for each physicsnemo pacakge. Optional -#_dependencies are listed with the name `package`-extras. _Dependencies -# are chained together: for example, everything in core is a dep of the -# entire repo, but utils-extra only shows up for subsequent *-extra -# lists. -# -# We do this to ensure a consistent install path for targeted levels of the -# repository. If you just want the distributed manager, for example, you can -# target `distributed` instead of `physicsnemo` and you're up and running. -# -# These lists are the SINGLE SOURCE OF TRUTH. Models are also included -# below, to make single-model installation easier. -# -# In general, we do not draw a finer line than "required" and "extra". -# So, physicsnemo.nn's requirements do not include scipy and cuml, but the -# "extra" version includes BOTH. 
+##################################################################### +# Local Development Requirements (pytest, etc) +##################################################################### [dependency-groups] -core = [ - "torch>=2.4.0", - "tqdm>=4.60.0", # done - "requests>=2.32.2", - "GitPython", - "s3fs>=2023.5.0", - "packaging>=24.2", -] -# no core-extras -distributed = [ - "treelib>=1.2.5", - "numpy>=1.22.4", - {include-group = "core"} -] -# no distributed-extras -utils = [ - "termcolor", - "onnx>=1.14.0", - "warp-lang", - "pandas", - "nvtx", - {include-group = "distributed"}, +dev = [ + "import-linter>=2.7", + "interrogate>=1.7.0", + "pre-commit>=4.5.0", + "pytest>=9.0.1", + "ruff>=0.14.8", ] + +##################################################################### +# Optional Dependency groups +##################################################################### + +# These groups are self-referential. So, they chain together as you +# move up the physicsnemo hierarchy. A new extra dep in `physicsnemo/nn` +# will also be an extra dep in `physicsnemo/models`, but not the other +# direction. 
+ +[project.optional-dependencies] utils-extras = [ "wandb", "mlflow", "line_profiler", - "vtx", + "vtk", "stl", "rmm", "cupy", ] -nn = [ - "einops>=0.8.0", - "timm>=1.0.0", - {include-group= "utils"} -] nn-extras = [ "cuml", - "transformer_engine", + "transformer_engine[pytorch]", "scipy", - {include-group = "nn"}, - {include-group = "utils-extras"}, -] -models = [ - "cftime", - "hydra-core", - "omegaconf", - "xarray>=2023.1.0", - "zarr>=2.14.2", - "jaxtyping", - {include-group="nn"}, + "natten", + "nvidia-physicsnemo[utils-extras]", ] -models-extras = [ - "transformer_engine", +model-extras = [ + "nvidia-physicsnemo[nn-extras]", + "transformer_engine[pytorch]", "netCDF4", "pyvista", "vtk", - {include-group="models"}, - {include-group="nn-extras"}, -] -metrics = [ - {include-group="models"}, -] -datapipes = [ - "h5py", - {include-group="metrics"}, ] datapipes-extras = [ "dask", "tensorflow", - {include-group="datapipes"}, - {include-group="models-extras"}, -] -domain_parallel = [ - {include-group = "nn"} -] -physicsnemo = [ - {include-group = "domain-parallel"}, - {include-group = "datapipes"}, -] -physicsnemo-extras = [ - {include-group = "physicsnemo"}, - # {include-group = "models-extras"}, -] -dev = [ - "pytest", - "import-linter" + "nvidia-physicsnemo[model-extras]", ] -[project.optional_dependencies] +# Use case specific dependency groups. gnns = [ "torch_geometric", "torch_scatter", "torch_sparse", "torch_cluster", - "pylibcugraphops", "nvfuser", + "nvidia-physicsnemo[model-extras]", ] - healpix = [ "earth2grid", ] + +##################################################################### +# Linting configuration +##################################################################### + [tool.ruff] # Enable flake8/pycodestyle (`E`), Pyflakes (`F`), flake8-bandit (`S`), # isort (`I`), and performance 'PERF' rules. 
diff --git a/test/ci_tests/prevent_untracked_imports.py b/test/ci_tests/prevent_untracked_imports.py index aaa90ed659..6277b3a85a 100644 --- a/test/ci_tests/prevent_untracked_imports.py +++ b/test/ci_tests/prevent_untracked_imports.py @@ -19,13 +19,28 @@ import sys import sysconfig from pathlib import Path -from typing import Dict, List, Set, Union +from typing import Dict, List, Set import tomllib from importlinter import Contract, ContractCheck, fields, output from packaging.requirements import Requirement -Dependency = Union[str, Dict[str, str]] +""" +This is a script meant to be used in import-linter as a pre-commit hook to +prevent unlisted / not-required imports as a bare import in physicsnemo. + +It will do the following: +- Scan the entire "container" (configured in .importlinter) for + imports, using import-linter and grimp. Automatic. +- Extract all the "upstream" modules: things that go "import ABC" +- From that list, remove all the upstream modules from the standard library. +- Scan pyproject.toml for the requirements listed in `[project.dependencies]` +- From the upstream list, remove all the modules listed as a requirement. +- From the remaining list, find all the importers but exclude anything in + `f"{container}.{e}" for e in exclude`. +- Pass if all upstream modules are standard or hard requirements. +- Fail otherwise, and report which modules and from what files. 
+""" # For irregular mappings that we don't want to have cause errors: dep_to_import_name = { @@ -45,12 +60,13 @@ class ForbiddenImportContract(Contract): """ container = fields.StringField() - dependency_group = fields.StringField() + exclude = fields.ListField(fields.StringField(), required=False) def check(self, graph, verbose): output.verbose_print( verbose, - f"Getting import details from {self.container} vs uv group {self.dependency_group}...", + "Getting import details from " + f"{self.container} vs project core dependencies...", ) upstream_modules = graph.find_upstream_modules(self.container, as_package=True) @@ -59,31 +75,46 @@ def check(self, graph, verbose): upstream_modules = set( module for module in upstream_modules - if not module.startswith("physicsnemo") + if not module.startswith(self.container) ) upstream_external_modules = remove_standard_library(upstream_modules) # Now, read the tree from pyproject.toml: - dependency_tree = resolve_dependency_group_no_versions( - Path("pyproject.toml"), self.dependency_group - ) + dependency_tree = resolve_core_dependencies(Path("pyproject.toml")) + # This list hasn't been pruned for excludes: broken_imports = upstream_external_modules - dependency_tree violations = {} + unexcluded_broken_imports = set[str]() + for broken_import in broken_imports: - violations[broken_import] = graph.find_modules_that_directly_import( - broken_import + local_violations = graph.find_modules_that_directly_import(broken_import) + + # Remove violations that start with any exclusions: + + local_violations = set[str]( + lv + for lv in local_violations + if not any( + lv.startswith(self.container + "." 
+ ex) for ex in self.exclude + ) ) - violations[broken_import] = [ - v for v in violations[broken_import] if self.container in v - ] + + if len(local_violations) > 0: + unexcluded_broken_imports.add(broken_import) + + violations[broken_import] = local_violations + + violations[broken_import] = [ + v for v in violations[broken_import] if self.container in v + ] return ContractCheck( - kept=len(broken_imports) == 0, + kept=len(unexcluded_broken_imports) == 0, metadata={ - "broken_imports": list(broken_imports), + "broken_imports": list(unexcluded_broken_imports), "violations": violations, }, ) @@ -119,72 +150,38 @@ def render_broken_contract(self, check): ) output.new_line() - output.print_error("Listing broken imports by internal file...") - output.new_line() - for violating_file, violating_imports in inverted_violations.items(): - output.print_error( - f"{violating_file} is not allowed to import: {', '.join(violating_imports)}", - bold=True, - ) - output.new_line() - - output.print_error("Listing broken imports by internal file...") - output.new_line() - for violating_file, violating_imports in inverted_violations.items(): - output.print_error( - f"{violating_file} is not allowed to import: {', '.join(violating_imports)}", - bold=True, - ) - output.new_line() - output.new_line() - n_file_violations += 1 - output.print_error( f"Found {n_invalid_imports} invalid imports and {n_file_violations} file violations" ) -def resolve_dependency_group_no_versions( - pyproject_path: str | Path, group_name: str -) -> List[str]: +def resolve_core_dependencies(pyproject_path: str | Path) -> Set[str]: """ - Open a uv-style pyproject.toml, recursively resolve a dependency group, - and strip version specifiers from all dependencies. + Load and normalize the dependencies declared under ``[project].dependencies`` + so that we can compare external imports against the canonical list of core + dependencies shipped with the package. 
""" pyproject_path = Path(pyproject_path) with pyproject_path.open("rb") as f: data = tomllib.load(f) - dep_groups: Dict[str, List[Dependency]] = data.get("dependency-groups", {}) - - if group_name not in dep_groups: - raise KeyError(f"Dependency group '{group_name}' not found") - - def _resolve(group: str, seen: set[str] = None) -> List[str]: - if seen is None: - seen = set() - if group in seen: - return [] - seen.add(group) - deps: List[str] = [] - for item in dep_groups.get(group, []): - if isinstance(item, str): - # strip version using packaging - deps.append(Requirement(item).name) - elif isinstance(item, dict) and "include-group" in item: - deps.extend(_resolve(item["include-group"], seen)) - else: - raise ValueError(f"Unknown dependency format: {item}") - return deps - - # remove duplicates while preserving order - resolved = _resolve(group_name) - - # Convert dep tree names to what they import as: - resolved = [dep_to_import_name.get(d, d) for d in resolved] - - seen_ordered = set() - return set([d for d in resolved if not (d in seen_ordered or seen_ordered.add(d))]) + project_table = data.get("project") or {} + dependency_list: List[str] | None = project_table.get("dependencies") + if dependency_list is None: + raise KeyError("Core dependency list not found under [project].dependencies") + + resolved: List[str] = [] + for item in dependency_list: + requirement = Requirement(item) + resolved.append(dep_to_import_name.get(requirement.name, requirement.name)) + + seen: Set[str] = set() + ordered_unique: List[str] = [] + for dep in resolved: + if dep not in seen: + ordered_unique.append(dep) + seen.add(dep) + return set(ordered_unique) def flatten_deps(tree: Dict) -> Set[str]: