diff --git a/Dockerfile b/Dockerfile index 8299e8f..6d920d5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -42,16 +42,17 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ WORKDIR /opt/app/ -# you can add any Python dependencies to requirements.in +# core deps live in requirements.in; model runtime extras live in requirements-models.in RUN python -m pip install --upgrade pip setuptools pip-tools \ && rm -rf /home/user/.cache/pip # install slide2vec COPY --chown=user:user requirements.in /opt/app/requirements.in +COPY --chown=user:user requirements-models.in /opt/app/requirements-models.in RUN python -m pip install \ --no-cache-dir \ --no-color \ - --requirement /opt/app/requirements.in \ + --requirement /opt/app/requirements-models.in \ && rm -rf /home/user/.cache/pip COPY --chown=user:user slide2vec /opt/app/slide2vec diff --git a/Dockerfile.ci b/Dockerfile.ci index dcc2582..96a27db 100755 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -48,10 +48,11 @@ RUN python -m pip install --upgrade pip setuptools pip-tools \ && rm -rf /root/.cache/pip COPY --chown=user:user requirements.in /opt/app/requirements.in +COPY --chown=user:user requirements-models.in /opt/app/requirements-models.in RUN python -m pip install \ --no-cache-dir \ --no-color \ - --requirement /opt/app/requirements.in \ + --requirement /opt/app/requirements-models.in \ && rm -rf /root/.cache/pip COPY --chown=user:user slide2vec /opt/app/slide2vec diff --git a/README.md b/README.md index fe4abe7..7c634ac 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,14 @@ pip install slide2vec ``` +Install the full model runtime only when you need embedding/model execution: + +```shell +pip install "slide2vec[models]" +``` + +The base install of `slide2vec` covers the core package; the heavier model-runtime dependencies are provided by the optional `models` extra. 
+ ## Python API ```python diff --git a/requirements-models.in b/requirements-models.in new file mode 100644 index 0000000..b55e3e9 --- /dev/null +++ b/requirements-models.in @@ -0,0 +1,32 @@ +-r requirements.in +torch>=2.3,<2.8 +torchvision>=0.18.0 +einops>=0.8.0 +timm>=1.0.3 +huggingface-hub>=0.30.0,<1.0 +environs +einops-exts>=0.0.4 +transformers>=4.53 +sacremoses +xformers>=0.0.31 + +## Hibou +scipy~=1.8.1 +scikit-image~=0.19.3 + +## MUSK & CONCH +git+https://github.com/lilab-stanford/MUSK.git +git+https://github.com/Mahmoodlab/CONCH.git + +## Prov-GigaPath +torchmetrics>=0.10.3 +fvcore +iopath +webdataset +scikit-survival +scikit-learn +fairscale +packaging==23.2 +ninja==1.11.1.1 +psutil<6 +git+https://github.com/prov-gigapath/prov-gigapath.git diff --git a/requirements.in b/requirements.in index 07c9890..dd2bafd 100644 --- a/requirements.in +++ b/requirements.in @@ -1,42 +1,16 @@ omegaconf>=2.3.0 h5py -huggingface-hub>=0.30.0,<1.0 +matplotlib numpy<2 pandas pillow rich tqdm -wandb -torch>=2.3,<2.8 -torchvision>=0.18.0 hs2p>=2.0,<3 +torch +torchvision +transformers +wandb wholeslidedata<0.0.16 -timm>=1.0.3 -einops>=0.8.0 -einops-exts>=0.0.4 -transformers>=4.53 -sacremoses -environs -xformers>=0.0.31 -matplotlib - -## Hibou -scipy~=1.8.1 -scikit-image~=0.19.3 - -## MUSK & CONCH -git+https://github.com/lilab-stanford/MUSK.git -git+https://github.com/Mahmoodlab/CONCH.git - -## Prov-GigaPath -torchmetrics>=0.10.3 -fvcore -iopath -webdataset -scikit-survival -scikit-learn -fairscale -packaging==23.2 -ninja==1.11.1.1 -psutil<6 -git+https://github.com/prov-gigapath/prov-gigapath.git +einops +timm diff --git a/requirements.txt b/requirements.txt index 7672ed6..3fca394 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,16 @@ -timm -wandb -numpy==1.26.1 +hs2p>=2.0,<3 +omegaconf>=2.3.0 +h5py +matplotlib +numpy<2 pandas pillow rich -einops +torch +torchvision +transformers tqdm -omegaconf -wholeslidedata -huggingface_hub -torch==2.1.0 -torchvision==0.16.0 
+wandb +wholeslidedata<0.0.16 +einops +timm diff --git a/setup.cfg b/setup.cfg index e5623d6..6bd9078 100644 --- a/setup.cfg +++ b/setup.cfg @@ -19,30 +19,30 @@ install_requires = hs2p>=2.0,<3 omegaconf h5py - huggingface-hub + matplotlib numpy<2 pandas pillow rich tqdm - torchvision - wholeslidedata<0.0.16 - matplotlib - timm torch + torchvision transformers - environs - sacremoses - einops - einops-exts - xformers wandb + wholeslidedata<0.0.16 + einops + timm python_requires = >=3.10 zip_safe = no include_package_data = True [options.extras_require] +models = + huggingface-hub + sacremoses + einops-exts + xformers testing = pytest>=6.0 pytest-cov>=2.0 diff --git a/slide2vec/models/models.py b/slide2vec/models/models.py index 20faf1e..3f17d17 100644 --- a/slide2vec/models/models.py +++ b/slide2vec/models/models.py @@ -9,9 +9,9 @@ from timm.data import resolve_data_config from timm.data.constants import IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD from timm.data.transforms_factory import create_transform +from transformers import AutoImageProcessor, AutoModel from torchvision import transforms from torchvision.transforms import v2 -from transformers import AutoImageProcessor, AutoModel import slide2vec.distributed as distributed import slide2vec.models.vision_transformer_dino as vits_dino @@ -231,9 +231,7 @@ def build_encoder(self): raise NotImplementedError def get_transforms(self): - data_config = resolve_data_config( - self.encoder.pretrained_cfg, model=self.encoder - ) + data_config = resolve_data_config(self.encoder.pretrained_cfg, model=self.encoder) transform = create_transform(**data_config) return transform diff --git a/tests/test_dependency_split.py b/tests/test_dependency_split.py new file mode 100644 index 0000000..a41ead4 --- /dev/null +++ b/tests/test_dependency_split.py @@ -0,0 +1,137 @@ +import ast +import configparser +import re +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +SETUP_CFG = ROOT / "setup.cfg" +README = ROOT 
/ "README.md" +CORE_REQUIREMENTS = ROOT / "requirements.in" +CORE_REQUIREMENTS_TXT = ROOT / "requirements.txt" +MODELS_REQUIREMENTS = ROOT / "requirements-models.in" + +FOUNDATION_REQUIREMENT_NAMES = { + "huggingface-hub", + "sacremoses", + "xformers", +} + +CORE_RUNTIME_REQUIREMENT_NAMES = { + "einops", + "hs2p", + "matplotlib", + "numpy", + "omegaconf", + "pandas", + "pillow", + "rich", + "torch", + "torchvision", + "transformers", + "tqdm", + "timm", + "wandb", + "wholeslidedata", +} + + +def _load_setup_cfg() -> configparser.ConfigParser: + parser = configparser.ConfigParser() + parser.read(SETUP_CFG, encoding="utf-8") + return parser + + +def _requirement_names(raw_block: str) -> set[str]: + names: set[str] = set() + for line in raw_block.splitlines(): + requirement = line.strip() + if not requirement or requirement.startswith("#") or requirement.startswith("-r "): + continue + match = re.match(r"^[A-Za-z0-9_.-]+", requirement) + assert match is not None, f"Could not parse requirement line: {requirement}" + names.add(match.group(0).replace("_", "-").lower()) + return names + + +def _requirement_lines(raw_block: str) -> dict[str, str]: + lines: dict[str, str] = {} + for raw_line in raw_block.splitlines(): + requirement = raw_line.strip() + if not requirement or requirement.startswith("#") or requirement.startswith("-r "): + continue + match = re.match(r"^[A-Za-z0-9_.-]+", requirement) + assert match is not None, f"Could not parse requirement line: {requirement}" + lines[match.group(0).replace("_", "-").lower()] = requirement + return lines + + +def _top_level_imported_modules(path: Path) -> set[str]: + tree = ast.parse(path.read_text(encoding="utf-8")) + modules: set[str] = set() + for node in tree.body: + if isinstance(node, ast.Import): + modules.update(alias.name.split(".")[0] for alias in node.names) + elif isinstance(node, ast.ImportFrom) and node.module: + modules.add(node.module.split(".")[0]) + return modules + + +def 
test_setup_cfg_moves_model_runtime_deps_into_models_extra(): + parser = _load_setup_cfg() + + install_requires = _requirement_names(parser["options"]["install_requires"]) + models_extra = _requirement_names(parser["options.extras_require"]["models"]) + + assert FOUNDATION_REQUIREMENT_NAMES.isdisjoint(install_requires) + assert FOUNDATION_REQUIREMENT_NAMES <= models_extra + assert CORE_RUNTIME_REQUIREMENT_NAMES <= install_requires + + +def test_requirements_files_split_core_from_foundation_runtime(): + core_requirements_text = CORE_REQUIREMENTS.read_text(encoding="utf-8") + foundation_requirements_text = MODELS_REQUIREMENTS.read_text(encoding="utf-8") + core_requirements = _requirement_names(core_requirements_text) + foundation_requirements = _requirement_names(foundation_requirements_text) + core_requirement_lines = _requirement_lines(core_requirements_text) + foundation_requirement_lines = _requirement_lines(foundation_requirements_text) + + assert FOUNDATION_REQUIREMENT_NAMES.isdisjoint(core_requirements) + assert FOUNDATION_REQUIREMENT_NAMES <= foundation_requirements + assert CORE_RUNTIME_REQUIREMENT_NAMES <= core_requirements + assert "-r requirements.in" in foundation_requirements_text + assert core_requirement_lines["torch"] == "torch" + assert core_requirement_lines["torchvision"] == "torchvision" + assert core_requirement_lines["einops"] == "einops" + assert core_requirement_lines["timm"] == "timm" + assert core_requirement_lines["transformers"] == "transformers" + assert foundation_requirement_lines["torch"] == "torch>=2.3,<2.8" + assert foundation_requirement_lines["torchvision"] == "torchvision>=0.18.0" + assert foundation_requirement_lines["einops"] == "einops>=0.8.0" + assert foundation_requirement_lines["timm"] == "timm>=1.0.3" + assert foundation_requirement_lines["transformers"] == "transformers>=4.53" + + +def test_requirements_txt_matches_generic_core_runtime_requirements(): + requirement_lines = 
_requirement_lines(CORE_REQUIREMENTS_TXT.read_text(encoding="utf-8")) + + assert requirement_lines["torch"] == "torch" + assert requirement_lines["torchvision"] == "torchvision" + assert requirement_lines["einops"] == "einops" + assert requirement_lines["timm"] == "timm" + assert requirement_lines["transformers"] == "transformers" + + +def test_readme_documents_core_and_models_installs(): + readme = README.read_text(encoding="utf-8") + + assert 'pip install slide2vec' in readme + assert 'pip install "slide2vec[models]"' in readme + + +def test_tile_dataset_uses_direct_transformers_type_check(): + source = (ROOT / "slide2vec" / "data" / "dataset.py").read_text(encoding="utf-8") + + assert "from transformers.image_processing_utils import BaseImageProcessor" in source + assert "isinstance(self.transforms, BaseImageProcessor)" in source + imported_modules = _top_level_imported_modules(ROOT / "slide2vec" / "models" / "models.py") + assert "transformers" in imported_modules