From 64a4a733e4225f2412522ed060a8a8fccf457328 Mon Sep 17 00:00:00 2001 From: clement grisi Date: Fri, 17 Apr 2026 14:43:28 +0200 Subject: [PATCH 1/5] align hs2p 3.3.0 --- README.md | 2 ++ docs/cli.md | 11 +++++++++++ docs/documentation.md | 4 ++++ docs/python-api.md | 8 +++++++- pyproject.toml | 4 ++-- slide2vec/configs/default.yaml | 8 +++++--- tests/fixtures/gt/test-wsi.coordinates.meta.json | 8 +++++--- tests/test_output_consistency.py | 3 +-- tests/test_regression_core.py | 4 ++++ 9 files changed, 41 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 9025d00..88f5d63 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,8 @@ pip install git+https://github.com/Mahmoodlab/CONCH.git pip install git+https://github.com/prov-gigapath/prov-gigapath.git ``` +AtlasPatch-backed tissue segmentation is available through hs2p's `sam2` path in the bundled install. + ## Python API ```python diff --git a/docs/cli.md b/docs/cli.md index 7677f80..e3802a3 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -138,6 +138,17 @@ slide2vec /path/to/config.yaml speed.num_gpus=4 If you pass `--run-on-cpu`, the CLI uses CPU execution instead. +## Segmentation Notes + +`tiling.seg_params.method` controls how hs2p segments tissue before it extracts coordinates: + +- `hsv` uses the HSV heuristic +- `otsu` thresholds the saturation channel with Otsu +- `threshold` applies a fixed saturation threshold +- `sam2` runs the AtlasPatch SAM2 tissue segmentation path on an internal `8.0 um/px` thumbnail + +When `method: sam2` is selected, `sam2_checkpoint_path` and `sam2_config_path` are optional. If they are left blank, hs2p downloads the default AtlasPatch checkpoint and SAM2 config from Hugging Face. + ## Outputs The CLI writes explicit artifact directories under the run output directory: diff --git a/docs/documentation.md b/docs/documentation.md index a9abd34..988e70b 100644 --- a/docs/documentation.md +++ b/docs/documentation.md @@ -2,6 +2,10 @@ ## 2026-04-17 +- Aligned slide2vec's bundled preprocessing schema with hs2p 3.3.0 by switching the default tissue-segmentation config to the new `method`-based SAM2-capable schema and documenting AtlasPatch-backed `sam2` usage. + +## 2026-04-17 + - Reworked the docs landing page into a product-style hero with action buttons, feature cards, and a summary panel to make the site feel less like a flat index. ## 2026-04-17 diff --git a/docs/python-api.md b/docs/python-api.md index 10c81bd..581f161 100644 --- a/docs/python-api.md +++ b/docs/python-api.md @@ -65,7 +65,10 @@ preprocessing = PreprocessingConfig( requested_spacing_um=0.5, requested_tile_size_px=224, tissue_threshold=0.1, - segmentation={"downsample": 64}, + segmentation={ + "method": "hsv", + "downsample": 64, + }, filtering={"ref_tile_size": 224}, preview={ "save_mask_preview": False, @@ -82,6 +85,7 @@ Common fields: - `requested_tile_size_px` - `tissue_threshold` - `backend` - `"auto"`, `"cucim"`, `"openslide"`, `"vips"`, or `"asap"` +- `segmentation` - forwarded to hs2p's segmentation config; `method` supports `"hsv"`, `"otsu"`, `"threshold"`, or `"sam2"` - `on_the_fly` - read tiles directly from WSI during embedding (default `True`) - `use_supertiles` - group tiles into spatial blocks to reduce WSI read calls (default `True`) - `read_coordinates_from` - reuse pre-extracted coordinates @@ -236,6 +240,8 @@ result = pipeline.run(manifest_path="/path/to/slides.csv") The manifest schema matches HS2P and accepts optional `mask_path` and `spacing_at_level_0` columns. Patient-level models additionally require a `patient_id` column; see [Patient manifest format](models.md#patient-manifest-format). +When you select `segmentation.method="sam2"`, hs2p uses the AtlasPatch tissue segmentation path and can download the default checkpoint/config automatically if you do not provide local paths. + ### Reusing pre-extracted coordinates If you already have tiling coordinates from a previous run, use `run_with_coordinates(...)` to skip the tiling stage: diff --git a/pyproject.toml b/pyproject.toml index bf09b18..f03dad2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ classifiers = [ "Programming Language :: Python :: 3.13", ] dependencies = [ - "hs2p[asap,cucim,openslide,vips]>=3.2.1", + "hs2p[asap,cucim,openslide,sam2,vips]>=3.3.0", "omegaconf", "matplotlib", "numpy<2", @@ -88,7 +88,7 @@ fm = [ "pandas", "pillow", "rich", - "hs2p[asap,cucim,openslide,vips]>=3.2.1", + "hs2p[asap,cucim,openslide,sam2,vips]>=3.3.0", "wandb", "torch>=2.3,<2.8", "torchvision>=0.18.0", diff --git a/slide2vec/configs/default.yaml b/slide2vec/configs/default.yaml index e9b4c74..75e3973 100644 --- a/slide2vec/configs/default.yaml +++ b/slide2vec/configs/default.yaml @@ -38,12 +38,14 @@ tiling: # downsample controls which pyramid level is read for tissue segmentation. # Larger values are faster and use less memory; smaller values can improve mask precision. downsample: 64 # find the closest downsample in the slide for tissue segmentation - sthresh: 8 # segmentation threshold (positive integer, using a higher threshold leads to less foreground and more background detection) (not used when use_otsu=True) + sthresh: 8 # segmentation threshold (positive integer, using a higher threshold leads to less foreground and more background detection) (not used when method="otsu") sthresh_up: 255 # upper threshold value for scaling the binary mask mthresh: 7 # median filter size (positive, odd integer) close: 4 # additional morphological closing to apply following initial thresholding (positive integer) - use_otsu: false # use otsu's method instead of simple binary thresholding - use_hsv: true # use HSV thresholding instead of simple binary thresholding + method: "hsv" # tissue segmentation method: "hsv", "otsu", "threshold", or "sam2" + sam2_checkpoint_path: # optional when method="sam2"; if empty, hs2p downloads the default AtlasPatch checkpoint from Hugging Face + sam2_config_path: # optional local override for the SAM2 model config; if empty, hs2p downloads the default AtlasPatch config from Hugging Face + sam2_device: "cpu" # device for SAM2 inference, e.g. "cpu", "cuda", or "cuda:0" filter_params: ref_tile_size: ${tiling.params.requested_tile_size_px} # reference tile size at the target spacing a_t: 4 # area filter threshold for tissue (positive integer, the minimum size of detected foreground contours to consider, relative to the reference tile size ref_tile_size, e.g. a value 10 means only detected foreground contours of size greater than 10 [ref_tile_size, ref_tile_size] tiles at spacing tiling.params.requested_spacing_um will be kept) diff --git a/tests/fixtures/gt/test-wsi.coordinates.meta.json b/tests/fixtures/gt/test-wsi.coordinates.meta.json index 0cc3196..b754580 100644 --- a/tests/fixtures/gt/test-wsi.coordinates.meta.json +++ b/tests/fixtures/gt/test-wsi.coordinates.meta.json @@ -38,12 +38,14 @@ "seg_downsample": 64, "seg_level": null, "seg_spacing_um": null, + "sam2_checkpoint_path": null, + "sam2_config_path": null, + "sam2_device": "cpu", + "method": "hsv", "sthresh": 8, "sthresh_up": 255, "tissue_mask_tissue_value": 1, - "tissue_method": "hsv", - "use_hsv": true, - "use_otsu": false + "tissue_method": "hsv" }, "slide": { "base_spacing_um": 0.25200000393750005, diff --git a/tests/test_output_consistency.py b/tests/test_output_consistency.py index 208109b..b00e2dd 100644 --- a/tests/test_output_consistency.py +++ b/tests/test_output_consistency.py @@ -30,8 +30,7 @@ sthresh_up=255, mthresh=7, close=4, - use_otsu=False, - use_hsv=True, + method="hsv", ) # -- tiling.filter_params -- diff --git a/tests/test_regression_core.py b/tests/test_regression_core.py index 2c49168..7e11b5d 100644 --- a/tests/test_regression_core.py +++ b/tests/test_regression_core.py @@ -43,6 +43,10 @@ def test_packaged_preprocessing_config_matches_hs2p_3_tiling_schema(): assert hasattr(cfg.tiling.filter_params, "filter_grayspace") assert hasattr(cfg.tiling.filter_params, "filter_blur") assert hasattr(cfg.tiling.filter_params, "qc_spacing_um") + assert hasattr(cfg.tiling.seg_params, "method") + assert hasattr(cfg.tiling.seg_params, "sam2_checkpoint_path") + assert hasattr(cfg.tiling.seg_params, "sam2_config_path") + assert hasattr(cfg.tiling.seg_params, "sam2_device") def test_get_cfg_from_args_fills_missing_preprocessing_from_single_spacing_model(tmp_path: Path): From 8247aa3c197b136ed2fefae2a0787db4bc738855 Mon Sep 17 00:00:00 2001 From: clemsgrs Date: Sat, 18 Apr 2026 11:20:00 +0000 Subject: [PATCH 2/5] improve ux --- Dockerfile | 2 +- docs/documentation.md | 16 ++ slide2vec/inference.py | 76 +++++--- slide2vec/progress.py | 106 ++++++++++- tasks/lessons.md | 10 ++ tests/test_progress.py | 274 ++++++++++++++++++++++++++++- tests/test_regression_inference.py | 118 ++++++++++++- 7 files changed, 565 insertions(+), 37 deletions(-) diff --git a/Dockerfile b/Dockerfile index f6624bd..4e76c34 100644 --- a/Dockerfile +++ b/Dockerfile @@ -67,7 +67,7 @@ RUN curl -fsSL https://github.com/libjpeg-turbo/libjpeg-turbo/releases/download/ WORKDIR /opt/app/ ARG PYTORCH_CUDA_INDEX_URL=https://download.pytorch.org/whl/cu128 -ARG GIT_MODEL_DEPENDENCIES="git+https://github.com/lilab-stanford/MUSK.git git+https://github.com/Mahmoodlab/CONCH.git git+https://github.com/prov-gigapath/prov-gigapath.git" +ARG GIT_MODEL_DEPENDENCIES="git+https://github.com/lilab-stanford/MUSK.git git+https://github.com/Mahmoodlab/CONCH.git git+https://github.com/prov-gigapath/prov-gigapath.git git+https://github.com/facebookresearch/sam2.git" RUN python -m ensurepip --upgrade \ && python -m pip install --upgrade pip setuptools pip-tools \ diff --git a/docs/documentation.md b/docs/documentation.md index 988e70b..4be0a78 100644 --- a/docs/documentation.md +++ b/docs/documentation.md @@ -1,5 +1,21 @@ # Documentation Log +## 2026-04-18 + +- Split the live tiling UI into a coordinates-extraction bar plus a separate preview-generation bar, and moved the final tiling summary into a dedicated `tiling.summary` event so it prints once at the very end. + +## 2026-04-17 + +- Kept per-slide backend-selection notices, but switched Rich rendering to the console print path used by hs2p so they appear above the live bar without corrupting it. + +## 2026-04-17 + +- Added a selective hs2p progress bridge so slide2vec keeps its own run/config summaries while still surfacing bridged tissue and backend-selection events from upstream tiling. + +## 2026-04-17 + +- Removed slide2vec's extra preflight backend-resolution pass for `backend="auto"` so tiling now relies on hs2p's own resolver once per slide. + ## 2026-04-17 - Aligned slide2vec's bundled preprocessing schema with hs2p 3.3.0 by switching the default tissue-segmentation config to the new `method`-based SAM2-capable schema and documenting AtlasPatch-backed `sam2` usage. diff --git a/slide2vec/inference.py b/slide2vec/inference.py index 8ae0f83..da80f59 100644 --- a/slide2vec/inference.py +++ b/slide2vec/inference.py @@ -18,8 +18,8 @@ import pandas as pd import torch from hs2p import SlideSpec, FilterConfig, PreviewConfig, SegmentationConfig, TilingConfig, load_tiling_result, tile_slides -from hs2p.wsi.backend import resolve_backend -from hs2p.utils.stderr import run_with_filtered_stderr, run_with_filtered_stdio +from hs2p import progress as hs2p_progress +from hs2p.utils.stderr import run_with_filtered_stderr import numpy as np from transformers.image_processing_utils import BaseImageProcessor @@ -52,8 +52,11 @@ from slide2vec.model_settings import canonicalize_model_name from slide2vec.runtime_types import LoadedModel from slide2vec.progress import ( + NullProgressReporter, + ProgressEvent as Slide2VecProgressEvent, emit_progress, emit_progress_event, + get_progress_reporter, read_progress_events, read_tiling_progress_snapshot, ) @@ -81,6 +84,49 @@ class BatchTransformSpec: resize_interpolation: str = "bilinear" +_BRIDGED_HS2P_PROGRESS_KINDS = { + "backend.selected", + "tissue.started", + "tissue.progress", + "tissue.finished", + "tiling.progress", + "tiling.finished", + "preview.started", + "preview.progress", + "preview.finished", +} + + +class _Hs2pProgressBridge: + def __init__(self, downstream) -> None: + self._downstream = downstream + + def emit(self, event) -> None: + if event.kind not in _BRIDGED_HS2P_PROGRESS_KINDS: + return + self._downstream.emit( + Slide2VecProgressEvent(kind=event.kind, payload=dict(event.payload)) + ) + + def close(self) -> None: + return None + + def write_log(self, message: str, *, stream=None) -> None: + if hasattr(self._downstream, "write_log"): + self._downstream.write_log(message, stream=stream) + + +@contextmanager +def _bridge_hs2p_progress_to_slide2vec(): + downstream = get_progress_reporter() + if isinstance(downstream, NullProgressReporter): + yield + return + bridge = _Hs2pProgressBridge(downstream) + with hs2p_progress.activate_progress_reporter(bridge): + yield + + @dataclass(kw_only=True) class PreparedBatch: indices: Any @@ -370,7 +416,7 @@ def embed_slides( output_dir=work_dir, num_workers=execution.num_preprocessing_workers, ) - _emit_tiling_finished( + _emit_tiling_summary( process_list_path, expected_total=len(slide_records), successful_slides=prepared_slides, @@ -561,7 +607,7 @@ def embed_patients( output_dir=work_dir, num_workers=execution.num_preprocessing_workers, ) - _emit_tiling_finished( + _emit_tiling_summary( process_list_path, expected_total=len(slide_records), successful_slides=prepared_slides, @@ -850,7 +896,7 @@ def run_pipeline( output_dir=output_dir, num_workers=execution.num_preprocessing_workers, ) - _emit_tiling_finished( + _emit_tiling_summary( process_list_path, expected_total=len(slide_records), successful_slides=successful_slides, @@ -2624,7 +2670,7 @@ def _num_rows(data) -> int: return len(data) -def _emit_tiling_finished( +def _emit_tiling_summary( process_list_path: Path, *, expected_total: int, @@ -2642,7 +2688,7 @@ def _emit_tiling_finished( discovered_tiles=discovered_tiles, ) emit_progress( - "tiling.finished", + "tiling.summary", total=int(snapshot.total), completed=int(snapshot.completed), failed=int(snapshot.failed), @@ -2770,19 +2816,6 @@ def _tile_slides( ) -> list[Any]: _preload_asap_wholeslidedata(preprocessing) tiling_cfg, segmentation_cfg, filtering_cfg, preview_cfg, read_coordinates_from, resume = _build_hs2p_configs(preprocessing) - for slide in slides: - backend_selection = resolve_backend( - tiling_cfg.requested_backend, - wsi_path=slide.image_path, - mask_path=slide.mask_path, - ) - if backend_selection.reason is not None: - emit_progress( - "backend.selected", - sample_id=slide.sample_id, - backend=backend_selection.backend, - reason=backend_selection.reason, - ) def _run_tile_slides(): return tile_slides( @@ -2799,7 +2832,8 @@ def _run_tile_slides(): jpeg_backend=preprocessing.jpeg_backend, ) - return run_with_filtered_stdio(_run_tile_slides) + with _bridge_hs2p_progress_to_slide2vec(): + return run_with_filtered_stderr(_run_tile_slides) def _preload_asap_wholeslidedata(preprocessing: PreprocessingConfig) -> None: diff --git a/slide2vec/progress.py b/slide2vec/progress.py index 21caba7..4da537e 100644 --- a/slide2vec/progress.py +++ b/slide2vec/progress.py @@ -86,7 +86,7 @@ def emit(self, event: ProgressEvent) -> None: line = self._format_line(kind, payload) if line is None: return - if kind in {"tiling.progress", "embedding.tile.progress"}: + if kind in {"tiling.progress", "preview.progress", "embedding.tile.progress"}: now = time.monotonic() last = self._last_line_by_kind.get(kind) if last is not None and last[1] == line and (now - last[0]) < 1.0: @@ -106,18 +106,47 @@ def _format_line(self, kind: str, payload: dict[str, Any]) -> str | None: f"Starting slide2vec run: {payload['slide_count']} slide(s), " f"model={payload['model_name']} level={payload['level']} output={payload['output_dir']}" ) + if kind == "tissue.started": + return f"Resolving tissue masks ({payload['total']} total)..." + if kind == "tissue.progress": + return ( + f"Tissue resolution: {payload['completed']}/{payload['total']} complete, " + f"{payload['failed']} failed" + ) + if kind == "tissue.finished": + return ( + f"Tissue resolution finished: {payload['completed']}/{payload['total']} complete, " + f"{payload['failed']} failed" + ) if kind == "tiling.started": return f"Tiling slides ({payload['slide_count']} total)..." if kind == "tiling.progress": return ( f"Tiling progress: {payload['completed']}/{payload['total']} complete, " - f"{payload['failed']} failed, {payload['discovered_tiles']} tiles discovered" + f"{payload['failed']} failed" ) if kind == "tiling.finished": return ( f"Tiling finished: {payload['completed']}/{payload['total']} complete, " f"{payload['failed']} failed, {payload['discovered_tiles']} tiles" ) + if kind == "tiling.summary": + return ( + f"Tiling summary: {payload['completed']}/{payload['total']} complete, " + f"{payload['failed']} failed, {payload['discovered_tiles']} tiles" + ) + if kind == "preview.started": + return f"Generating previews ({payload['total']} total)..." + if kind == "preview.progress": + return ( + f"Preview generation: {payload['completed']}/{payload['total']} complete, " + f"{payload['failed']} failed" + ) + if kind == "preview.finished": + return ( + f"Preview generation finished: {payload['completed']}/{payload['total']} complete, " + f"{payload['failed']} failed" + ) if kind == "model.loading": return f"Loading model {payload['model_name']}..." if kind == "model.ready": @@ -183,11 +212,16 @@ def __init__(self, *, output_dir: str | Path | None = None, console=None) -> Non console=self.console, transient=False, ) - self.progress.start() + self._progress_started = False self._task_ids: dict[str, int] = {} self._model_loading_counts: dict[str, int] = {} self._model_loading_devices: dict[str, set[str]] = {} + def _ensure_progress_started(self) -> None: + if not self._progress_started: + self.progress.start() + self._progress_started = True + def emit(self, event: ProgressEvent) -> None: kind = event.kind payload = event.payload @@ -197,8 +231,36 @@ def emit(self, event: ProgressEvent) -> None: f"for {payload['slide_count']} slide(s)" ) return + if kind == "tissue.started": + self._ensure_progress_started() + self.progress.print(f"Resolving tissue masks ({payload['total']} total)...") + self._task_ids["tissue"] = self.progress.add_task("Resolving tissue masks", total=payload["total"]) + return + if kind == "tissue.progress": + task_id = self._task_ids.get("tissue") + if task_id is not None: + self.progress.update( + task_id, + completed=payload["completed"] + payload["failed"], + description=( + f"Resolving tissue masks ({payload['completed']}/{payload['total']} resolved)" + ), + ) + return + if kind == "tissue.finished": + task_id = self._task_ids.pop("tissue", None) + if task_id is not None: + self.progress.remove_task(task_id) + self.progress.print( + f"Tissue resolution finished: {payload['completed']}/{payload['total']} complete, " + f"{payload['failed']} failed" + ) + return if kind == "tiling.started": + self._ensure_progress_started() self._task_ids["tiling"] = self.progress.add_task("Tiling slides", total=payload["slide_count"]) + self.progress.refresh() + self.progress.print(f"Tiling slides ({payload['slide_count']} total)...") return if kind == "tiling.progress": task_id = self._task_ids.get("tiling") @@ -206,13 +268,18 @@ def emit(self, event: ProgressEvent) -> None: self.progress.update( task_id, completed=payload["completed"] + payload["failed"], - description=f"Tiling slides ({payload['discovered_tiles']} tiles discovered)", + description=f"Tiling slides ({payload['completed']}/{payload['total']} resolved)", ) return if kind == "tiling.finished": task_id = self._task_ids.get("tiling") if task_id is not None: self.progress.update(task_id, completed=payload["completed"] + payload["failed"]) + if task_id is not None: + self.progress.remove_task(task_id) + self._task_ids.pop("tiling", None) + return + if kind == "tiling.summary": self._print_summary( "Tiling Summary", [ @@ -223,7 +290,31 @@ def emit(self, event: ProgressEvent) -> None: ], ) return + if kind == "preview.started": + self._ensure_progress_started() + total = int(payload["total"]) + if total <= 0: + return + self._task_ids["preview"] = self.progress.add_task("Generating previews", total=total) + return + if kind == "preview.progress": + task_id = self._task_ids.get("preview") + if task_id is not None: + self.progress.update( + task_id, + completed=payload["completed"] + payload["failed"], + description=f"Generating previews ({payload['completed']}/{payload['total']} rendered)", + ) + return + if kind == "preview.finished": + task_id = self._task_ids.get("preview") + if task_id is not None: + self.progress.update(task_id, completed=payload["completed"] + payload["failed"]) + self.progress.remove_task(task_id) + self._task_ids.pop("preview", None) + return if kind == "model.loading": + self._ensure_progress_started() model_name = str(payload["model_name"]) count = self._model_loading_counts.get(model_name, 0) + 1 self._model_loading_counts[model_name] = count @@ -273,9 +364,11 @@ def emit(self, event: ProgressEvent) -> None: ) return if kind == "embedding.started": + self._ensure_progress_started() self._task_ids["embedding"] = self.progress.add_task("Embedding slides", total=payload["slide_count"]) return if kind == "embedding.assignment.started": + self._ensure_progress_started() self._task_ids["embedding_assignment"] = self.progress.add_task( f"Assigning slides across {payload['num_gpus']} GPUs", total=None, @@ -291,6 +384,7 @@ def emit(self, event: ProgressEvent) -> None: ) return if kind == "embedding.slide.started": + self._ensure_progress_started() tile_task_key = _progress_task_key("tiles", payload) tile_task = self._task_ids.get(tile_task_key) description = _progress_subject(payload) @@ -314,6 +408,7 @@ def emit(self, event: ProgressEvent) -> None: self.progress.update(task_id, completed=payload["processed"], total=payload["total"]) return if kind == "aggregation.started": + self._ensure_progress_started() aggregation_task_key = _progress_task_key("aggregation", payload) description = f"Aggregating {_progress_subject(payload)}" if aggregation_task_key not in self._task_ids: @@ -367,7 +462,8 @@ def emit(self, event: ProgressEvent) -> None: return def close(self) -> None: - self.progress.stop() + if self._progress_started: + self.progress.stop() def _print_summary(self, title: str, rows: list[tuple[str, str]]) -> None: from rich.panel import Panel diff --git a/tasks/lessons.md b/tasks/lessons.md index 6a19a66..25a9736 100644 --- a/tasks/lessons.md +++ b/tasks/lessons.md @@ -1,5 +1,15 @@ # Lessons Learned +## 2026-04-18 + +- When slide2vec depends on bridged HS2P progress events, keep the bridge whitelist in sync with every reporter stage the UI renders; otherwise the code can define a preview bar and still never receive preview events. + +## 2026-04-18 + +- Keep `tiling.finished` for closing the live bar and emit the final summary on a separate `tiling.summary` event; otherwise the reporter ends up printing the same panel twice. +- Split coordinate extraction and preview flushing into separate progress stages so the tiling bar stays live through the actual slide work instead of going stale at 0% during preview cleanup. +- When a live progress renderer needs to stay readable, keep backend-selection notices on plain console output and avoid buffering them behind a broad stdout/stderr capture wrapper. + ## 2026-04-12 - When refactoring CLI parsing to support `parse_known_args()`, prefer updating the test double to match the real parser API instead of adding a production fallback for mocks. Keep the runtime code clean unless the fallback is genuinely needed by real callers. diff --git a/tests/test_progress.py b/tests/test_progress.py index cd4941f..930db4b 100644 --- a/tests/test_progress.py +++ b/tests/test_progress.py @@ -34,7 +34,9 @@ def write_log(self, message, *, stream=None): def _install_fake_rich_runtime(monkeypatch): fake_rich = types.ModuleType("rich") fake_console = types.ModuleType("rich.console") + fake_panel = types.ModuleType("rich.panel") fake_progress = types.ModuleType("rich.progress") + fake_table = types.ModuleType("rich.table") class FakeConsole: def __init__(self, file=None, **kwargs): @@ -53,12 +55,14 @@ class FakeProgress: def __init__(self, *args, **kwargs): self.tasks = {} self.next_task_id = 1 + self.console = kwargs.get("console") + self.started = False def start(self): - return None + self.started = True def stop(self): - return None + self.started = False def add_task(self, description, total=None, completed=0, visible=True): task_id = self.next_task_id @@ -77,11 +81,42 @@ def update(self, task_id, **kwargs): def remove_task(self, task_id): self.tasks.pop(task_id, None) + def refresh(self): + return None + def advance(self, task_id, advance=1): completed = self.tasks[task_id]["completed"] if "completed" in self.tasks[task_id] else 0 self.tasks[task_id]["completed"] = completed + advance + def print(self, *args, **kwargs): + if self.console is not None: + self.console.print(*args, **kwargs) + + class FakeTable: + def __init__(self): + self.rows = [] + + @classmethod + def grid(cls, padding=(0, 2)): + return cls() + + def add_column(self, *args, **kwargs): + return None + + def add_row(self, *args): + self.rows.append(args) + + class FakePanel: + @classmethod + def fit(cls, table, title=None, border_style=None): + return { + "table": table, + "title": title, + "border_style": border_style, + } + fake_console.Console = FakeConsole + fake_panel.Panel = FakePanel fake_progress.Progress = FakeProgress fake_progress.BarColumn = lambda *args, **kwargs: None fake_progress.MofNCompleteColumn = lambda *args, **kwargs: None @@ -90,11 +125,16 @@ def advance(self, task_id, advance=1): fake_progress.TextColumn = lambda *args, **kwargs: None fake_progress.TimeElapsedColumn = lambda *args, **kwargs: None fake_progress.TimeRemainingColumn = lambda *args, **kwargs: None + fake_table.Table = FakeTable fake_rich.console = fake_console + fake_rich.panel = fake_panel fake_rich.progress = fake_progress + fake_rich.table = fake_table monkeypatch.setitem(sys.modules, "rich", fake_rich) monkeypatch.setitem(sys.modules, "rich.console", fake_console) + monkeypatch.setitem(sys.modules, "rich.panel", fake_panel) monkeypatch.setitem(sys.modules, "rich.progress", fake_progress) + monkeypatch.setitem(sys.modules, "rich.table", fake_table) return FakeConsole, FakeProgress @@ -205,12 +245,22 @@ def test_run_pipeline_emits_local_progress_events_in_order(monkeypatch, tmp_path "_build_incremental_persist_callback", lambda **kwargs: (None, [], []), ) + def _emit_tiling_summary(*args, **kwargs): + progress.emit_progress( + "tiling.summary", + total=1, + completed=1, + failed=0, + pending=0, + discovered_tiles=2, + ) monkeypatch.setattr( inference, "_collect_pipeline_artifacts", lambda *args, **kwargs: (["tile-artifact"], [], ["slide-artifact"]), ) monkeypatch.setattr(inference, "_update_process_list_after_embedding", lambda *args, **kwargs: None) + monkeypatch.setattr(inference, "_emit_tiling_summary", _emit_tiling_summary) model = SimpleNamespace( name="prov-gigapath", @@ -234,7 +284,7 @@ def test_run_pipeline_emits_local_progress_events_in_order(monkeypatch, tmp_path assert kinds == [ "run.started", "tiling.started", - "tiling.finished", + "tiling.summary", "embedding.started", "embedding.slide.started", "aggregation.started", @@ -287,6 +337,18 @@ def test_run_pipeline_emits_assignment_progress_for_multi_gpu_embedding(monkeypa ) monkeypatch.setattr(inference, "_update_process_list_after_embedding", lambda *args, **kwargs: None) monkeypatch.setattr(inference, "_validate_multi_gpu_execution", lambda *args, **kwargs: None) + monkeypatch.setattr( + inference, + "_emit_tiling_summary", + lambda *args, **kwargs: progress.emit_progress( + "tiling.summary", + total=2, + completed=2, + failed=0, + pending=0, + discovered_tiles=5, + ), + ) model = SimpleNamespace( name="prism", @@ -310,7 +372,7 @@ def test_run_pipeline_emits_assignment_progress_for_multi_gpu_embedding(monkeypa assert kinds == [ "run.started", "tiling.started", - "tiling.finished", + "tiling.summary", "embedding.started", "embedding.assignment.started", "embedding.assignment.finished", @@ -340,6 +402,31 @@ def test_plain_text_reporter_formats_assignment_progress(): ) +def test_plain_text_reporter_formats_tissue_progress(): + import slide2vec.progress as progress + + reporter = progress.PlainTextCliProgressReporter(stream=io.StringIO()) + + assert ( + reporter._format_line("tissue.started", {"total": 3}) + == "Resolving tissue masks (3 total)..." + ) + assert ( + reporter._format_line( + "tissue.progress", + {"total": 3, "completed": 2, "failed": 1}, + ) + == "Tissue resolution: 2/3 complete, 1 failed" + ) + assert ( + reporter._format_line( + "tissue.finished", + {"total": 3, "completed": 3, "failed": 0}, + ) + == "Tissue resolution finished: 3/3 complete, 0 failed" + ) + + def test_run_forward_pass_reports_processed_tile_counts(): torch = pytest.importorskip("torch") import slide2vec.inference as inference @@ -597,6 +684,185 @@ def test_rich_reporter_collapses_multi_gpu_model_loading_into_one_task(monkeypat assert len(console.lines) == 1 +def test_rich_reporter_emits_tissue_progress_lines(monkeypatch): + import slide2vec.progress as progress + + FakeConsole, _FakeProgress = _install_fake_rich_runtime(monkeypatch) + console = FakeConsole() + reporter = progress.RichCliProgressReporter(console=console) + + reporter.emit(progress.ProgressEvent(kind="tissue.started", payload={"total": 3})) + assert reporter.progress.tasks[1]["description"] == "Resolving tissue masks" + assert reporter.progress.tasks[1]["total"] == 3 + reporter.emit( + progress.ProgressEvent( + kind="tissue.progress", + payload={"total": 3, "completed": 2, "failed": 1}, + ) + ) + assert reporter.progress.tasks[1]["completed"] == 3 + assert reporter.progress.tasks[1]["description"] == "Resolving tissue masks (2/3 resolved)" + reporter.emit( + progress.ProgressEvent( + kind="tissue.finished", + payload={"total": 3, "completed": 3, "failed": 0}, + ) + ) + + assert reporter.progress.tasks == {} + assert [line[0] for line in console.lines] == [ + "Resolving tissue masks (3 total)...", + "Tissue resolution finished: 3/3 complete, 0 failed", + ] + + +def test_rich_reporter_defers_tiling_bar_until_progress(monkeypatch): + import slide2vec.progress as progress + + FakeConsole, FakeProgress = _install_fake_rich_runtime(monkeypatch) + console = FakeConsole() + reporter = progress.RichCliProgressReporter(console=console) + + assert reporter.progress.started is False + reporter.emit(progress.ProgressEvent(kind="tiling.started", payload={"slide_count": 8})) + assert reporter.progress.started is True + assert reporter.progress.tasks[1]["description"] == "Tiling slides" + assert reporter.progress.tasks[1]["total"] == 8 + assert [line[0] for line in console.lines] == ["Tiling slides (8 total)..."] + + reporter.emit( + progress.ProgressEvent( + kind="backend.selected", + payload={ + "sample_id": "slide-a", + "backend": "cucim", + "reason": "selected cuCIM for auto backend", + }, + ) + ) + assert [line[0] for line in console.lines] == [ + "Tiling slides (8 total)...", + "[backend] slide-a: selected cuCIM for auto backend", + ] + + reporter.emit(progress.ProgressEvent(kind="tissue.finished", payload={"total": 8, "completed": 8, "failed": 0})) + assert reporter.progress.tasks[1]["total"] == 8 + assert reporter.progress.tasks[1]["description"] == "Tiling slides" + + reporter.emit( + progress.ProgressEvent( + kind="tiling.progress", + payload={ + "total": 8, + "completed": 1, + "failed": 0, + "pending": 7, + "discovered_tiles": 42, + }, + ) + ) + assert reporter.progress.tasks[1]["description"] == "Tiling slides (1/8 resolved)" + + reporter.emit( + progress.ProgressEvent( + kind="tiling.finished", + payload={ + "total": 8, + "completed": 8, + "failed": 0, + "pending": 0, + "discovered_tiles": 42, + }, + ) + ) + assert 1 not in reporter.progress.tasks + + reporter.emit( + progress.ProgressEvent( + kind="tiling.summary", + payload={ + "total": 8, + "completed": 8, + "failed": 0, + "pending": 0, + "discovered_tiles": 42, + }, + ) + ) + assert console.lines[-1][0]["title"] == "Tiling Summary" + + reporter.emit(progress.ProgressEvent(kind="preview.started", payload={"total": 3})) + assert reporter.progress.tasks[2]["description"] == "Generating previews" + assert reporter.progress.tasks[2]["total"] == 3 + reporter.emit( + progress.ProgressEvent( + kind="preview.progress", + payload={"total": 3, "completed": 1, "failed": 0, "pending": 2}, + ) + ) + assert reporter.progress.tasks[2]["description"] == "Generating previews (1/3 rendered)" + reporter.emit( + progress.ProgressEvent( + kind="preview.finished", + payload={"total": 3, "completed": 3, "failed": 0, "pending": 0}, + ) + ) + assert 2 not in reporter.progress.tasks + + +def test_rich_reporter_emits_backend_selected_without_log_suffix(monkeypatch): + import slide2vec.progress as progress + + FakeConsole, _FakeProgress = _install_fake_rich_runtime(monkeypatch) + console = FakeConsole() + reporter = progress.RichCliProgressReporter(console=console) + + reporter.emit( + progress.ProgressEvent( + kind="backend.selected", + payload={ + "sample_id": "slide-a", + "backend": "cucim", + "reason": "selected cuCIM for auto backend", + }, + ) + ) + + assert [line[0] for line in console.lines] == [ + "[backend] slide-a: selected cuCIM for auto backend" + ] + + +def test_rich_reporter_emits_backend_selected_via_console_print(monkeypatch): + import slide2vec.progress as progress + + FakeConsole, _FakeProgress = _install_fake_rich_runtime(monkeypatch) + console = FakeConsole() + reporter = progress.RichCliProgressReporter(console=console) + + def _fail_if_used(*args, **kwargs): + raise AssertionError("backend.selected should not go through Progress.print") + + reporter.progress.print = _fail_if_used + + reporter.emit( + progress.ProgressEvent( + kind="backend.selected", + payload={ + "sample_id": "slide-a", + "backend": "cucim", + "reason": "selected cuCIM for auto backend", + }, + ) + ) + + assert [line[0] for line in console.lines] == [ + "[backend] slide-a: selected cuCIM for auto backend" + ] + + + + def test_jsonl_progress_reporter_tags_worker_events_with_gpu_label(tmp_path: Path): import slide2vec.progress as progress diff --git a/tests/test_regression_inference.py b/tests/test_regression_inference.py index 5f1982e..a27588b 100644 --- a/tests/test_regression_inference.py +++ b/tests/test_regression_inference.py @@ -945,11 +945,6 @@ def fake_tile_slides(slides, **kwargs): captured["kwargs"] = kwargs monkeypatch.setattr(inference, "tile_slides", fake_tile_slides) - monkeypatch.setattr( - inference, - "resolve_backend", - lambda requested_backend, **kwargs: SimpleNamespace(backend="asap", reason=None, tried=("asap",)), - ) monkeypatch.setattr( inference, "_build_hs2p_configs", @@ -973,6 +968,117 @@ def fake_tile_slides(slides, **kwargs): assert captured["kwargs"]["save_tiles"] is False +def test_tile_slides_does_not_pre_resolve_backend_auto(monkeypatch, tmp_path: Path): + import slide2vec.inference as inference + import slide2vec.progress as progress + from hs2p import progress as hs2p_progress + + class Reporter: + def __init__(self): + self.events = [] + + def emit(self, event): + self.events.append(event) + + def close(self): + return None + + reporter = Reporter() + captured = {} + + def fake_tile_slides(slides, **kwargs): + captured["slides"] = list(slides) + captured["kwargs"] = kwargs + hs2p_progress.emit_progress("tissue.started", total=1) + hs2p_progress.emit_progress( + "tissue.progress", + total=1, + completed=1, + failed=0, + pending=0, + ) + hs2p_progress.emit_progress( + "tissue.finished", + total=1, + completed=1, + failed=0, + pending=0, + ) + hs2p_progress.emit_progress( + "backend.selected", + sample_id="slide-a", + backend="asap", + reason="selected asap for auto backend", + ) + hs2p_progress.emit_progress("tiling.started", total=1) + hs2p_progress.emit_progress( + "tiling.progress", + total=1, + completed=1, + failed=0, + pending=0, + discovered_tiles=1, + ) + hs2p_progress.emit_progress( + "tiling.finished", + total=1, + completed=1, + failed=0, + pending=0, + discovered_tiles=1, + output_dir=str(tmp_path), + process_list_path=str(tmp_path / "process_list.csv"), + zero_tile_successes=0, + ) + hs2p_progress.emit_progress("preview.started", total=1) + hs2p_progress.emit_progress( + "preview.progress", + total=1, + completed=1, + failed=0, + pending=0, + ) + hs2p_progress.emit_progress( + "preview.finished", + total=1, + completed=1, + failed=0, + pending=0, + ) + + assert not hasattr(inference, "resolve_backend") + monkeypatch.setattr(inference, "tile_slides", fake_tile_slides) + monkeypatch.setattr( + inference, + "_build_hs2p_configs", + lambda preprocessing: ( + SimpleNamespace(requested_backend="auto"), + "segmentation", + "filtering", + "preview", + None, + False, + ), + ) + + with progress.activate_progress_reporter(reporter): + inference._tile_slides( + [make_slide("slide-a")], + replace(DEFAULT_PREPROCESSING, backend="auto", on_the_fly=False), + output_dir=tmp_path, + num_workers=0, + ) + + assert captured["slides"][0].sample_id == "slide-a" + assert captured["kwargs"]["preview"] == "preview" + assert [event.kind for event in reporter.events] == [ + "tissue.started", + "tissue.progress", + "tissue.finished", + "backend.selected", + ] + + def test_build_hs2p_configs_constructs_preview_config(monkeypatch): import slide2vec.inference as inference @@ -2525,7 +2631,7 @@ def test_run_pipeline_logs_on_the_fly_worker_override_once(monkeypatch, tmp_path "_prepare_tiled_slides", lambda *args, **kwargs: (slides, tiling_results, tmp_path / "process_list.csv"), ) - monkeypatch.setattr(inference, "_emit_tiling_finished", lambda *args, **kwargs: None) + monkeypatch.setattr(inference, "_emit_tiling_summary", lambda *args, **kwargs: None) monkeypatch.setattr(inference, "_write_zero_tile_embedding_sidecars", lambda *args, **kwargs: None) monkeypatch.setattr( inference, From b8dcbf74bea41c0b456ceec100b28fa0c1395dcd Mon Sep 17 00:00:00 2001 From: clemsgrs Date: Sat, 18 Apr 2026 11:52:04 +0000 Subject: [PATCH 3/5] fix failing test --- tests/test_regression_inference.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_regression_inference.py b/tests/test_regression_inference.py index a27588b..1f86eb5 100644 --- a/tests/test_regression_inference.py +++ b/tests/test_regression_inference.py @@ -1076,6 +1076,11 @@ def fake_tile_slides(slides, **kwargs): "tissue.progress", "tissue.finished", "backend.selected", + "tiling.progress", + "tiling.finished", + "preview.started", + "preview.progress", + "preview.finished", ] From 4f6f3523f1449b41b06cefcdf33121df380cfb65 Mon Sep 17 00:00:00 2001 From: clement grisi Date: Sat, 18 Apr 2026 14:04:59 +0200 Subject: [PATCH 4/5] Align slide2vec with hs2p 4.0.0 --- docs/cli.md | 8 ++++--- docs/documentation.md | 2 ++ docs/python-api.md | 3 ++- slide2vec/api.py | 19 +++++++++------ slide2vec/configs/default.yaml | 5 ++-- slide2vec/inference.py | 12 +++++++++- slide2vec/utils/tiling_io.py | 5 ++++ tests/test_hs2p_package_cutover.py | 10 ++++---- tests/test_regression_core.py | 37 ++++++++++++++++++++++++++---- tests/test_regression_inference.py | 10 +++++++- 10 files changed, 87 insertions(+), 24 deletions(-) diff --git a/docs/cli.md b/docs/cli.md index e3802a3..bc67cd0 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -60,7 +60,7 @@ In practice, the config controls: - preprocessing/tiling parameters - output directory - batch size, workers, precision, and GPU count -- whether to save tiling previews through `tiling.preview.save` +- whether to save mask and tiling previews through `tiling.preview.save_mask_preview` / `tiling.preview.save_tiling_preview` - whether to save tile artifacts alongside slide-level outputs ## Common Overrides @@ -79,7 +79,9 @@ Common overrides: - `output_dir=/path/to/output` - `speed.num_gpus=4` - `speed.num_dataloader_workers=8` (`null` keeps auto mode) -- `tiling.preview.save=true` +- `tiling.preview.save_mask_preview=true` +- `tiling.preview.save_tiling_preview=true` +- `tiling.preview.tissue_contour_color=[157, 219, 129]` - `tiling.params.region_tile_multiple=6` (hierarchical extraction) - `model.name=...` - `model.output_variant=...` @@ -160,7 +162,7 @@ The CLI writes explicit artifact directories under the run output directory: - `slide_embeddings/.pt` or `.npz` - `slide_embeddings/.meta.json` - optional `slide_latents/.pt` or `.npz` -- `process_list.csv` with backend provenance columns (`requested_backend`, `backend`) carried through from hs2p, plus embedding provenance columns (`encoder_name`, `output_variant`, `feature_kind`) once feature artifacts are written +- `process_list.csv` with hs2p provenance columns (`annotation`, `requested_backend`, `backend`) carried through from hs2p, plus embedding provenance columns (`encoder_name`, `output_variant`, `feature_kind`) once feature artifacts are written - the resolved saved config file for the run - `logs/` with the main log plus distributed worker stdout/stderr captures when multi-GPU workers are used diff --git a/docs/documentation.md b/docs/documentation.md index 4be0a78..c4e1f62 100644 --- a/docs/documentation.md +++ b/docs/documentation.md @@ -2,6 +2,8 @@ ## 2026-04-18 +- Aligned slide2vec with hs2p 4.0.0's unified tiling/sampling contract by preserving the new `annotation` column in process lists and translating preview configs to hs2p's `save_mask_preview` / `save_tiling_preview` / `tissue_contour_color` fields. + - Split the live tiling UI into a coordinates-extraction bar plus a separate preview-generation bar, and moved the final tiling summary into a dedicated `tiling.summary` event so it prints once at the very end. ## 2026-04-17 diff --git a/docs/python-api.md b/docs/python-api.md index 581f161..7f25180 100644 --- a/docs/python-api.md +++ b/docs/python-api.md @@ -74,6 +74,7 @@ preprocessing = PreprocessingConfig( "save_mask_preview": False, "save_tiling_preview": False, "downsample": 32, + "tissue_contour_color": (157, 219, 129), }, ) embedded = model.embed_slide("/path/to/slide.svs", preprocessing=preprocessing) @@ -91,7 +92,7 @@ Common fields: - `read_coordinates_from` - reuse pre-extracted coordinates - `read_tiles_from` - reuse pre-extracted tile tar archives - `resume` - resume from a previous tiling run (default `False`) -- `preview` +- `preview` - forwarded to hs2p's preview config; `save_mask_preview` and `save_tiling_preview` control whether hs2p writes the two preview images, and `tissue_contour_color` controls the tissue contour RGB color For hierarchical extraction, see the [dedicated section](#hierarchical-feature-extraction) below. diff --git a/slide2vec/api.py b/slide2vec/api.py index a40f5b7..3465634 100644 --- a/slide2vec/api.py +++ b/slide2vec/api.py @@ -72,8 +72,17 @@ def from_config(cls, cfg: Any) -> "PreprocessingConfig": gpu_decode = bool(tiling.gpu_decode) adaptive_batching = bool(tiling.adaptive_batching) preview_cfg = tiling.preview - preview_save = bool(preview_cfg.save) - preview_downsample = int(preview_cfg.downsample) + preview_save = bool(preview_cfg.save_mask_preview) + preview_tiling_save = bool(preview_cfg.save_tiling_preview) + preview_kwargs: dict[str, Any] = { + "save_mask_preview": preview_save, + "save_tiling_preview": preview_tiling_save, + "downsample": int(preview_cfg.downsample), + } + preview_kwargs["tissue_contour_color"] = tuple( + int(channel) for channel in preview_cfg.tissue_contour_color + ) + preview_kwargs["mask_overlay_alpha"] = float(preview_cfg.mask_overlay_alpha) return cls( backend=tiling.backend, requested_spacing_um=float(tiling.params.requested_spacing_um), @@ -104,11 +113,7 @@ def from_config(cls, cfg: Any) -> "PreprocessingConfig": resume=bool(cfg.resume), segmentation=dict(tiling.seg_params), filtering=dict(tiling.filter_params), - preview={ - "save_mask_preview": preview_save, - "save_tiling_preview": preview_save, - "downsample": preview_downsample, - }, + preview=preview_kwargs, ) def with_backend(self, backend: str) -> "PreprocessingConfig": diff --git a/slide2vec/configs/default.yaml b/slide2vec/configs/default.yaml index 75e3973..008fcdd 100644 --- a/slide2vec/configs/default.yaml +++ b/slide2vec/configs/default.yaml @@ -62,9 +62,10 @@ tiling: blur_threshold: 50.0 # minimum blur score (higher is sharper) qc_spacing_um: 2.0 # spacing at which pixel-based QC is evaluated preview: - save: true # save preview images of slide tiling and mask overlays + save_mask_preview: true # save preview images of mask overlays + save_tiling_preview: true # save preview images of tile layouts downsample: 32 # downsample to use for preview rendering - mask_overlay_color: [157, 219, 129] # RGB color used for tissue overlays in batch mask previews + tissue_contour_color: [157, 219, 129] # RGB color used for tissue contours in batch mask previews mask_overlay_alpha: 0.5 # alpha used for tissue overlays in batch mask previews speed: diff --git a/slide2vec/inference.py b/slide2vec/inference.py index da80f59..2d4e52f 100644 --- a/slide2vec/inference.py +++ b/slide2vec/inference.py @@ -2920,6 +2920,16 @@ def _resolve_path_str(value: Any) -> str | None: process_df.to_csv(process_list_path, index=False) +def _build_preview_config(preview: dict[str, Any]) -> PreviewConfig: + return PreviewConfig( + save_mask_preview=bool(preview["save_mask_preview"]), + save_tiling_preview=bool(preview["save_tiling_preview"]), + downsample=int(preview["downsample"]), + tissue_contour_color=tuple(int(channel) for channel in preview["tissue_contour_color"]), + mask_overlay_alpha=float(preview["mask_overlay_alpha"]), + ) + + def _build_hs2p_configs(preprocessing: PreprocessingConfig): requested_tile_size_px = ( preprocessing.requested_region_size_px @@ -2936,7 +2946,7 @@ def _build_hs2p_configs(preprocessing: PreprocessingConfig): ) segmentation_cfg = SegmentationConfig(**dict(preprocessing.segmentation)) filtering_cfg = FilterConfig(**dict(preprocessing.filtering)) - preview_cfg = PreviewConfig(**dict(preprocessing.preview)) + preview_cfg = _build_preview_config(dict(preprocessing.preview)) return ( tiling_cfg, segmentation_cfg, diff --git a/slide2vec/utils/tiling_io.py b/slide2vec/utils/tiling_io.py index d7ed237..3293be7 100644 --- a/slide2vec/utils/tiling_io.py +++ b/slide2vec/utils/tiling_io.py @@ -9,6 +9,7 @@ REQUIRED_MANIFEST_COLUMNS = ("sample_id", "image_path") BASE_PROCESS_COLUMNS = ( "sample_id", + "annotation", "image_path", "mask_path", "requested_backend", @@ -22,6 +23,7 @@ ) BASE_TILING_ORDERED_COLUMNS = ( "sample_id", + "annotation", "image_path", "mask_path", "requested_backend", @@ -39,6 +41,7 @@ ) BASE_EMBEDDING_ORDERED_COLUMNS = ( "sample_id", + "annotation", "image_path", "mask_path", "requested_backend", @@ -150,6 +153,8 @@ def _load_base_process_df(process_list_path: str | Path) -> pd.DataFrame: ) if "spacing_at_level_0" not in df.columns: df["spacing_at_level_0"] = [None] * len(df) + if "annotation" not in df.columns: + df["annotation"] = ["tissue"] * len(df) if "tiles_tar_path" not in df.columns: df["tiles_tar_path"] = [None] * len(df) if "mask_preview_path" not in df.columns: diff --git a/tests/test_hs2p_package_cutover.py b/tests/test_hs2p_package_cutover.py index 599eb5e..b8f1d5f 100644 --- a/tests/test_hs2p_package_cutover.py +++ b/tests/test_hs2p_package_cutover.py @@ -80,13 +80,14 @@ def test_load_tiling_process_df_accepts_hs2p_process_list_columns(tmp_path: Path process_list = tmp_path / "process_list.csv" process_list.write_text( - "sample_id,image_path,mask_path,requested_backend,backend,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n" - "slide-1,/data/slide-1.svs,/data/slide-1-mask.png,auto,openslide,success,4,/tmp/slide-1.coordinates.npz,/tmp/slide-1.coordinates.meta.json,,\n", + "sample_id,annotation,image_path,mask_path,requested_backend,backend,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n" + "slide-1,tissue,/data/slide-1.svs,/data/slide-1-mask.png,auto,openslide,success,4,/tmp/slide-1.coordinates.npz,/tmp/slide-1.coordinates.meta.json,,\n", encoding="utf-8", ) df = helper.load_tiling_process_df(process_list) assert list(df.columns) == [ "sample_id", + "annotation", "image_path", "mask_path", "requested_backend", @@ -112,13 +113,14 @@ def test_load_embedding_process_df_accepts_hs2p_process_list_columns(tmp_path: P process_list = tmp_path / "process_list.csv" process_list.write_text( - "sample_id,image_path,mask_path,requested_backend,backend,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n" - "slide-1,/data/slide-1.svs,/data/slide-1-mask.png,auto,openslide,success,4,/tmp/slide-1.coordinates.npz,/tmp/slide-1.coordinates.meta.json,,\n", + "sample_id,annotation,image_path,mask_path,requested_backend,backend,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n" + "slide-1,tissue,/data/slide-1.svs,/data/slide-1-mask.png,auto,openslide,success,4,/tmp/slide-1.coordinates.npz,/tmp/slide-1.coordinates.meta.json,,\n", encoding="utf-8", ) df = helper.load_embedding_process_df(process_list, include_aggregation_status=True) assert list(df.columns) == [ "sample_id", + "annotation", "image_path", "mask_path", "requested_backend", diff --git a/tests/test_regression_core.py b/tests/test_regression_core.py index 7e11b5d..58d8799 100644 --- a/tests/test_regression_core.py +++ b/tests/test_regression_core.py @@ -35,7 +35,7 @@ def test_resource_loading_uses_packaged_configs(): assert cfg.speed.num_preprocessing_workers is None -def test_packaged_preprocessing_config_matches_hs2p_3_tiling_schema(): +def test_packaged_preprocessing_config_matches_hs2p_4_tiling_schema(): pytest.importorskip("omegaconf") cfg = load_config("default") @@ -47,6 +47,9 @@ def test_packaged_preprocessing_config_matches_hs2p_3_tiling_schema(): assert hasattr(cfg.tiling.seg_params, "sam2_checkpoint_path") assert hasattr(cfg.tiling.seg_params, "sam2_config_path") assert hasattr(cfg.tiling.seg_params, "sam2_device") + assert hasattr(cfg.tiling.preview, "save_mask_preview") + assert hasattr(cfg.tiling.preview, "save_tiling_preview") + assert hasattr(cfg.tiling.preview, "tissue_contour_color") def test_get_cfg_from_args_fills_missing_preprocessing_from_single_spacing_model(tmp_path: Path): @@ -678,7 +681,13 @@ def test_cli_build_model_and_pipeline_delegates_to_public_api(monkeypatch, tmp_p ), seg_params={"downsample": 64}, filter_params={"ref_tile_size": 224}, - preview=SimpleNamespace(save=False, downsample=32), + preview=SimpleNamespace( + save_mask_preview=False, + save_tiling_preview=False, + downsample=32, + tissue_contour_color=(157, 219, 129), + mask_overlay_alpha=0.5, + ), ), ) @@ -899,7 +908,13 @@ def test_preprocessing_config_from_config_preserves_tile_store_dir(): ), seg_params={"downsample": 64}, filter_params={"ref_tile_size": 224}, - preview=SimpleNamespace(save=True, downsample=32), + preview=SimpleNamespace( + save_mask_preview=True, + save_tiling_preview=True, + downsample=32, + tissue_contour_color=(157, 219, 129), + mask_overlay_alpha=0.5, + ), ), ) @@ -933,7 +948,13 @@ def test_preprocessing_config_from_config_uses_explicit_speed_num_cucim_workers( ), seg_params={"downsample": 64}, filter_params={"ref_tile_size": 224}, - preview=SimpleNamespace(save=False, downsample=32), + preview=SimpleNamespace( + save_mask_preview=False, + save_tiling_preview=False, + downsample=32, + tissue_contour_color=(157, 219, 129), + mask_overlay_alpha=0.5, + ), ), ) @@ -965,7 +986,13 @@ def test_preprocessing_config_from_config_disables_gpu_decode_by_default(): ), seg_params={"downsample": 64}, filter_params={"ref_tile_size": 224}, - preview=SimpleNamespace(save=False, downsample=32), + preview=SimpleNamespace( + save_mask_preview=False, + save_tiling_preview=False, + downsample=32, + tissue_contour_color=(157, 219, 129), + mask_overlay_alpha=0.5, + ), ), ) diff --git a/tests/test_regression_inference.py b/tests/test_regression_inference.py index 1f86eb5..24d85cb 100644 --- a/tests/test_regression_inference.py +++ b/tests/test_regression_inference.py @@ -1117,7 +1117,13 @@ def __init__(self, **kwargs): tissue_threshold=0.1, segmentation={"downsample": 64}, filtering={"ref_tile_size": 224}, - preview={"save_mask_preview": True, "save_tiling_preview": False, "downsample": 32}, + preview={ + "save_mask_preview": True, + "save_tiling_preview": False, + "downsample": 32, + "tissue_contour_color": (157, 219, 129), + "mask_overlay_alpha": 0.5, + }, ) tiling_cfg, segmentation_cfg, filtering_cfg, preview_cfg, read_coordinates_from, resume = ( @@ -1131,6 +1137,8 @@ def __init__(self, **kwargs): "save_mask_preview": True, "save_tiling_preview": False, "downsample": 32, + "tissue_contour_color": (157, 219, 129), + "mask_overlay_alpha": 0.5, } assert read_coordinates_from is None assert resume is False From 77878ecb272a8c9498fea97a764168e6f09365e5 Mon Sep 17 00:00:00 2001 From: clement grisi Date: Sat, 18 Apr 2026 14:37:03 +0200 Subject: [PATCH 5/5] Fix hs2p 4.0.0 CI fixtures --- pyproject.toml | 4 +-- tests/test_regression_inference.py | 54 +++++++++++++++--------------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f03dad2..e19d63d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ classifiers = [ "Programming Language :: Python :: 3.13", ] dependencies = [ - "hs2p[asap,cucim,openslide,sam2,vips]>=3.3.0", + "hs2p[asap,cucim,openslide,sam2,vips]>=4.0.0", "omegaconf", "matplotlib", "numpy<2", @@ -88,7 +88,7 @@ fm = [ "pandas", "pillow", "rich", - "hs2p[asap,cucim,openslide,sam2,vips]>=3.3.0", + "hs2p[asap,cucim,openslide,sam2,vips]>=4.0.0", "wandb", "torch>=2.3,<2.8", "torchvision>=0.18.0", diff --git a/tests/test_regression_inference.py b/tests/test_regression_inference.py index 24d85cb..011825e 100644 --- a/tests/test_regression_inference.py +++ b/tests/test_regression_inference.py @@ -317,8 +317,8 @@ def test_update_process_list_after_embedding_writes_feature_provenance( slide = make_slide("slide-a") process_list_path = tmp_path / "process_list.csv" process_list_path.write_text( - "sample_id,image_path,mask_path,requested_backend,backend,spacing_at_level_0,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,feature_status,error,traceback\n" - "slide-a,/tmp/slide-a.svs,,asap,asap,,success,1,/tmp/slide-a.coordinates.npz,/tmp/slide-a.coordinates.meta.json,tbp,,\n", + "sample_id,annotation,image_path,mask_path,requested_backend,backend,spacing_at_level_0,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,feature_status,error,traceback\n" + "slide-a,tissue,/tmp/slide-a.svs,,asap,asap,,success,1,/tmp/slide-a.coordinates.npz,/tmp/slide-a.coordinates.meta.json,tbp,,\n", encoding="utf-8", ) slide_artifacts = [] @@ -387,8 +387,8 @@ def test_model_embed_slide_updates_process_list_feature_status_and_path_in_distr process_list_path = output_dir / "process_list.csv" process_list_path.parent.mkdir(parents=True, exist_ok=True) process_list_path.write_text( - "sample_id,image_path,mask_path,spacing_at_level_0,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n" - "slide-a,/tmp/slide-a.svs,,,success,1,/tmp/slide-a.coordinates.npz,/tmp/slide-a.coordinates.meta.json,,\n", + "sample_id,annotation,image_path,mask_path,spacing_at_level_0,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n" + "slide-a,tissue,/tmp/slide-a.svs,,,success,1,/tmp/slide-a.coordinates.npz,/tmp/slide-a.coordinates.meta.json,,\n", encoding="utf-8", ) slide_record = make_slide("slide-a", image_path=slide_path) @@ -490,9 +490,9 @@ def write_log(self, message, *, stream=None): ) process_list_path = tmp_path / "process_list.csv" process_list_path.write_text( - "sample_id,image_path,mask_path,spacing_at_level_0,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n" - "slide-zero,/tmp/slide-zero.svs,,,success,0,/tmp/slide-zero.coordinates.npz,/tmp/slide-zero.coordinates.meta.json,,\n" - "slide-full,/tmp/slide-full.svs,,,success,2,/tmp/slide-full.coordinates.npz,/tmp/slide-full.coordinates.meta.json,,\n", + "sample_id,annotation,image_path,mask_path,spacing_at_level_0,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n" + "slide-zero,tissue,/tmp/slide-zero.svs,,,success,0,/tmp/slide-zero.coordinates.npz,/tmp/slide-zero.coordinates.meta.json,,\n" + "slide-full,tissue,/tmp/slide-full.svs,,,success,2,/tmp/slide-full.coordinates.npz,/tmp/slide-full.coordinates.meta.json,,\n", encoding="utf-8", ) @@ -716,9 +716,9 @@ def test_run_pipeline_local_branch_persists_completed_slides_before_later_failur ] process_list_path = tmp_path / "process_list.csv" process_list_path.write_text( - "sample_id,image_path,mask_path,requested_backend,backend,spacing_at_level_0,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,feature_status,error,traceback\n" - "slide-a,/tmp/slide-a.svs,,asap,asap,,success,1,/tmp/slide-a.coordinates.npz,/tmp/slide-a.coordinates.meta.json,tbp,,\n" - "slide-b,/tmp/slide-b.svs,,asap,asap,,success,1,/tmp/slide-b.coordinates.npz,/tmp/slide-b.coordinates.meta.json,tbp,,\n", + "sample_id,annotation,image_path,mask_path,requested_backend,backend,spacing_at_level_0,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,feature_status,error,traceback\n" + "slide-a,tissue,/tmp/slide-a.svs,,asap,asap,,success,1,/tmp/slide-a.coordinates.npz,/tmp/slide-a.coordinates.meta.json,tbp,,\n" + "slide-b,tissue,/tmp/slide-b.svs,,asap,asap,,success,1,/tmp/slide-b.coordinates.npz,/tmp/slide-b.coordinates.meta.json,tbp,,\n", encoding="utf-8", ) @@ -767,9 +767,9 @@ def test_run_pipeline_resume_skips_successful_local_embeddings(monkeypatch, tmp_ ] process_list_path = tmp_path / "process_list.csv" process_list_path.write_text( - "sample_id,image_path,mask_path,requested_backend,backend,spacing_at_level_0,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,feature_status,error,traceback\n" - "slide-a,/tmp/slide-a.svs,,auto,asap,,success,1,/tmp/slide-a.coordinates.npz,/tmp/slide-a.coordinates.meta.json,success,,\n" - "slide-b,/tmp/slide-b.svs,,auto,asap,,success,1,/tmp/slide-b.coordinates.npz,/tmp/slide-b.coordinates.meta.json,tbp,,\n", + "sample_id,annotation,image_path,mask_path,requested_backend,backend,spacing_at_level_0,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,feature_status,error,traceback\n" + "slide-a,tissue,/tmp/slide-a.svs,,auto,asap,,success,1,/tmp/slide-a.coordinates.npz,/tmp/slide-a.coordinates.meta.json,success,,\n" + "slide-b,tissue,/tmp/slide-b.svs,,auto,asap,,success,1,/tmp/slide-b.coordinates.npz,/tmp/slide-b.coordinates.meta.json,tbp,,\n", encoding="utf-8", ) write_tile_embeddings( @@ -837,10 +837,10 @@ def test_run_pipeline_local_persists_completed_embeddings_before_later_slide_fai ] process_list_path = tmp_path / "process_list.csv" process_list_path.write_text( - "sample_id,image_path,mask_path,spacing_at_level_0,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n" - "slide-a,/tmp/slide-a.svs,,," # spacing_at_level_0 + "sample_id,annotation,image_path,mask_path,spacing_at_level_0,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n" + "slide-a,tissue,/tmp/slide-a.svs,,," # spacing_at_level_0 "success,2,/tmp/slide-a.coordinates.npz,/tmp/slide-a.coordinates.meta.json,,\n" - "slide-b,/tmp/slide-b.svs,,," + "slide-b,tissue,/tmp/slide-b.svs,,," "success,2,/tmp/slide-b.coordinates.npz,/tmp/slide-b.coordinates.meta.json,,\n", encoding="utf-8", ) @@ -1157,8 +1157,8 @@ def test_prepare_tiled_slides_records_spacing_at_level_0_in_process_list(monkeyp process_list_path = tmp_path / "process_list.csv" process_list_path.write_text( - "sample_id,image_path,mask_path,requested_backend,backend,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n" - "slide-a,/tmp/slide-a.svs,,asap,asap,success,1,/tmp/slide-a.coordinates.npz,/tmp/slide-a.coordinates.meta.json,,\n", + "sample_id,annotation,image_path,mask_path,requested_backend,backend,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n" + "slide-a,tissue,/tmp/slide-a.svs,,asap,asap,success,1,/tmp/slide-a.coordinates.npz,/tmp/slide-a.coordinates.meta.json,,\n", encoding="utf-8", ) @@ -1183,8 +1183,8 @@ def test_prepare_tiled_slides_records_preview_paths_in_process_list(monkeypatch, process_list_path = tmp_path / "process_list.csv" process_list_path.write_text( - "sample_id,image_path,mask_path,requested_backend,backend,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n" - "slide-a,/tmp/slide-a.svs,,asap,asap,success,1,/tmp/slide-a.coordinates.npz,/tmp/slide-a.coordinates.meta.json,,\n", + "sample_id,annotation,image_path,mask_path,requested_backend,backend,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n" + "slide-a,tissue,/tmp/slide-a.svs,,asap,asap,success,1,/tmp/slide-a.coordinates.npz,/tmp/slide-a.coordinates.meta.json,,\n", encoding="utf-8", ) @@ -1218,8 +1218,8 @@ def test_record_slide_metadata_in_process_list_adds_backend_columns(monkeypatch, process_list_path = tmp_path / "process_list.csv" process_list_path.write_text( - "sample_id,image_path,mask_path,requested_backend,backend,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n" - "slide-a,/tmp/slide-a.svs,,auto,,success,1,/tmp/slide-a.coordinates.npz,/tmp/slide-a.coordinates.meta.json,,\n", + "sample_id,annotation,image_path,mask_path,requested_backend,backend,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n" + "slide-a,tissue,/tmp/slide-a.svs,,auto,,success,1,/tmp/slide-a.coordinates.npz,/tmp/slide-a.coordinates.meta.json,,\n", encoding="utf-8", ) @@ -1335,8 +1335,8 @@ def test_load_successful_tiled_slides_preserves_spacing_at_level_0(monkeypatch, process_list_path = tmp_path / "process_list.csv" process_list_path.write_text( - "sample_id,image_path,mask_path,requested_backend,backend,spacing_at_level_0,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n" - "slide-a,/tmp/slide-a.svs,,auto,,0.25,success,1,/tmp/slide-a.coordinates.npz,/tmp/slide-a.coordinates.meta.json,,\n", + "sample_id,annotation,image_path,mask_path,requested_backend,backend,spacing_at_level_0,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n" + "slide-a,tissue,/tmp/slide-a.svs,,auto,,0.25,success,1,/tmp/slide-a.coordinates.npz,/tmp/slide-a.coordinates.meta.json,,\n", encoding="utf-8", ) @@ -1773,10 +1773,10 @@ def test_direct_embed_slides_persists_completed_embeddings_before_later_slide_fa ] process_list_path = tmp_path / "process_list.csv" process_list_path.write_text( - "sample_id,image_path,mask_path,spacing_at_level_0,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n" - "slide-a,/tmp/slide-a.svs,,," # spacing_at_level_0 + "sample_id,annotation,image_path,mask_path,spacing_at_level_0,tiling_status,num_tiles,coordinates_npz_path,coordinates_meta_path,error,traceback\n" + "slide-a,tissue,/tmp/slide-a.svs,,," # spacing_at_level_0 "success,2,/tmp/slide-a.coordinates.npz,/tmp/slide-a.coordinates.meta.json,,\n" - "slide-b,/tmp/slide-b.svs,,," + "slide-b,tissue,/tmp/slide-b.svs,,," "success,2,/tmp/slide-b.coordinates.npz,/tmp/slide-b.coordinates.meta.json,,\n", encoding="utf-8", )