1 change: 1 addition & 0 deletions slide2vec/configs/default.yaml
@@ -46,6 +46,7 @@ tiling:
sam2_checkpoint_path: # optional when method="sam2"; if empty, hs2p downloads the default AtlasPatch checkpoint from Hugging Face
sam2_config_path: # optional local override for the SAM2 model config; if empty, hs2p downloads the default AtlasPatch config from Hugging Face
sam2_device: "cpu" # device for SAM2 inference, e.g. "cpu", "cuda", or "cuda:0"
sam2_num_workers: # optional cap on concurrent SAM2 mask-resolution workers; set to 1 to serialize GPU inference and avoid CUDA OOMs
filter_params:
ref_tile_size: ${tiling.params.requested_tile_size_px} # reference tile size at the target spacing
a_t: 4 # area filter threshold for tissue (positive integer): the minimum area of a detected foreground contour to keep, measured in [ref_tile_size, ref_tile_size] tiles at spacing tiling.params.requested_spacing_um; e.g. a value of 10 keeps only contours larger than 10 such tiles
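
For illustration, a minimal `seg_params` sketch that pins SAM2 to one GPU and serializes its mask-resolution workers; the values shown are assumptions for a single-GPU setup, not packaged defaults beyond what this diff documents:

```yaml
tiling:
  seg_params:
    method: "sam2"          # enable SAM2-based segmentation (assumed setting)
    sam2_device: "cuda:0"   # run SAM2 inference on the first GPU
    sam2_num_workers: 1     # serialize mask-resolution workers to avoid CUDA OOMs
```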
8 changes: 2 additions & 6 deletions slide2vec/configs/resources.py
@@ -1,11 +1,10 @@
from contextlib import contextmanager
from importlib.resources import as_file, files
from pathlib import Path
from typing import Iterator


def config_resource(*parts: str):
path = files("slide2vec").joinpath("configs")
path = Path(__file__).resolve().parent
for part in parts:
path = path.joinpath(part)
return path.with_suffix(".yaml")
@@ -21,7 +20,4 @@ def load_config(*parts: str):

@contextmanager
def config_path(*parts: str) -> Iterator[Path]:
resource = config_resource(*parts)
with as_file(resource) as resolved:
yield resolved

yield config_resource(*parts)
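
A usage sketch of the simplified resolution, assuming a regular on-disk install (the new code relies on `Path(__file__)`, so this would not hold for a zipped package):

```python
from slide2vec.configs.resources import config_path

# config_path now yields the packaged YAML path directly instead of
# materializing it through importlib.resources.as_file, so no temporary
# copy is created and the path stays valid after the context exits.
with config_path("default") as path:
    print(path)  # .../slide2vec/configs/default.yaml
```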
21 changes: 10 additions & 11 deletions slide2vec/inference.py
@@ -308,6 +308,11 @@ def embed_slides(
slide_count=len(embeddable_slides),
num_gpus=execution.num_gpus,
)
emit_progress(
"embedding.assignment.finished",
slide_count=len(embeddable_slides),
num_gpus=execution.num_gpus,
)
local_persist_callback = None
if execution.output_dir is not None and execution.num_gpus <= 1:
local_persist_callback, _, _ = _build_incremental_persist_callback(
Expand All @@ -325,12 +330,6 @@ def embed_slides(
work_dir=work_dir,
on_embedded_slide=local_persist_callback,
)
if execution.num_gpus > 1 and len(embeddable_slides) > 1:
emit_progress(
"embedding.assignment.finished",
slide_count=len(embeddable_slides),
num_gpus=execution.num_gpus,
)
if execution.output_dir is not None and execution.num_gpus > 1:
tile_artifacts: list[TileEmbeddingArtifact] = []
hierarchical_artifacts: list[HierarchicalEmbeddingArtifact] = []
@@ -2311,6 +2310,11 @@ def _run_distributed_embedding_stage(
slide_count=len(successful_slides),
num_gpus=execution.num_gpus,
)
emit_progress(
"embedding.assignment.finished",
slide_count=len(successful_slides),
num_gpus=execution.num_gpus,
)
runtime_distributed.run_torchrun_worker(
module="slide2vec.distributed.pipeline_worker",
num_gpus=execution.num_gpus,
Expand All @@ -2320,11 +2324,6 @@ def _run_distributed_embedding_stage(
progress_events_path=progress_events_path,
popen_factory=runtime_distributed.subprocess.Popen,
)
emit_progress(
"embedding.assignment.finished",
slide_count=len(successful_slides),
num_gpus=execution.num_gpus,
)


def _embed_single_slide_distributed(
17 changes: 15 additions & 2 deletions slide2vec/runtime/batching.py
@@ -7,6 +7,7 @@

import torch
from transformers.image_processing_utils import BaseImageProcessor
from torchvision.transforms.functional import to_pil_image

from slide2vec.progress import emit_progress
from slide2vec.runtime.types import LoadedModel
@@ -154,11 +155,23 @@ def prepare_batch_tensor(image):
return image.float()


def _apply_transform_sample(sample, transforms):
if not torch.is_tensor(sample):
return transforms(sample)
try:
return transforms(sample)
except AttributeError as exc:
message = str(exc)
if "convert" not in message and "Tensor" not in message:
raise
return transforms(to_pil_image(sample.cpu()))


def apply_transforms_itemwise(image, transforms):
if not torch.is_tensor(image) or image.ndim <= 3:
return transforms(image)
return _apply_transform_sample(image, transforms)

transformed_items = [transforms(sample) for sample in image.cpu()]
transformed_items = [_apply_transform_sample(sample, transforms) for sample in image.cpu()]
if not transformed_items:
return image.new_empty((0,), dtype=torch.float32)
if not all(torch.is_tensor(item) for item in transformed_items):
8 changes: 8 additions & 0 deletions tasks/lessons.md
@@ -7,6 +7,14 @@

- When slide2vec depends on bridged HS2P progress events, keep the bridge whitelist in sync with every reporter stage the UI renders; otherwise the code can define a preview bar and still never receive preview events.

## 2026-04-21

- When an itemwise preprocessing fallback must support both tensor-native transforms and PIL-only transforms, retry through PIL only after the tensor path actually fails with a PIL-style attribute error; do not force all tensor samples through PIL up front and break legitimate tensor transforms.
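
  A self-contained sketch of that retry pattern, mirroring the `_apply_transform_sample` helper added in `slide2vec/runtime/batching.py` (the helper name and demo transform here are illustrative):

  ```python
  import torch
  from torchvision import transforms as T
  from torchvision.transforms.functional import to_pil_image

  def apply_with_pil_fallback(sample, transforms):
      # Tensor-native transforms must run on the tensor path; only retry
      # through PIL when the failure looks like a PIL-only transform
      # choking on a Tensor (e.g. calling .convert()).
      if not torch.is_tensor(sample):
          return transforms(sample)
      try:
          return transforms(sample)
      except AttributeError as exc:
          message = str(exc)
          if "convert" not in message and "Tensor" not in message:
              raise
          return transforms(to_pil_image(sample.cpu()))

  # A PIL-only pipeline: .convert() exists on PIL images, not tensors,
  # so the first attempt raises AttributeError and the fallback retries.
  pil_only = T.Compose([T.Lambda(lambda img: img.convert("RGB")), T.ToTensor()])
  print(apply_with_pil_fallback(torch.rand(3, 8, 8), pil_only).shape)  # torch.Size([3, 8, 8])
  ```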

## 2026-04-20

- When a progress bar is only meant to cover scheduling or assignment, emit its `finished` event before the downstream GPU work starts; otherwise the UI makes the orchestration phase look like it is still active while encoding is already running.
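
  A minimal ordering sketch of that rule, using the event names from this PR (the worker-launch callable is a stand-in):

  ```python
  from slide2vec.progress import emit_progress

  def run_assignment_then_workers(slides, num_gpus, launch_workers):
      emit_progress("embedding.assignment.started", slide_count=len(slides), num_gpus=num_gpus)
      # Close the assignment bar before any GPU work begins, so the UI does
      # not show scheduling as active while encoding is already running.
      emit_progress("embedding.assignment.finished", slide_count=len(slides), num_gpus=num_gpus)
      launch_workers(slides)  # workers then emit embedding.slide.* events
  ```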

## 2026-04-18

- Keep `tiling.finished` for closing the live bar and emit the final summary on a separate `tiling.summary` event; otherwise the reporter ends up printing the same panel twice.
90 changes: 24 additions & 66 deletions tests/test_progress.py
@@ -295,90 +295,48 @@ def _emit_tiling_summary(*args, **kwargs):
]


def test_run_pipeline_emits_assignment_progress_for_multi_gpu_embedding(monkeypatch, tmp_path: Path):
def test_distributed_embedding_stage_finishes_assignment_before_embedding_starts(
monkeypatch, tmp_path: Path
):
import slide2vec.inference as inference
import slide2vec.progress as progress

reporter = RecordingReporter()

slide_a = SimpleNamespace(
sample_id="slide-a",
image_path=Path("/tmp/slide-a.svs"),
mask_path=None,
spacing_at_level_0=None,
)
slide_b = SimpleNamespace(
sample_id="slide-b",
image_path=Path("/tmp/slide-b.svs"),
mask_path=None,
spacing_at_level_0=None,
)
tiling_a = SimpleNamespace(x=np.array([0, 1]), y=np.array([0, 1]), tile_size_lv0=224)
tiling_b = SimpleNamespace(x=np.array([0, 1, 2]), y=np.array([0, 1, 2]), tile_size_lv0=224)
embedded_a = SimpleNamespace(sample_id="slide-a")
embedded_b = SimpleNamespace(sample_id="slide-b")
def _fake_run_torchrun_worker(*args, **kwargs):
progress.emit_progress(
"embedding.slide.started",
sample_id="slide-a",
total_tiles=5,
progress_label="cuda:0",
)

monkeypatch.setattr(inference.runtime_distributed, "run_torchrun_worker", _fake_run_torchrun_worker)
monkeypatch.setattr(inference.runtime_distributed, "reset_progress_event_logs", lambda *args, **kwargs: None)
monkeypatch.setattr(
inference,
"_prepare_tiled_slides",
lambda *args, **kwargs: ([slide_a, slide_b], [tiling_a, tiling_b], tmp_path / "process_list.csv"),
)
monkeypatch.setattr(
inference,
"_select_embedding_path",
lambda *args, **kwargs: [embedded_a, embedded_b],
)
monkeypatch.setattr(inference, "_persist_embedded_slide", lambda *args, **kwargs: (None, None))
monkeypatch.setattr(inference.runtime_distributed, "run_torchrun_worker", lambda *args, **kwargs: None)
monkeypatch.setattr(
inference,
"_collect_pipeline_artifacts",
lambda *args, **kwargs: (["tile-artifact"], [], ["slide-artifact"]),
)
monkeypatch.setattr(inference, "_update_process_list_after_embedding", lambda *args, **kwargs: None)
monkeypatch.setattr(inference, "_validate_multi_gpu_execution", lambda *args, **kwargs: None)
monkeypatch.setattr(
inference,
"_emit_tiling_summary",
lambda *args, **kwargs: progress.emit_progress(
"tiling.summary",
total=2,
completed=2,
failed=0,
pending=0,
discovered_tiles=5,
),
"_build_pipeline_worker_request_payload",
lambda *args, **kwargs: {},
)

model = SimpleNamespace(
name="prism",
level="slide",
_requested_device="cuda:0",
_load_backend=lambda: SimpleNamespace(),
)
model = SimpleNamespace(name="prism", level="slide", _requested_device="cuda:0")

with progress.activate_progress_reporter(reporter):
result = inference.run_pipeline(
inference._run_distributed_embedding_stage(
model,
slides=[slide_a, slide_b],
successful_slides=[
SimpleNamespace(sample_id="slide-a"),
SimpleNamespace(sample_id="slide-b"),
],
preprocessing=DEFAULT_PREPROCESSING,
execution=inference.ExecutionOptions(output_dir=tmp_path, num_gpus=2, save_tile_embeddings=True),
output_dir=tmp_path,
)

kinds = [event.kind for event in reporter.events]

assert result.tile_artifacts == ["tile-artifact"]
assert result.slide_artifacts == ["slide-artifact"]
assert kinds == [
"run.started",
"tiling.started",
"tiling.summary",
"embedding.started",
"embedding.assignment.started",
"embedding.assignment.finished",
"embedding.finished",
"run.finished",
]
assert kinds.count("embedding.assignment.started") == 1
assert kinds.count("embedding.assignment.finished") == 1
assert kinds.count("embedding.slide.started") == 1


def test_plain_text_reporter_formats_assignment_progress():
5 changes: 3 additions & 2 deletions tests/test_regression_core.py
@@ -47,6 +47,7 @@ def test_packaged_preprocessing_config_matches_hs2p_4_tiling_schema():
assert hasattr(cfg.tiling.seg_params, "sam2_checkpoint_path")
assert hasattr(cfg.tiling.seg_params, "sam2_config_path")
assert hasattr(cfg.tiling.seg_params, "sam2_device")
assert "sam2_num_workers:" in (ROOT / "slide2vec" / "configs" / "default.yaml").read_text()
assert hasattr(cfg.tiling.preview, "save_mask_preview")
assert hasattr(cfg.tiling.preview, "save_tiling_preview")
assert hasattr(cfg.tiling.preview, "tissue_contour_color")
@@ -388,8 +389,8 @@ def test_cpu_worker_limit_caps_large_cpu_budget_to_sixty_four(monkeypatch):

assert utils.cpu_worker_limit() == 64

def test_execution_options_default_batch_size_is_one():
assert ExecutionOptions().batch_size == 1
def test_execution_options_default_batch_size_is_thirty_two():
assert ExecutionOptions().batch_size == 32

def test_execution_options_default_num_workers_is_auto():
assert ExecutionOptions().num_workers is None