Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion .github/workflows/pr-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,20 @@ jobs:
cache-from: type=gha
cache-to: type=gha,mode=max

- name: Run revamped tests in container
- name: Run default fast tests in container
run: |
set -euo pipefail
docker run --rm \
-v "$GITHUB_WORKSPACE:/workspace" \
-w /workspace \
hs2p:${{ github.sha }} \
bash -lc "python -m pip install --no-cache-dir pytest pytest-cov && MPLCONFIGDIR=/tmp/mpl python -m pytest -q tests"

- name: Run fixture integration regressions in container
run: |
set -euo pipefail
docker run --rm \
-v "$GITHUB_WORKSPACE:/workspace" \
-w /workspace \
hs2p:${{ github.sha }} \
bash -lc "python -m pip install --no-cache-dir pytest pytest-cov && MPLCONFIGDIR=/tmp/mpl python -m pytest -q -m integration tests/test_fixture_artifacts_regression.py tests/test_real_fixture_smoke_regression.py"
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ You can also upload your own pyramidal WSI (up to 1 GB).
pip install hs2p
```

Optional CuCIM install for faster tile tar export when using `tiling.backend="cucim"`:
Optional cuCIM install for faster tile tar export when using `tiling.backend="cucim"`:

```bash
pip install cucim-cu12
```

Use the CuCIM wheel that matches your CUDA runtime. The base `hs2p` install does not
require CuCIM.
Use the cuCIM wheel that matches your CUDA runtime. The base `hs2p` install does not
require cuCIM.

## Workflows

Expand Down Expand Up @@ -134,7 +134,7 @@ For a first run, start from [hs2p/configs/default.yaml](hs2p/configs/default.yam
Optional:

- `save_tiles`
- also write `tiles/{sample_id}.tiles.tar` archives; with `tiling.backend="cucim"` this uses batched CuCIM reads during tar extraction
- also write `tiles/{sample_id}.tiles.tar` archives; with `tiling.backend="cucim"` this uses batched cuCIM reads during tar extraction, and other backends coalesce dense `8x8` / `4x4` regions before slicing them back into tiles

Run tiling:

Expand Down
4 changes: 4 additions & 0 deletions docs/artifacts.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,12 @@ Each successful output produces:
- Native spacing of the level that was read
- `read_tile_size_px`
- Tile width and height at the read level before mapping back to level 0
- `read_step_px`
- Step between neighboring tile origins at the read level
- `tile_size_lv0`
- Tile width and height expressed in level-0 pixels
- `step_px_lv0`
- Step between neighboring tile origins in level-0 pixels
- `overlap`
- Requested overlap fraction between neighboring tiles
- `tissue_threshold`
Expand Down
4 changes: 2 additions & 2 deletions docs/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,9 @@ When enabled, every candidate tile that passes the tissue mask check is read fro

When `save_tiles: true`, HS2P also writes a `tiles/{sample_id}.tiles.tar` archive with JPEG-encoded tile images.

- For non-CuCIM backends, tar extraction uses the existing sequential reader.
- For non-CuCIM backends, tar extraction still uses the `wholeslidedata` reader, but dense `8x8` and `4x4` tile blocks are coalesced into larger contiguous reads before slicing them back into tiles.
- For `tiling.backend: cucim`, tar extraction uses a cuCIM batch-read fast path and reuses the per-slide worker count from `speed.num_workers`.
- Installing CuCIM is optional. If `backend: cucim` is selected but CuCIM is not installed, HS2P falls back to the sequential export path and emits a warning.
- Installing cuCIM is optional. If `backend: cucim` is selected but cuCIM is not installed, HS2P falls back to the `wholeslidedata` export path and emits a warning.

## Resume and precomputed artifacts

Expand Down
153 changes: 147 additions & 6 deletions hs2p/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ class TilingResult:
tissue_threshold: float
num_tiles: int
config_hash: str
read_step_px: int | None = None
step_px_lv0: int | None = None
tissue_fraction: np.ndarray | None = None
annotation: str | None = None
selection_strategy: str | None = None
Expand Down Expand Up @@ -281,6 +283,8 @@ def _compute_tiling_result(
),
)
),
read_step_px=extraction.read_step_px,
step_px_lv0=extraction.step_px_lv0,
selection_strategy=(
CoordinateSelectionStrategy.MERGED_DEFAULT_TILING
if sampling_spec is not None
Expand Down Expand Up @@ -424,7 +428,9 @@ def save_tiling_result(
"read_level": result.read_level,
"read_spacing_um": result.read_spacing_um,
"read_tile_size_px": result.read_tile_size_px,
"read_step_px": result.read_step_px,
"tile_size_lv0": result.tile_size_lv0,
"step_px_lv0": result.step_px_lv0,
"overlap": result.overlap,
"tissue_threshold": result.tissue_threshold,
"num_tiles": result.num_tiles,
Expand Down Expand Up @@ -585,6 +591,8 @@ def extract_tiles_to_tar(
tissue_threshold=result.tissue_threshold,
num_tiles=len(kept),
config_hash=result.config_hash,
read_step_px=result.read_step_px,
step_px_lv0=result.step_px_lv0,
tissue_fraction=(
result.tissue_fraction[kept]
if result.tissue_fraction is not None
Expand Down Expand Up @@ -656,15 +664,138 @@ def _iter_wsd_tile_arrays_for_tar_extraction(
import wholeslidedata as wsd

wsi = wsd.WholeSlideImage(result.image_path, backend=result.backend)
for i in range(result.num_tiles):
yield wsi.get_patch(
int(result.x[i]),
int(result.y[i]),
int(result.read_tile_size_px),
int(result.read_tile_size_px),
read_step_px = _resolve_read_step_px(result)
step_px_lv0 = _resolve_step_px_lv0(result)
for read_plan in _iter_wsd_read_plans_for_tar_extraction(
result=result,
read_step_px=read_step_px,
step_px_lv0=step_px_lv0,
):
region = wsi.get_patch(
int(read_plan.x),
int(read_plan.y),
int(read_plan.read_size_px),
int(read_plan.read_size_px),
spacing=float(result.read_spacing_um),
center=False,
)
region = np.asarray(region)
if read_plan.block_size == 1:
yield region
continue
for x_idx in range(read_plan.block_size):
x0 = x_idx * read_step_px
for y_idx in range(read_plan.block_size):
y0 = y_idx * read_step_px
yield region[
y0 : y0 + int(result.read_tile_size_px),
x0 : x0 + int(result.read_tile_size_px),
]


@dataclass(frozen=True)
class _WSDTarReadPlan:
    """One wholeslidedata read: a single tile or a square block of coalesced tiles."""

    # Origin of the block's first (top-left) tile, in the same coordinate
    # space as TilingResult.x / TilingResult.y.
    x: int
    y: int
    # Edge length, in read-level pixels, of the square region to read.
    read_size_px: int
    # Tiles per side: 1 for a plain tile read, 4 or 8 for a coalesced block.
    block_size: int


def _resolve_read_step_px(result: TilingResult) -> int:
if result.read_step_px is not None:
return int(result.read_step_px)
return max(
1,
int(round(int(result.read_tile_size_px) * (1.0 - float(result.overlap)), 0)),
)


def _resolve_step_px_lv0(result: TilingResult) -> int:
if result.step_px_lv0 is not None:
return int(result.step_px_lv0)
if result.x.size > 1:
unique_x = np.unique(np.sort(result.x.astype(np.int64, copy=False)))
diffs = np.diff(unique_x)
diffs = diffs[diffs > 0]
if diffs.size > 0:
return int(diffs.min())
if result.y.size > 1:
unique_y = np.unique(np.sort(result.y.astype(np.int64, copy=False)))
diffs = np.diff(unique_y)
diffs = diffs[diffs > 0]
if diffs.size > 0:
return int(diffs.min())
return max(
1,
int(round(int(result.tile_size_lv0) * (1.0 - float(result.overlap)), 0)),
)


def _iter_wsd_read_plans_for_tar_extraction(
    *,
    result: TilingResult,
    read_step_px: int,
    step_px_lv0: int,
):
    """Yield read plans that coalesce dense tile blocks into larger reads.

    Scans tiles in index order; for each not-yet-consumed tile it greedily
    tries to anchor an 8x8 (then 4x4) block whose tile origins form a
    complete grid with stride ``step_px_lv0``. A complete block is emitted
    as one large ``_WSDTarReadPlan``; otherwise the tile is emitted alone.
    Every tile index is covered by exactly one yielded plan.

    Args:
        result: Tiling result providing tile coordinates and sizes.
        read_step_px: Stride between neighboring tile origins at the read level.
        step_px_lv0: Stride between neighboring tile origins in level-0 pixels.
    """
    if step_px_lv0 <= 0:
        # Degenerate stride: fall back to the tile size so the grid lookup
        # below cannot match coincident coordinates.
        step_px_lv0 = int(result.tile_size_lv0)
    # Map each tile's origin to its index for O(1) grid-membership tests.
    coord_to_index = {
        (int(x), int(y)): idx
        for idx, (x, y) in enumerate(
            zip(
                result.x.astype(np.int64, copy=False).tolist(),
                result.y.astype(np.int64, copy=False).tolist(),
            )
        )
    }
    # Tracks tiles already covered by an emitted plan.
    consumed = np.zeros(result.num_tiles, dtype=bool)
    # Try the largest block first; fall back to the smaller one.
    block_sizes = (8, 4)
    tile_size_px = int(result.read_tile_size_px)

    for idx in range(result.num_tiles):
        if consumed[idx]:
            continue
        x0 = int(result.x[idx])
        y0 = int(result.y[idx])
        grouped = False
        for block_size in block_sizes:
            # A block can never be completed with fewer tiles than its area.
            if result.num_tiles < block_size * block_size:
                continue
            indices: list[int] = []
            for x_idx in range(block_size):
                for y_idx in range(block_size):
                    coord = (
                        x0 + x_idx * step_px_lv0,
                        y0 + y_idx * step_px_lv0,
                    )
                    match_idx = coord_to_index.get(coord)
                    if match_idx is None or consumed[match_idx]:
                        # Missing or already-consumed cell: abandon this block.
                        indices = []
                        break
                    indices.append(match_idx)
                # NOTE(review): when the y-loop completes without breaking,
                # len(indices) == (x_idx + 1) * block_size, so the second
                # condition appears unreachable — kept as a safety net.
                if not indices or len(indices) < (x_idx + 1) * block_size:
                    break
            if not indices:
                continue
            # Complete block found: mark all member tiles and emit one read
            # spanning the whole block at the read level.
            for match_idx in indices:
                consumed[match_idx] = True
            yield _WSDTarReadPlan(
                x=x0,
                y=y0,
                read_size_px=tile_size_px + (block_size - 1) * read_step_px,
                block_size=block_size,
            )
            grouped = True
            break
        if grouped:
            continue
        # No block could be anchored here: emit the tile as a single read.
        consumed[idx] = True
        yield _WSDTarReadPlan(
            x=x0,
            y=y0,
            read_size_px=tile_size_px,
            block_size=1,
        )


def _needs_pixel_filtering(filtering: FilterConfig) -> bool:
Expand Down Expand Up @@ -741,6 +872,16 @@ def load_tiling_result(
tissue_threshold=float(meta["tissue_threshold"]),
num_tiles=int(meta["num_tiles"]),
config_hash=str(meta["config_hash"]),
read_step_px=(
int(meta["read_step_px"])
if meta.get("read_step_px") is not None
else None
),
step_px_lv0=(
int(meta["step_px_lv0"])
if meta.get("step_px_lv0") is not None
else None
),
annotation=(
str(meta["annotation"]) if meta.get("annotation") is not None else None
),
Expand Down
123 changes: 123 additions & 0 deletions hs2p/benchmarking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
from __future__ import annotations

from dataclasses import dataclass, replace
from typing import Iterable

import numpy as np

from hs2p.api import (
TilingResult,
_iter_wsd_read_plans_for_tar_extraction,
_resolve_read_step_px,
_resolve_step_px_lv0,
)


@dataclass(frozen=True)
class TileReadPlan:
    """One slide read: a single tile or a square block of coalesced tiles."""

    # Origin of the (top-left) tile covered by this read, in the same
    # coordinate space as TilingResult.x / TilingResult.y.
    x: int
    y: int
    # Edge length of the square read region in read-level pixels.
    read_size_px: int
    # Tiles per side: 1 for a plain tile read; >1 for a coalesced supertile.
    block_size: int


def build_read_plans(
    result: TilingResult,
    *,
    use_supertiles: bool,
) -> list[TileReadPlan]:
    """Materialize read plans for *result*.

    With ``use_supertiles=False`` every tile becomes its own plan at the
    native read size; with ``use_supertiles=True`` dense tile blocks are
    coalesced via the shared tar-extraction planner.
    """
    if not use_supertiles:
        # One independent plan per tile.
        size = int(result.read_tile_size_px)
        xs = result.x.astype(np.int64, copy=False).tolist()
        ys = result.y.astype(np.int64, copy=False).tolist()
        plans: list[TileReadPlan] = []
        for tile_x, tile_y in zip(xs, ys):
            plans.append(
                TileReadPlan(
                    x=int(tile_x),
                    y=int(tile_y),
                    read_size_px=size,
                    block_size=1,
                )
            )
        return plans

    stride_read = _resolve_read_step_px(result)
    stride_lv0 = _resolve_step_px_lv0(result)
    return [
        TileReadPlan(
            x=int(raw.x),
            y=int(raw.y),
            read_size_px=int(raw.read_size_px),
            block_size=int(raw.block_size),
        )
        for raw in _iter_wsd_read_plans_for_tar_extraction(
            result=result,
            read_step_px=stride_read,
            step_px_lv0=stride_lv0,
        )
    ]


def group_read_plans_by_read_size(
    plans: Iterable[TileReadPlan],
) -> dict[int, list[TileReadPlan]]:
    """Bucket read plans by their square read size in pixels.

    Insertion order is preserved both for the bucket keys and for the
    plans within each bucket.
    """
    buckets: dict[int, list[TileReadPlan]] = {}
    for plan in plans:
        size = int(plan.read_size_px)
        if size not in buckets:
            buckets[size] = []
        buckets[size].append(plan)
    return buckets


def iter_tiles_from_region(
    region: np.ndarray,
    plan: TileReadPlan,
    *,
    tile_size_px: int,
    read_step_px: int,
):
    """Yield tile-sized array views from a coalesced region.

    Tiles are emitted column-major: the x offset advances in the outer loop
    and the y offset in the inner loop, matching the tar-extraction order.
    """
    arr = np.asarray(region)
    if plan.block_size == 1:
        # Single-tile plan: clip to the tile size and stop.
        yield arr[:tile_size_px, :tile_size_px]
        return
    offsets = [step * read_step_px for step in range(plan.block_size)]
    for col in offsets:
        for row in offsets:
            yield arr[row : row + tile_size_px, col : col + tile_size_px]


def limit_tiling_result(result: TilingResult, *, max_tiles: int) -> TilingResult:
    """Return a copy of *result* truncated to the first ``max_tiles`` tiles.

    The original object is returned unchanged when ``max_tiles`` is
    non-positive or does not actually truncate anything.

    Args:
        result: Tiling result to truncate.
        max_tiles: Maximum number of tiles to keep (taken from the front).

    Returns:
        Either *result* itself (no truncation needed) or a new TilingResult
        with coordinate arrays, tissue fractions, tile indices, and the tile
        count reduced to ``max_tiles``.
    """
    if max_tiles <= 0 or max_tiles >= result.num_tiles:
        return result
    keep = int(max_tiles)
    kept = slice(0, keep)
    # dataclasses.replace copies every other field verbatim, so fields added
    # to TilingResult later cannot be silently dropped by this helper (the
    # previous field-by-field reconstruction was fragile in that respect).
    return replace(
        result,
        x=result.x[kept],
        y=result.y[kept],
        # Tile indices are reissued densely for the truncated subset.
        tile_index=np.arange(keep, dtype=np.int32),
        num_tiles=keep,
        tissue_fraction=(
            result.tissue_fraction[kept]
            if result.tissue_fraction is not None
            else None
        ),
    )
Loading
Loading