Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion .github/workflows/pr-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,20 @@ jobs:
cache-from: type=gha
cache-to: type=gha,mode=max

- name: Run revamped tests in container
- name: Run default fast tests in container
run: |
set -euo pipefail
docker run --rm \
-v "$GITHUB_WORKSPACE:/workspace" \
-w /workspace \
hs2p:${{ github.sha }} \
bash -lc "python -m pip install --no-cache-dir pytest pytest-cov && MPLCONFIGDIR=/tmp/mpl python -m pytest -q tests"

- name: Run fixture integration regressions in container
run: |
set -euo pipefail
docker run --rm \
-v "$GITHUB_WORKSPACE:/workspace" \
-w /workspace \
hs2p:${{ github.sha }} \
bash -lc "python -m pip install --no-cache-dir pytest pytest-cov && MPLCONFIGDIR=/tmp/mpl python -m pytest -q -m integration tests/test_fixture_artifacts_regression.py tests/test_real_fixture_smoke_regression.py"
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ You can also upload your own pyramidal WSI (up to 1 GB).
pip install hs2p
```

Optional CuCIM install for faster tile tar export when using `tiling.backend="cucim"`:
Optional cuCIM install for faster tile tar export when using `tiling.backend="cucim"`:

```bash
pip install cucim-cu12
```

Use the CuCIM wheel that matches your CUDA runtime. The base `hs2p` install does not
require CuCIM.
Use the cuCIM wheel that matches your CUDA runtime. The base `hs2p` install does not
require cuCIM.

## Workflows

Expand Down Expand Up @@ -134,7 +134,7 @@ For a first run, start from [hs2p/configs/default.yaml](hs2p/configs/default.yam
Optional:

- `save_tiles`
- also write `tiles/{sample_id}.tiles.tar` archives; with `tiling.backend="cucim"` this uses batched CuCIM reads during tar extraction
- also write `tiles/{sample_id}.tiles.tar` archives; with `tiling.backend="cucim"` this uses batched cuCIM reads during tar extraction, and other backends coalesce dense `8x8` / `4x4` regions before slicing them back into tiles

Run tiling:

Expand Down
4 changes: 4 additions & 0 deletions docs/artifacts.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,12 @@ Each successful output produces:
- Native spacing of the level that was read
- `read_tile_size_px`
- Tile width and height at the read level before mapping back to level 0
- `read_step_px`
- Step between neighboring tile origins at the read level
- `tile_size_lv0`
- Tile width and height expressed in level-0 pixels
- `step_px_lv0`
- Step between neighboring tile origins in level-0 pixels
- `overlap`
- Requested overlap fraction between neighboring tiles
- `tissue_threshold`
Expand Down
4 changes: 2 additions & 2 deletions docs/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,9 @@ When enabled, every candidate tile that passes the tissue mask check is read fro

When `save_tiles: true`, HS2P also writes a `tiles/{sample_id}.tiles.tar` archive with JPEG-encoded tile images.

- For non-CuCIM backends, tar extraction uses the existing sequential reader.
- For non-CuCIM backends, tar extraction still uses the `wholeslidedata` reader, but dense `8x8` and `4x4` tile blocks are coalesced into larger contiguous reads before slicing them back into tiles.
- For `tiling.backend: cucim`, tar extraction uses a cuCIM batch-read fast path and reuses the per-slide worker count from `speed.num_workers`.
- Installing CuCIM is optional. If `backend: cucim` is selected but CuCIM is not installed, HS2P falls back to the sequential export path and emits a warning.
- Installing cuCIM is optional. If `backend: cucim` is selected but cuCIM is not installed, HS2P falls back to the `wholeslidedata` export path and emits a warning.

## Resume and precomputed artifacts

Expand Down
153 changes: 147 additions & 6 deletions hs2p/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ class TilingResult:
tissue_threshold: float
num_tiles: int
config_hash: str
read_step_px: int | None = None
step_px_lv0: int | None = None
tissue_fraction: np.ndarray | None = None
annotation: str | None = None
selection_strategy: str | None = None
Expand Down Expand Up @@ -281,6 +283,8 @@ def _compute_tiling_result(
),
)
),
read_step_px=extraction.read_step_px,
step_px_lv0=extraction.step_px_lv0,
selection_strategy=(
CoordinateSelectionStrategy.MERGED_DEFAULT_TILING
if sampling_spec is not None
Expand Down Expand Up @@ -424,7 +428,9 @@ def save_tiling_result(
"read_level": result.read_level,
"read_spacing_um": result.read_spacing_um,
"read_tile_size_px": result.read_tile_size_px,
"read_step_px": result.read_step_px,
"tile_size_lv0": result.tile_size_lv0,
"step_px_lv0": result.step_px_lv0,
"overlap": result.overlap,
"tissue_threshold": result.tissue_threshold,
"num_tiles": result.num_tiles,
Expand Down Expand Up @@ -585,6 +591,8 @@ def extract_tiles_to_tar(
tissue_threshold=result.tissue_threshold,
num_tiles=len(kept),
config_hash=result.config_hash,
read_step_px=result.read_step_px,
step_px_lv0=result.step_px_lv0,
tissue_fraction=(
result.tissue_fraction[kept]
if result.tissue_fraction is not None
Expand Down Expand Up @@ -656,15 +664,138 @@ def _iter_wsd_tile_arrays_for_tar_extraction(
import wholeslidedata as wsd

wsi = wsd.WholeSlideImage(result.image_path, backend=result.backend)
for i in range(result.num_tiles):
yield wsi.get_patch(
int(result.x[i]),
int(result.y[i]),
int(result.read_tile_size_px),
int(result.read_tile_size_px),
read_step_px = _resolve_read_step_px(result)
step_px_lv0 = _resolve_step_px_lv0(result)
for read_plan in _iter_wsd_read_plans_for_tar_extraction(
result=result,
read_step_px=read_step_px,
step_px_lv0=step_px_lv0,
):
region = wsi.get_patch(
int(read_plan.x),
int(read_plan.y),
int(read_plan.read_size_px),
int(read_plan.read_size_px),
spacing=float(result.read_spacing_um),
center=False,
)
region = np.asarray(region)
if read_plan.block_size == 1:
yield region
continue
for x_idx in range(read_plan.block_size):
x0 = x_idx * read_step_px
for y_idx in range(read_plan.block_size):
y0 = y_idx * read_step_px
yield region[
y0 : y0 + int(result.read_tile_size_px),
x0 : x0 + int(result.read_tile_size_px),
]


@dataclass(frozen=True)
class _WSDTarReadPlan:
    """One wholeslidedata read: a single tile or a square block of coalesced tiles."""

    # Origin of the block's first (top-left) tile, in the same coordinate
    # space as TilingResult.x / TilingResult.y.
    x: int
    y: int
    # Edge length, in read-level pixels, of the square region to read.
    read_size_px: int
    # Tiles per side: 1 for a plain tile read, 4 or 8 for a coalesced block.
    block_size: int


def _resolve_read_step_px(result: TilingResult) -> int:
if result.read_step_px is not None:
return int(result.read_step_px)
return max(
1,
int(round(int(result.read_tile_size_px) * (1.0 - float(result.overlap)), 0)),
)


def _resolve_step_px_lv0(result: TilingResult) -> int:
if result.step_px_lv0 is not None:
return int(result.step_px_lv0)
if result.x.size > 1:
unique_x = np.unique(np.sort(result.x.astype(np.int64, copy=False)))
diffs = np.diff(unique_x)
diffs = diffs[diffs > 0]
if diffs.size > 0:
return int(diffs.min())
if result.y.size > 1:
unique_y = np.unique(np.sort(result.y.astype(np.int64, copy=False)))
diffs = np.diff(unique_y)
diffs = diffs[diffs > 0]
if diffs.size > 0:
return int(diffs.min())
return max(
1,
int(round(int(result.tile_size_lv0) * (1.0 - float(result.overlap)), 0)),
)


def _iter_wsd_read_plans_for_tar_extraction(
    *,
    result: TilingResult,
    read_step_px: int,
    step_px_lv0: int,
):
    """Yield read plans that coalesce dense tile blocks into larger reads.

    Scans tiles in index order; for each not-yet-consumed tile it greedily
    tries to anchor an 8x8 (then 4x4) block whose tile origins form a
    complete grid with stride ``step_px_lv0``. A complete block is emitted
    as one large ``_WSDTarReadPlan``; otherwise the tile is emitted alone.
    Every tile index is covered by exactly one yielded plan.

    Args:
        result: Tiling result providing tile coordinates and sizes.
        read_step_px: Stride between neighboring tile origins at the read level.
        step_px_lv0: Stride between neighboring tile origins in level-0 pixels.
    """
    if step_px_lv0 <= 0:
        # Degenerate stride: fall back to the tile size so the grid lookup
        # below cannot match coincident coordinates.
        step_px_lv0 = int(result.tile_size_lv0)
    # Map each tile's origin to its index for O(1) grid-membership tests.
    coord_to_index = {
        (int(x), int(y)): idx
        for idx, (x, y) in enumerate(
            zip(
                result.x.astype(np.int64, copy=False).tolist(),
                result.y.astype(np.int64, copy=False).tolist(),
            )
        )
    }
    # Tracks tiles already covered by an emitted plan.
    consumed = np.zeros(result.num_tiles, dtype=bool)
    # Try the largest block first; fall back to the smaller one.
    block_sizes = (8, 4)
    tile_size_px = int(result.read_tile_size_px)

    for idx in range(result.num_tiles):
        if consumed[idx]:
            continue
        x0 = int(result.x[idx])
        y0 = int(result.y[idx])
        grouped = False
        for block_size in block_sizes:
            # A block can never be completed with fewer tiles than its area.
            if result.num_tiles < block_size * block_size:
                continue
            indices: list[int] = []
            for x_idx in range(block_size):
                for y_idx in range(block_size):
                    coord = (
                        x0 + x_idx * step_px_lv0,
                        y0 + y_idx * step_px_lv0,
                    )
                    match_idx = coord_to_index.get(coord)
                    if match_idx is None or consumed[match_idx]:
                        # Missing or already-consumed cell: abandon this block.
                        indices = []
                        break
                    indices.append(match_idx)
                # NOTE(review): when the y-loop completes without breaking,
                # len(indices) == (x_idx + 1) * block_size, so the second
                # condition appears unreachable — kept as a safety net.
                if not indices or len(indices) < (x_idx + 1) * block_size:
                    break
            if not indices:
                continue
            # Complete block found: mark all member tiles and emit one read
            # spanning the whole block at the read level.
            for match_idx in indices:
                consumed[match_idx] = True
            yield _WSDTarReadPlan(
                x=x0,
                y=y0,
                read_size_px=tile_size_px + (block_size - 1) * read_step_px,
                block_size=block_size,
            )
            grouped = True
            break
        if grouped:
            continue
        # No block could be anchored here: emit the tile as a single read.
        consumed[idx] = True
        yield _WSDTarReadPlan(
            x=x0,
            y=y0,
            read_size_px=tile_size_px,
            block_size=1,
        )


def _needs_pixel_filtering(filtering: FilterConfig) -> bool:
Expand Down Expand Up @@ -741,6 +872,16 @@ def load_tiling_result(
tissue_threshold=float(meta["tissue_threshold"]),
num_tiles=int(meta["num_tiles"]),
config_hash=str(meta["config_hash"]),
read_step_px=(
int(meta["read_step_px"])
if meta.get("read_step_px") is not None
else None
),
step_px_lv0=(
int(meta["step_px_lv0"])
if meta.get("step_px_lv0") is not None
else None
),
annotation=(
str(meta["annotation"]) if meta.get("annotation") is not None else None
),
Expand Down
123 changes: 123 additions & 0 deletions hs2p/benchmarking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
from __future__ import annotations

from dataclasses import dataclass, replace
from typing import Iterable

import numpy as np

from hs2p.api import (
TilingResult,
_iter_wsd_read_plans_for_tar_extraction,
_resolve_read_step_px,
_resolve_step_px_lv0,
)


@dataclass(frozen=True)
class TileReadPlan:
    """One slide read: a single tile or a square block of coalesced tiles."""

    # Origin of the (top-left) tile covered by this read, in the same
    # coordinate space as TilingResult.x / TilingResult.y.
    x: int
    y: int
    # Edge length of the square read region in read-level pixels.
    read_size_px: int
    # Tiles per side: 1 for a plain tile read; >1 for a coalesced supertile.
    block_size: int


def build_read_plans(
    result: TilingResult,
    *,
    use_supertiles: bool,
) -> list[TileReadPlan]:
    """Materialize read plans for *result*.

    With ``use_supertiles=False`` every tile becomes its own plan at the
    native read size; with ``use_supertiles=True`` dense tile blocks are
    coalesced via the shared tar-extraction planner.
    """
    if not use_supertiles:
        # One independent plan per tile.
        size = int(result.read_tile_size_px)
        xs = result.x.astype(np.int64, copy=False).tolist()
        ys = result.y.astype(np.int64, copy=False).tolist()
        plans: list[TileReadPlan] = []
        for tile_x, tile_y in zip(xs, ys):
            plans.append(
                TileReadPlan(
                    x=int(tile_x),
                    y=int(tile_y),
                    read_size_px=size,
                    block_size=1,
                )
            )
        return plans

    stride_read = _resolve_read_step_px(result)
    stride_lv0 = _resolve_step_px_lv0(result)
    return [
        TileReadPlan(
            x=int(raw.x),
            y=int(raw.y),
            read_size_px=int(raw.read_size_px),
            block_size=int(raw.block_size),
        )
        for raw in _iter_wsd_read_plans_for_tar_extraction(
            result=result,
            read_step_px=stride_read,
            step_px_lv0=stride_lv0,
        )
    ]


def group_read_plans_by_read_size(
    plans: Iterable[TileReadPlan],
) -> dict[int, list[TileReadPlan]]:
    """Bucket read plans by their square read size in pixels.

    Insertion order is preserved both for the bucket keys and for the
    plans within each bucket.
    """
    buckets: dict[int, list[TileReadPlan]] = {}
    for plan in plans:
        size = int(plan.read_size_px)
        if size not in buckets:
            buckets[size] = []
        buckets[size].append(plan)
    return buckets


def iter_tiles_from_region(
    region: np.ndarray,
    plan: TileReadPlan,
    *,
    tile_size_px: int,
    read_step_px: int,
):
    """Yield tile-sized array views from a coalesced region.

    Tiles are emitted column-major: the x offset advances in the outer loop
    and the y offset in the inner loop, matching the tar-extraction order.
    """
    arr = np.asarray(region)
    if plan.block_size == 1:
        # Single-tile plan: clip to the tile size and stop.
        yield arr[:tile_size_px, :tile_size_px]
        return
    offsets = [step * read_step_px for step in range(plan.block_size)]
    for col in offsets:
        for row in offsets:
            yield arr[row : row + tile_size_px, col : col + tile_size_px]


def limit_tiling_result(result: TilingResult, *, max_tiles: int) -> TilingResult:
    """Return a copy of *result* truncated to the first ``max_tiles`` tiles.

    The original object is returned unchanged when ``max_tiles`` is
    non-positive or does not actually truncate anything.

    Args:
        result: Tiling result to truncate.
        max_tiles: Maximum number of tiles to keep (taken from the front).

    Returns:
        Either *result* itself (no truncation needed) or a new TilingResult
        with coordinate arrays, tissue fractions, tile indices, and the tile
        count reduced to ``max_tiles``.
    """
    if max_tiles <= 0 or max_tiles >= result.num_tiles:
        return result
    keep = int(max_tiles)
    kept = slice(0, keep)
    # dataclasses.replace copies every other field verbatim, so fields added
    # to TilingResult later cannot be silently dropped by this helper (the
    # previous field-by-field reconstruction was fragile in that respect).
    return replace(
        result,
        x=result.x[kept],
        y=result.y[kept],
        # Tile indices are reissued densely for the truncated subset.
        tile_index=np.arange(keep, dtype=np.int32),
        num_tiles=keep,
        tissue_fraction=(
            result.tissue_fraction[kept]
            if result.tissue_fraction is not None
            else None
        ),
    )
Loading
Loading