Skip to content

Commit 0e20de6

Browse files
Fix rechunk bug after spatial query (#861)
* fix rechunk bug after spatial query
* code cleanup
1 parent cab8353 commit 0e20de6

File tree

4 files changed

+28
-6
lines changed

4 files changed

+28
-6
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ exclude = [
127127
"docs/_build",
128128
"dist",
129129
"setup.py",
130-
130+
131131
]
132132
line-length = 120
133133
target-version = "py310"

src/spatialdata/_core/query/_utils.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,32 @@ def _process_data_tree_query_result(query_result: DataTree) -> DataTree | None:
140140
d = {k: Dataset({"image": d[k]}) for k in scales_to_keep}
141141
result = DataTree.from_dict(d)
142142

143-
# Rechunk the data to avoid irregular chunks
143+
# rechunk the data to avoid irregular chunks
144+
coords = list(result["scale0"].coords.keys())
145+
result = result.chunk({c: "auto" for c in coords})
146+
147+
from dask.array.core import _check_regular_chunks
148+
149+
# check that the rechunking into regular chunks worked
150+
chunks_still_irregular = False
144151
for scale in result:
145-
result[scale]["image"] = result[scale]["image"].chunk("auto")
152+
data = result[scale]["image"].data
153+
chunks_still_irregular = chunks_still_irregular or not _check_regular_chunks(data.chunks)
154+
155+
if chunks_still_irregular:
156+
# reported here: https://github.com/scverse/spatialdata/issues/821#issuecomment-2632201695
157+
# seemingly due to this bug: https://github.com/dask/dask/issues/11713
158+
CHUNK_SIZE = 1024
159+
rechunk_strategy = {c: CHUNK_SIZE for c in coords}
160+
if "c" in coords:
161+
rechunk_strategy["c"] = result["scale0"]["image"].chunks[0][0]
162+
result = result.chunk(rechunk_strategy)
146163

164+
for scale in result:
165+
data = result[scale]["image"].data
166+
assert _check_regular_chunks(data.chunks), (
167+
f"Chunks are not regular for the {scale} of the queried data: {data.chunks}. Please report this bug."
168+
)
147169
return result
148170

149171

tests/io/test_pyramids_performance.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,5 +82,5 @@ def test_write_image_multiscale_performance(sdata_with_image: SpatialData, tmp_p
8282

8383
actual_num_chunk_writes = zarr_chunk_write_spy.call_count
8484
actual_num_chunk_reads = zarr_chunk_read_spy.call_count
85-
assert actual_num_chunk_writes == num_chunks_all_scales
86-
assert actual_num_chunk_reads == num_chunks_scale0
85+
assert actual_num_chunk_writes == num_chunks_all_scales.item()
86+
assert actual_num_chunk_reads == num_chunks_scale0.item()

0 commit comments

Comments
 (0)