diff --git a/docs/tutorial/tutorial.py b/docs/tutorial/tutorial.py index 5ac15be..59cd7c1 100644 --- a/docs/tutorial/tutorial.py +++ b/docs/tutorial/tutorial.py @@ -16,6 +16,7 @@ import skimage.data import tifffile from loguru import logger +from pydantic_zarr.v2 import ArraySpec import stack_to_chunk @@ -69,11 +70,13 @@ # Once we've created it, the ``levels`` property shows that no levels have been added # to the group yet. + group = stack_to_chunk.MultiScaleGroup( temp_dir_path / "chunked.ome.zarr", name="my_zarr_group", spatial_unit="centimeter", voxel_size=(3, 4, 5), + array_spec=ArraySpec.from_array(images, chunks=(16, 16, 16)), ) print(group.levels) @@ -95,7 +98,6 @@ # %% # And finally, lets create our first data copy: -group.create_initial_dataset(images, chunk_size=16, compressor="default") group.add_full_res_data(images, n_processes=1) # %% diff --git a/pyproject.toml b/pyproject.toml index b4db67d..f225a4a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,7 @@ readme = "README.md" requires-python = ">=3.11" license = { file = "LICENSE.md" } + authors = [{ email = "d.stansby@ucl.ac.uk", name = "HiP-CT Project" }] classifiers = [ "Operating System :: POSIX", @@ -23,12 +24,13 @@ dependencies = [ "joblib==1.5.0", "loguru==0.7.3", "numpy==2.2.6", + "ome-zarr-models==0.1.6", + "pydantic-zarr==0.7.0", "scikit-image==0.25.1", "zarr==2.18.7", "numcodecs==0.15.1", ] - optional-dependencies = { dev = [ "pre-commit", "tox>=4", @@ -68,9 +70,4 @@ overrides."project.classifiers".inline_arrays = false overrides."tool.coverage.paths.source".inline_arrays = false [tool.uv] -dev-dependencies = [ - "ome-zarr-models@git+https://github.com/BioImageTools/ome-zarr-models-py@0068c1e4fcca35a942260f82a299e05ebb180019", - "pre-commit>=3.8.0", - "pytest-cov>=5.0.0", - "pytest>=8.3.2", -] +dev-dependencies = ["pre-commit>=3.8.0", "pytest-cov>=5.0.0", "pytest>=8.3.2"] diff --git a/src/stack_to_chunk/main.py b/src/stack_to_chunk/main.py index 42cd10e..565c4e3 100644 --- 
a/src/stack_to_chunk/main.py +++ b/src/stack_to_chunk/main.py @@ -4,7 +4,6 @@ from copy import deepcopy from pathlib import Path -from typing import Any, Literal import numpy as np import zarr @@ -12,7 +11,9 @@ from joblib import Parallel from loguru import logger from numcodecs import blosc -from numcodecs.abc import Codec +from ome_zarr_models.v04 import Image +from ome_zarr_models.v04.axes import Axis +from pydantic_zarr.v2 import ArraySpec from stack_to_chunk._array_helpers import _copy_slab, _downsample_block from stack_to_chunk.ome_ngff import SPATIAL_UNIT, DatasetDict @@ -32,7 +33,7 @@ def memory_per_process(input_data: Array, *, chunk_size: int) -> int: class MultiScaleGroup: """ - A class for creating and interacting with a OME-zarr multi-scale group. + A class for creating and interacting with an OME-Zarr multi-scale group. Parameters ---------- @@ -44,6 +45,9 @@ class MultiScaleGroup: Size of a single voxel, in units of spatial_units. spatial_units : Units of the voxel size. + array_spec : + Specification for initial dataset array. If opening an existing group, + this does not need to be provided. """ @@ -54,42 +58,54 @@ def __init__( self, path: Path, name: str, voxel_size: tuple[float, float, float], spatial_unit: SPATIAL_UNIT, + array_spec: ArraySpec | None = None, ) -> None: + self._store = zarr.DirectoryStore(path) self._path = path self._name = name self._spatial_unit = spatial_unit self._voxel_size = voxel_size if isinstance(path, Path) and not path.exists(): - self._create_zarr_group() + if array_spec is None: + msg = "Group does not already exist, array_spec must be provided" + raise ValueError(msg) + self._create_zarr_group(array_spec) - self._group = zarr.open_group(store=self._path, mode="r+") + self._group = zarr.open_group(store=self._store, mode="r+") - def _create_zarr_group(self) -> None: + def _create_zarr_group(self, array_spec: ArraySpec) -> None: """ Create the zarr group. Saves a reference to the group on the ._group attribute. 
""" - self._group = zarr.open_group(store=self._path, mode="w") - multiscales: dict[str, Any] = {} - multiscales["version"] = "0.4" - multiscales["name"] = self._name - multiscales["axes"] = [ - {"name": "x", "type": "space", "unit": self._spatial_unit}, - {"name": "y", "type": "space", "unit": self._spatial_unit}, - {"name": "z", "type": "space", "unit": self._spatial_unit}, - ] - multiscales["type"] = "local mean" - multiscales["metadata"] = { - "description": "Downscaled using local mean in 2x2x2 blocks.", - "method": "skimage.measure.block_reduce", - "version": "0.24.0", - "kwargs": {"block_size": 2, "func": "np.mean"}, - } - - multiscales["datasets"] = [] - self._group.attrs["multiscales"] = [multiscales] + self._image: Image = Image.new( + array_specs=[array_spec], + paths=["0"], + axes=[ + Axis(name="x", type="space", unit=self._spatial_unit), + Axis(name="y", type="space", unit=self._spatial_unit), + Axis(name="z", type="space", unit=self._spatial_unit), + ], + name=self._name, + multiscale_type="local mean", + metadata={ + "description": "Downscaled using local mean in 2x2x2 blocks.", + "method": "skimage.measure.block_reduce", + "version": "0.24.0", + "kwargs": {"block_size": 2, "func": "np.mean"}, + }, + scales=[self._voxel_size], + translations=[ + ( + self._voxel_size[0] / 2, + self._voxel_size[1] / 2, + self._voxel_size[2] / 2, + ) + ], + ) + self._image.to_zarr(store=self._store, path="/") @property def levels(self) -> list[int]: @@ -111,41 +127,6 @@ def __getitem__(self, level: int) -> zarr.Array: return self._group[str(level)] - def create_initial_dataset( - self, data: Array, *, chunk_size: int, compressor: Literal["default"] | Codec - ) -> None: - """ - Set up the inital full-resolution dataset. - - Parameters - ---------- - data : - Full input data. Must be 3D, and have a chunksize of ``(nx, ny, 1)``, where - ``(nx, ny)`` is the shape of the input 2D slices. - chunk_size : - Size of chunks in output zarr dataset. 
- compressor : - Compressor to use when writing data to zarr dataset. - - Raises - ------ - RuntimeError : - If full resolution dataset has already been created. - - """ - if "0" in self._group: - msg = "Full resolution dataset already set up." - raise RuntimeError(msg) - self._group.create_dataset( - name="0", - shape=data.shape, - chunks=(chunk_size, chunk_size, chunk_size), - dtype=data.dtype, - compressor=compressor, - dimension_separator="/", - ) - self._add_level_metadata(0) - def add_full_res_data( self, data: Array, diff --git a/src/stack_to_chunk/tests/test_main.py b/src/stack_to_chunk/tests/test_main.py index 93a566b..d70d484 100644 --- a/src/stack_to_chunk/tests/test_main.py +++ b/src/stack_to_chunk/tests/test_main.py @@ -11,6 +11,7 @@ import ome_zarr_models.v04 import pytest import zarr +from pydantic_zarr.v2 import ArraySpec, dictify_codec from stack_to_chunk import MultiScaleGroup, memory_per_process, open_multiscale_group @@ -40,6 +41,7 @@ def check_full_res_copy(zarr_path: Path, group: zarr.Group, arr: da.Array) -> No {"name": "y", "type": "space", "unit": "centimeter"}, {"name": "z", "type": "space", "unit": "centimeter"}, ], + "coordinateTransformations": None, "datasets": [ { "coordinateTransformations": [ @@ -59,7 +61,8 @@ def check_full_res_copy(zarr_path: Path, group: zarr.Group, arr: da.Array) -> No "type": "local mean", "version": "0.4", } - ] + ], + "omero": None, }, ) @@ -77,19 +80,24 @@ def arr() -> da.Array: def test_workflow(tmp_path: Path, arr: da.Array) -> None: """Basic smoke test of the workflow as a user would use it.""" + chunk_size = 64 + compressor = numcodecs.blosc.Blosc(cname="zstd", clevel=2, shuffle=2) + zarr_path = tmp_path / "group.ome.zarr" group = MultiScaleGroup( tmp_path / zarr_path, name="my_zarr_group", spatial_unit="centimeter", voxel_size=(3, 4, 5), + array_spec=ArraySpec.from_array( + arr, + chunks=(chunk_size, chunk_size, chunk_size), + compressor=dictify_codec(compressor), + ), ) assert zarr_path.exists() - 
assert group.levels == [] - - compressor = numcodecs.blosc.Blosc(cname="zstd", clevel=2, shuffle=2) - chunk_size = 64 + assert group.levels == [0] check_zattrs( zarr_path, @@ -101,7 +109,16 @@ def test_workflow(tmp_path: Path, arr: da.Array) -> None: {"name": "y", "type": "space", "unit": "centimeter"}, {"name": "z", "type": "space", "unit": "centimeter"}, ], - "datasets": [], + "coordinateTransformations": None, + "datasets": [ + { + "coordinateTransformations": [ + {"scale": [3.0, 4.0, 5.0], "type": "scale"}, + {"translation": [1.5, 2.0, 2.5], "type": "translation"}, + ], + "path": "0", + } + ], "metadata": { "description": "Downscaled using local mean in 2x2x2 blocks.", "kwargs": {"block_size": 2, "func": "np.mean"}, @@ -112,16 +129,12 @@ def test_workflow(tmp_path: Path, arr: da.Array) -> None: "type": "local mean", "version": "0.4", } - ] + ], + "omero": None, }, ) assert memory_per_process(arr, chunk_size=chunk_size) == 18282880 - group.create_initial_dataset( - data=arr, - chunk_size=chunk_size, - compressor=compressor, - ) group.add_full_res_data( arr, n_processes=1, @@ -152,6 +165,7 @@ def test_workflow(tmp_path: Path, arr: da.Array) -> None: {"name": "y", "type": "space", "unit": "centimeter"}, {"name": "z", "type": "space", "unit": "centimeter"}, ], + "coordinateTransformations": None, "datasets": [ { "coordinateTransformations": [ @@ -178,7 +192,8 @@ def test_workflow(tmp_path: Path, arr: da.Array) -> None: "type": "local mean", "version": "0.4", } - ] + ], + "omero": None, }, ) @@ -204,20 +219,21 @@ def test_parallel_copy(tmp_path: Path, arr: da.Array) -> None: Simulates what happens on a compute cluster. 
""" + compressor = numcodecs.blosc.Blosc(cname="zstd", clevel=2, shuffle=2) + chunk_size = 64 zarr_path = tmp_path / "group.ome.zarr" group = MultiScaleGroup( tmp_path / zarr_path, name="my_zarr_group", spatial_unit="centimeter", voxel_size=(3, 4, 5), + array_spec=ArraySpec.from_array( + arr, + chunks=(chunk_size, chunk_size, chunk_size), + compressor=dictify_codec(compressor), + ), ) - compressor = numcodecs.blosc.Blosc(cname="zstd", clevel=2, shuffle=2) - chunk_size = 64 - group.create_initial_dataset( - data=arr, - chunk_size=chunk_size, - compressor=compressor, - ) + # Add first slab group.add_full_res_data( arr[:, :, :64], @@ -245,16 +261,16 @@ def test_parallel_copy(tmp_path: Path, arr: da.Array) -> None: def test_wrong_chunksize(tmp_path: Path, arr: da.Array) -> None: zarr_path = tmp_path / "group.ome.zarr" + chunk_size = 64 group = MultiScaleGroup( tmp_path / zarr_path, name="my_zarr_group", spatial_unit="centimeter", voxel_size=(3, 4, 5), - ) - group.create_initial_dataset( - data=arr, - chunk_size=64, - compressor="default", + array_spec=ArraySpec.from_array( + arr, + chunks=(chunk_size, chunk_size, chunk_size), + ), ) with pytest.raises( @@ -277,11 +293,7 @@ def test_known_data(tmp_path: Path) -> None: name="my_zarr_group", spatial_unit="centimeter", voxel_size=(3, 4, 5), - ) - group.create_initial_dataset( - data=arr, - chunk_size=1, - compressor="default", + array_spec=ArraySpec.from_array(arr, chunks=(1, 1, 1)), ) group.add_full_res_data(arr, n_processes=1) group.add_downsample_level(1, n_processes=1) @@ -301,11 +313,7 @@ def test_padding(tmp_path: Path) -> None: name="my_zarr_group", spatial_unit="centimeter", voxel_size=(3, 4, 5), - ) - group.create_initial_dataset( - data=arr, - chunk_size=1, - compressor="default", + array_spec=ArraySpec.from_array(arr, chunks=(1, 1, 1)), ) group.add_full_res_data(arr, n_processes=1) group.add_downsample_level(1, n_processes=1) @@ -313,59 +321,6 @@ def test_padding(tmp_path: Path) -> None: 
np.testing.assert_equal(arr_downsammpled[:], [[[3]], [[12]]]) -def test_metadata_sorting(tmp_path: Path) -> None: - # Check that metadata levels added in the wrong order (for some reason...) - # are sorted from low to high. - zarr_path = tmp_path / "group.ome.zarr" - group = MultiScaleGroup( - zarr_path, - name="my_zarr_group", - spatial_unit="centimeter", - voxel_size=(3, 4, 5), - ) - group._add_level_metadata(1) # noqa: SLF001 - group._add_level_metadata(0) # noqa: SLF001 - check_zattrs( - zarr_path, - { - "multiscales": [ - { - "axes": [ - {"name": "x", "type": "space", "unit": "centimeter"}, - {"name": "y", "type": "space", "unit": "centimeter"}, - {"name": "z", "type": "space", "unit": "centimeter"}, - ], - "datasets": [ - { - "coordinateTransformations": [ - {"scale": [3.0, 4.0, 5.0], "type": "scale"}, - {"translation": [1.5, 2.0, 2.5], "type": "translation"}, - ], - "path": "0", - }, - { - "coordinateTransformations": [ - {"scale": [6.0, 8.0, 10.0], "type": "scale"}, - {"translation": [3.0, 4.0, 5.0], "type": "translation"}, - ], - "path": "1", - }, - ], - "metadata": { - "description": "Downscaled using local mean in 2x2x2 blocks.", - "kwargs": {"block_size": 2, "func": "np.mean"}, - "method": "skimage.measure.block_reduce", - "version": "0.24.0", - }, - "name": "my_zarr_group", - "type": "local mean", - "version": "0.4", - } - ] - }, - ) - - def test_fix_transform_order(tmp_path: Path) -> None: zarr_path = (tmp_path / "zarr_group").resolve() group = zarr.open_group(zarr_path, mode="w")