-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtutorial.py
127 lines (104 loc) · 3.77 KB
/
tutorial.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
"""
Tutorial
========
This page steps through going from a set of 2D image files to a
3D chunked zarr dataset.
"""
import pathlib
import sys
import tempfile
import dask_image.imread
import matplotlib.pyplot as plt
import skimage.color
import skimage.data
import tifffile
from loguru import logger
from pydantic_zarr.v2 import ArraySpec
import stack_to_chunk
# %%
# Generating sample data
# ----------------------
#
# We'll start by generating a set of sample data to downsample.
# To do this we'll just save 35 copies of a grayscale cat to a temporary directory.
data_2d = skimage.color.rgb2gray(skimage.data.cat())
temp_dir = tempfile.TemporaryDirectory()
temp_dir_path = pathlib.Path(temp_dir.name)
slice_dir = temp_dir_path / "slices"
slice_dir.mkdir()
for i in range(35):
tifffile.imwrite(slice_dir / f"{str(i).zfill(3)}.tif", data_2d.T)
plt.imshow(data_2d, cmap="gray")
# %%
# Setting up input
# ----------------
# stack-to-chunk takes a 3D dask array as input. dask provides an interface to lazily
# load each slice as and when it's needed. So although the dask array we create looks
# and behaves like an array, no data is actually read in from the TIFF files at this
# point.
#
# This also makes stack-to-chunk flexible - as long as you can put your 2D images into
# a 3D dask array, they can be used with stack-to-chunk.
#
# For this tutorial, ``dask_image`` provides a convenient way for us to read in all our
# TIFF files:
images = dask_image.imread.imread(str(slice_dir / "*.tif")).T
print(images)
# %%
#
# A few things to note here:
# - We have a single 3D dask array, with the array axes being the x, y, z axes of the
# image.
# - The chunk size of the dask array is ``(nx, ny, 1)`` ie each individual slice
# (corresponding) to each individual file on disk) is a chunk in the dask array.
#
# Running stack-to-chunk
# ----------------------
# The starting point for running ``stack-to-chunk`` is creating a `MultiScaleGroup`.
# This represents a local zarr group that will contain the output multi-scale dataset.
#
# Once we've created it, the ``levels`` property shows that no levels have been added
# to the group yet.
group = stack_to_chunk.MultiScaleGroup(
temp_dir_path / "chunked.ome.zarr",
name="my_zarr_group",
spatial_unit="centimeter",
voxel_size=(3, 4, 5),
array_spec=ArraySpec.from_array(images, chunks=(16, 16, 16)),
)
print(group.levels)
# %%
# The first step in creating new data in the group is to make a copy of the data slices
# without any downsampling. Before doing this lets do a quick check of how much memory
# each process will take up when we run stack-to-chunk:
bytes_per_process = stack_to_chunk.memory_per_process(images, chunk_size=16)
print(f"Each process will use {bytes_per_process / 1e6:.1f} MB")
# %%
# We'll also enable logging here, so we can see that stack-to-chunk provides some
# useful progress messages:
logger.enable("stack_to_chunk")
logger.add(sys.stdout, level="INFO")
# %%
# And finally, lets create our first data copy:
group.add_full_res_data(images, n_processes=1)
# %%
# The levels property can be inspected to show we've added the first level. Ekach level
# is downsampled by a factor of ``2**level``, so level 0 is downsampled by a factor of
# 1, which is just a copy of the original data (as expected).
print(group.levels)
# %%
# Now lets add some downsampling levels:
group.add_downsample_level(1, n_processes=1)
group.add_downsample_level(2, n_processes=1)
group.add_downsample_level(3, n_processes=1)
print(group.levels)
# %%
# The downsampled data can be accessed as `zarr.Array` objects by indexing
# ``group``. As an example, lets plot the third downsampled level:
plt.imshow(group[3][:, :, 0], cmap="gray")
# %%
# Cleanup
# -------
#
# Finally we need to clean up the temporary directory we made earlier.
temp_dir.cleanup()