diff --git a/cellbrowser_tools/bin/show_info.py b/cellbrowser_tools/bin/show_info.py new file mode 100644 index 0000000..40bcb09 --- /dev/null +++ b/cellbrowser_tools/bin/show_info.py @@ -0,0 +1,72 @@ +from bioio import BioImage +import argparse +import logging +import sys +import traceback + +from datetime import datetime +from logging import FileHandler, StreamHandler, Formatter + + +class Args(argparse.Namespace): + def __init__(self): + super().__init__() + self.fms_id = "" + # + self.__parse() + + def __parse(self): + p = argparse.ArgumentParser( + prog="show_info", + description="Display the dimensions and channel names for all scenes in a file", + ) + p.add_argument( + "imagepath", + type=str, + default="", + ) + p.parse_args(namespace=self) + + +############################################################################### + + +def configure_logging(debug: bool): + f = Formatter(fmt="[%(asctime)s][%(levelname)s] %(message)s") + streamHandler = StreamHandler() + streamHandler.setFormatter(f) + log = logging.getLogger() # root logger + log.handlers = [streamHandler] # overwrite handlers + log.setLevel(logging.DEBUG if debug else logging.INFO) + + +def main(): + args = Args() + configure_logging(False) + log = logging.getLogger(__name__) + + try: + imagepath = args.imagepath + # Load the image + image = BioImage(imagepath) + print(imagepath) + for s in image.scenes: + image.set_scene(s) + print(f"Scene {s}") + print(f" Shape: {image.shape}, dtype: {image.dtype}") + print(f" Channel names: {image.channel_names}") + + except Exception as e: + log.error("=============================================") + log.error("\n\n" + traceback.format_exc()) + log.error("=============================================") + log.error("\n\n" + str(e) + "\n") + log.error("=============================================") + sys.exit(1) + + +############################################################################### +# Allow caller to directly run this module 
(usually in development scenarios) + +if __name__ == "__main__": + main() diff --git a/cellbrowser_tools/ome_zarr_writer.py b/cellbrowser_tools/ome_zarr_writer.py index 6347864..260b553 100644 --- a/cellbrowser_tools/ome_zarr_writer.py +++ b/cellbrowser_tools/ome_zarr_writer.py @@ -1,5 +1,5 @@ import zarr -from zarr.storage import DirectoryStore, FSStore +from zarr.storage import DirectoryStore, FSStore, default_compressor from typing import List, Tuple, Any from dataclasses import dataclass, asdict @@ -300,7 +300,7 @@ def __init__(self): self.store : zarr.Store = None self.root : zarr.hierarchy.Group = None - def init_store(self, output_path:str, shapes:List[Tuple[int]], chunk_sizes:List[Tuple[int]], dtype:np.dtype): + def init_store(self, output_path:str, shapes:List[Tuple[int]], chunk_sizes:List[Tuple[int]], dtype:np.dtype, compressor = default_compressor): """ Initialize the store. :param output_path: The output path. If it begins with "s3://" or "gs://", it is assumed to be a remote store. Credentials required to be provided externally. @@ -323,15 +323,15 @@ def init_store(self, output_path:str, shapes:List[Tuple[int]], chunk_sizes:List[ # create a group with all the levels self.root = zarr.group(store=self.store, overwrite=True) # pre-create all levels here? 
- self._create_levels(self.root) + self._create_levels(root=self.root, level_shapes=shapes, level_chunk_sizes=chunk_sizes, dtype=dtype, compressor=compressor) - def _create_levels(self, root, level_shapes, level_chunk_sizes, dtype): + def _create_levels(self, root, level_shapes, level_chunk_sizes, dtype, compressor=default_compressor): self.levels = [] for i in range(len(level_shapes)): lvl = root.zeros( - str(i), shape=level_shapes[i], chunks=level_chunk_sizes[i], dtype=dtype - ) + str(i), shape=level_shapes[i], chunks=level_chunk_sizes[i], dtype=dtype, compressor=compressor + ) if root is not None else None level = ZarrLevel(level_shapes[i], level_chunk_sizes[i], dtype, lvl) self.levels.append(level) @@ -340,23 +340,24 @@ def _downsample_and_write_batch_t(self, data_tczyx: da.Array, start_t: int, end_ dtype = data_tczyx.dtype if len(data_tczyx.shape) != 5: raise ValueError("data_tczyx must be 5D") - if len(data_tczyx[0]) != end_t-start_t: + if len(data_tczyx) != end_t-start_t: raise ValueError("data_tczyx must have the same T length as end_t-start_t") # write level 0 first data_tczyx = data_tczyx.persist() - data_tczyx.compute() + # data_tczyx.compute() for k in range(start_t, end_t): self.levels[0].zarray[k] = data_tczyx[k - start_t] # downsample to next level then write for j in range(1, len(self.levels)): # downsample to next level - nextshape = self.levels[j].shape + nextshape = (end_t-start_t,)+self.levels[j].shape[1:] data_tczyx = resize(data_tczyx, nextshape, order=0) data_tczyx = data_tczyx.astype(dtype) data_tczyx = data_tczyx.persist() - data_tczyx.compute() + # data_tczyx.compute() + # write ti to zarr # for some reason this is not working: not allowed to write in this way to a non-memory store # lvls[j][start_t:end_t] = ti[:] @@ -369,7 +370,7 @@ def _downsample_and_write_batch_t(self, data_tczyx: da.Array, start_t: int, end_ log.info(f"Completed {start_t} to {end_t}") - def write_t_batches(self, im: BioImage, tbatch:int=4): + def write_t_batches(self, 
im: BioImage, channels: List[int], tbatch:int=4, debug:bool=False): """ Write the image in batches of T. :param im: The BioImage object. @@ -377,36 +378,40 @@ def write_t_batches(self, im: BioImage, tbatch:int=4): """ # loop over T in batches numT = im.dims.T + if debug: + numT = np.min([5, numT]) log.info("Starting loop over T") - for i in range(numT // tbatch): - start_t = i * tbatch - end_t = min((i + 1) * tbatch, numT) - # assume start t and end t are in range (caller should guarantee this) - ti = im.get_image_dask_data("TCZYX", T=slice(start_t, end_t)) - self._downsample_and_write_batch_t(ti, start_t, end_t) + for i in np.arange(0, numT+1, tbatch): + start_t = i + end_t = min(i + tbatch, numT) + if end_t > start_t: + # assume start t and end t are in range (caller should guarantee this) + ti = im.get_image_dask_data("TCZYX", T=slice(start_t, end_t), C=channels) + self._downsample_and_write_batch_t(ti, start_t, end_t) log.info("Finished loop over T") - def write_t_batches_image_sequence(self, paths: List[str], tbatch:int=4): + def write_t_batches_image_sequence(self, paths: List[str], channels: List[int], tbatch:int=4, debug:bool=False): """ Write the image in batches of T. :param paths: The list of file paths, one path per T. :param tbatch: The number of T to write at a time. 
""" # loop over T in batches - numT = im.dims.T + numT = len(paths) + if debug: + numT = np.min([5, numT]) log.info("Starting loop over T") - for i in range(numT // tbatch): - start_t = i * tbatch - end_t = min((i + 1) * tbatch, numT) - - # read batch into dask array - ti = [] - for j in range(start_t, end_t): - im = BioImage(paths[j]) - ti.append(im.get_image_dask_data("CZYX")) - ti = da.stack(ti, axis=0) - - self._downsample_and_write_batch_t(ti, start_t, end_t) + for i in np.arange(0, numT+1, tbatch): + start_t = i + end_t = min(i + tbatch, numT) + if end_t > start_t: + # read batch into dask array + ti = [] + for j in range(start_t, end_t): + im = BioImage(paths[j]) + ti.append(im.get_image_dask_data("CZYX", C=channels)) + ti = da.stack(ti, axis=0) + self._downsample_and_write_batch_t(ti, start_t, end_t) log.info("Finished loop over T") def _get_scale_ratio(self, level:int)->Tuple[float]: @@ -418,20 +423,20 @@ def _get_scale_ratio(self, level:int)->Tuple[float]: lvl0_shape[3]/lvl_shape[3], lvl0_shape[4]/lvl_shape[4]) - def write_metadata(self, - image_name:str, - channel_names:List[str], - physical_dims:dict, # {"x":0.1, "y", 0.1, "z", 0.3, "t": 5.0} - physical_units:dict, # {"x":"micrometer", "y":"micrometer", "z":"micrometer", "t":"minute"} - ): + def generate_metadata(self, + image_name:str, + channel_names:List[str], + physical_dims:dict, # {"x":0.1, "y", 0.1, "z", 0.3, "t": 5.0} + physical_units:dict, # {"x":"micrometer", "y":"micrometer", "z":"micrometer", "t":"minute"}, + channel_colors:List[str], + ): """ - Write the metadata. + Build a metadata dict suitable for writing to ome-zarr attrs. :param image_name: The image name. :param channel_names: The channel names. :param physical_dims: for each physical dimension, include a scale factor. E.g. {"x":0.1, "y", 0.1, "z", 0.3, "t": 5.0} :param physical_units: For each physical dimension, include a unit string. E.g. 
{"x":"micrometer", "y":"micrometer", "z":"micrometer", "t":"minute"} """ - # write metadata dims= ("t", "c", "z", "y", "x") axes = [] for dim in dims: @@ -476,7 +481,6 @@ def write_metadata(self, metadata_dict = asdict(metadata) metadata_dict = _pop_metadata_optionals(metadata_dict) - self.root.attrs["multiscales"] = [metadata_dict] # get the total shape as dict: shapedict = dim_tuple_to_dict(self.levels[0].shape) @@ -486,14 +490,24 @@ def write_metadata(self, shapedict["z"] if "z" in shapedict else 1, image_name, channel_names=channel_names, # assumes we have written all channels! - channel_colors=[], # type: ignore + channel_colors=channel_colors, # type: ignore # TODO: Rely on user to supply the per-channel min/max. channel_minmax=[(0.0, 1.0) for i in range(shapedict["c"] if "c" in shapedict else 1)], ) - self.root.attrs["omero"] = ome_json - return metadata + ome_zarr_metadata = { + "multiscales" : [metadata_dict], + "omero" : ome_json + } + return ome_zarr_metadata + def write_metadata(self, metadata:dict): + """ + Write the metadata. + :param metadata: The metadata dict. 
Expected to contain a multiscales array and omero dict + """ + self.root.attrs["multiscales"] = metadata["multiscales"] + self.root.attrs["omero"] = metadata["omero"] # shapes = compute_level_shapes((500, 4, 150, 2000, 2000),(1,1,2,2,2), 5) diff --git a/setup.py b/setup.py index 00d46b6..91d8626 100644 --- a/setup.py +++ b/setup.py @@ -40,16 +40,16 @@ requirements = [ "aics_dask_utils>=0.2.4", - "aicsfiles @ git+ssh://git@github.com/aics-int/aicsfiles-python.git@v7.1.0", - "bioio-base>=1.0", - "bioio @ git+ssh://git@github.com/bioio-devs/bioio.git", - "bioio-czi>=1.0", - "bioio-nd2>=1.0", - "bioio-ome-tiff>=1.0", - "bioio-ome-zarr>=1.0", - "bioio-sldy>=1.0", - "bioio-tifffile>=1.0", - "bokeh", + # "aicsfiles @ git+ssh://git@github.com/aics-int/aicsfiles-python.git@v7.1.0", + #"bioio-base>=1.0", + #"bioio @ git+ssh://git@github.com/bioio-devs/bioio.git", + #"bioio-czi>=1.0", + #"bioio-nd2>=1.0", + #"bioio-ome-tiff>=1.0", + #"bioio-ome-zarr>=1.0", + #"bioio-sldy>=1.0", + #"bioio-tifffile>=1.0", + #"bokeh", "brotli", "dask[array]", "dask[distributed]", @@ -92,7 +92,7 @@ "make_dataset_from_csv=cellbrowser_tools.bin.make_dataset_from_csv:main", "make_downloader_manifest=cellbrowser_tools.bin.make_downloader_manifest:main", "processImageWithSegmentation=cellbrowser_tools.bin.processImageWithSegmentation:main", - "make_zarr=cellbrowser_tools.bin.make_zarr", + "make_zarr=cellbrowser_tools.bin.make_zarr:main", ], }, install_requires=requirements,