From a83db7fad67d41a324fdd20fd35ed75e26f339ca Mon Sep 17 00:00:00 2001 From: Sean McCulloch <86432671+seanmcculloch@users.noreply.github.com> Date: Fri, 13 Feb 2026 14:50:12 -0800 Subject: [PATCH 1/6] feat: split xml ipd * feat: split xml IP detection - Add split tile shape support to overlap_detection.py - Add split path construction and crop passthrough to metadata_builder.py - Add crop slicing to image_reader.py - Add fetch_local_xml utility function to pipelines/utils.py - Update xml_to_dataframe.py for split XML support - Add uv.lock to gitignore Tests for this feature are in a separate PR. * test: add split XML IP detection tests - Add test_xml_to_dataframe tests for split XML parsing - Add test_image_reader tests for crop slicing - Add test_metadata_builder tests for split metadata handling - Add dataset_split.xml test fixture These tests verify the split XML support added in feat/split-xml-ipd. * Initial plan * Update Rhapso/data_prep/xml_to_dataframe.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update Rhapso/data_prep/xml_to_dataframe.py descriptive error with bad split xmls. 
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Fix channel parsing in parse_image_loader_split_zarr to use .ome.zarr suffix Co-authored-by: seanmcculloch <86432671+seanmcculloch@users.noreply.github.com> * Update Rhapso/detection/metadata_builder.py ceil crop_max when downsampling Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update Rhapso/data_prep/xml_to_dataframe.py descriptive error upon bad split xml Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Initial plan * Fix channel extraction, crop validation, path handling, and test signatures Co-authored-by: seanmcculloch <86432671+seanmcculloch@users.noreply.github.com> * Move import to top level and use calculated level instead of hardcoded '0' Co-authored-by: seanmcculloch <86432671+seanmcculloch@users.noreply.github.com> * fix: handling of multiscale levels * fix: prevent skipping ip detection with 1 split tile only * feat: add overlapping_only flag (default true) to ipd * Fix non-split zarr path construction in overlap detection (#164) * Initial plan * Fix dim_other path to use proper path joining and include level Co-authored-by: seanmcculloch <86432671+seanmcculloch@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: seanmcculloch <86432671+seanmcculloch@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .gitignore | 3 + Rhapso/data_prep/xml_to_dataframe.py | 92 +++++++++- Rhapso/detection/image_reader.py | 15 ++ Rhapso/detection/metadata_builder.py | 46 +++-- Rhapso/detection/overlap_detection.py | 165 ++++++++++++------ .../pipelines/ray/interest_point_detection.py | 9 +- Rhapso/pipelines/utils.py | 26 +++ tests/XML_test_data/dataset_split.xml | 150 ++++++++++++++++ tests/test_data_prep/test_xml_to_dataframe.py | 
def parse_image_loader_split_zarr(self, root):
    """
    Parse a ``split.viewerimgloader`` XML structure, where a single source image is
    virtually subdivided into overlapping tiles via ``SetupIdDefinition`` entries.

    Parameters
    ----------
    root : xml.etree.ElementTree.Element
        Root element of the parsed XML.

    Returns
    -------
    pd.DataFrame
        One row per split tile with columns: view_setup, timepoint, series, channel,
        file_path, crop_min, crop_max, zarr_base_path. The columns are present even
        when the XML defines no split tiles, so callers that test for ``'crop_min'``
        in the columns still recognise the frame as a split layout.

    Raises
    ------
    ValueError
        If the split loader, the nested loader, the ``<zarr>`` base path, or any of the
        required ``SetupIdDefinition`` children (NewId, OldId, min, max) is missing,
        or if a definition refers to a source setup that has no usable zgroup.
    """
    columns = [
        "view_setup",
        "timepoint",
        "series",
        "channel",
        "file_path",
        "crop_min",
        "crop_max",
        "zarr_base_path",
    ]

    outer_loader = root.find(".//ImageLoader[@format='split.viewerimgloader']")
    if outer_loader is None:
        raise ValueError(
            "split.viewerimgloader ImageLoader node not found in XML; "
            "ensure the XML contains an ImageLoader with format='split.viewerimgloader'."
        )

    inner_loader = outer_loader.find("ImageLoader")
    if inner_loader is None:
        raise ValueError(
            "Nested ImageLoader node not found inside split.viewerimgloader configuration."
        )

    zarr_elem = inner_loader.find("zarr")
    if zarr_elem is None or zarr_elem.text is None:
        raise ValueError(
            "<zarr> node with base path is missing from split.viewerimgloader configuration."
        )
    zarr_base_path = zarr_elem.text.strip()

    # Build lookup from source setup id to (timepoint, zgroup_path)
    zgroup_lookup = {}
    for zg in inner_loader.findall(".//zgroups/zgroup"):
        tp = zg.get("tp") or zg.get("timepoint")
        zgroup_lookup[zg.get("setup")] = (tp, zg.get("path"))

    image_loader_data = []
    for sid in outer_loader.findall(".//SetupIds/SetupIdDefinition"):
        # Read the four required children up front so that a malformed definition
        # fails with a descriptive ValueError instead of an AttributeError on None.
        fields = {}
        for tag in ("NewId", "OldId", "min", "max"):
            text = sid.findtext(tag)
            if text is None or not text.strip():
                raise ValueError(
                    f"SetupIdDefinition is missing a non-empty <{tag}> element."
                )
            fields[tag] = text.strip()

        old_id = fields["OldId"]
        if old_id not in zgroup_lookup:
            raise ValueError(
                "SetupIdDefinition refers to OldId {!r} that is not present in the "
                "inner loader's zgroups. Available setup ids: {}".format(
                    old_id,
                    # A zgroup without a 'setup' attribute is stored under None;
                    # drop it here so sorting cannot fail on mixed key types.
                    sorted(k for k in zgroup_lookup if k is not None),
                )
            )
        tp, zgroup_path = zgroup_lookup[old_id]
        if zgroup_path is None:
            raise ValueError(
                f"zgroup for setup {old_id!r} has no 'path' attribute."
            )

        # Extract channel using regex to handle both .zarr and .ome.zarr
        channel_match = re.search(r'_ch_(\d+)', zgroup_path)
        # NOTE(review): the fallback is the int 0 while matched channels are strings;
        # kept as-is for compatibility -- confirm what downstream consumers expect.
        channel = channel_match.group(1) if channel_match else 0

        image_loader_data.append({
            "view_setup": fields["NewId"],
            "timepoint": tp,
            "series": 1,
            "channel": channel,
            "file_path": zgroup_path,
            "crop_min": fields["min"],
            "crop_max": fields["max"],
            "zarr_base_path": zarr_base_path,
        })

    return pd.DataFrame(image_loader_data, columns=columns)
def build_paths(self):
    """
    Iterates through views to interface metadata building

    For every row of ``self.image_loader_df`` this resolves the image path for the
    configured ``self.file_type`` and forwards it, together with the view's
    processing intervals from ``self.overlapping_area``, to
    ``self.build_image_metadata``. When the image loader frame carries a
    ``crop_min`` column (split-tile layout), the zarr path is built from the row's
    ``zarr_base_path`` instead of ``self.image_file_prefix`` and the crop bounds are
    rescaled to the selected pyramid level.

    Raises
    ------
    ValueError
        If ``self.file_type`` is neither 'zarr' nor 'tiff', or ``self.run_type`` is
        not 'ray'.
    KeyError
        If a view id has no entry in ``self.overlapping_area``.
    """
    is_split = 'crop_min' in self.image_loader_df.columns

    for _, row in self.image_loader_df.iterrows():
        view_id = f"timepoint: {row['timepoint']}, setup: {row['view_setup']}"
        process_intervals = self.overlapping_area[view_id]

        if self.file_type == 'zarr':
            if is_split:
                base = row['zarr_base_path']
                tile = row['file_path']
                # Join with exactly one separator so a base path written without a
                # trailing '/' does not get fused with the tile name.
                if base:
                    tile = base.rstrip('/') + '/' + tile.lstrip('/')
                file_path = f'{tile}/{self.level}'
            else:
                file_path = self.image_file_prefix + row['file_path'] + f'/{self.level}'
        elif self.file_type == 'tiff':
            file_path = self.image_file_prefix + row['file_path']
        else:
            raise ValueError(f"Unsupported file_type: {self.file_type!r}")

        # Extract and scale crop bounds for split tiles
        crop_min = None
        crop_max = None
        if is_split:
            scale = 2 ** self.level if self.level is not None else 1
            crop_min = [int(v) // scale for v in row['crop_min'].split()]
            # Bounds are inclusive, so the downsampled voxel that contains full-res
            # index v is v // scale. For integers this equals ceil((v + 1) / scale) - 1,
            # so integer arithmetic avoids the float round-trip.
            crop_max = [int(int(v) // scale) for v in row['crop_max'].split()]

        if self.run_type == 'ray':
            self.build_image_metadata(process_intervals, file_path, view_id, crop_min, crop_max)
        else:
            raise ValueError(f"Unsupported run type: {self.run_type!r}")
def _split_tile_shape(self, row):
    """Derive 6D shape tuple from split tile crop bounds.

    Parameters
    ----------
    row : pd.Series
        Row from image_loader_df with 'crop_min' and 'crop_max' columns.
        Values are space-separated "X Y Z" strings holding inclusive bounds.

    Returns
    -------
    tuple
        6D shape tuple (1, 1, 1, Z, Y, X) matching load_image_metadata format.

    Raises
    ------
    ValueError
        If either bound does not hold exactly three integers, or if any maximum
        is smaller than the corresponding minimum.
    """
    try:
        cmin = [int(v) for v in row['crop_min'].split()]
        cmax = [int(v) for v in row['crop_max'].split()]
    except ValueError as err:
        raise ValueError(
            f"crop_min and crop_max must contain integers; "
            f"got crop_min={row['crop_min']!r}, crop_max={row['crop_max']!r}"
        ) from err

    if len(cmin) != 3 or len(cmax) != 3:
        raise ValueError(
            f"crop_min and crop_max must both be length 3 for 3D cropping; "
            f"got crop_min={cmin}, crop_max={cmax}"
        )
    if any(hi < lo for lo, hi in zip(cmin, cmax)):
        raise ValueError(
            f"crop_max must be >= crop_min on every axis; "
            f"got crop_min={cmin}, crop_max={cmax}"
        )

    # Bounds are inclusive, hence the +1 on every axis.
    x_size, y_size, z_size = (hi - lo + 1 for lo, hi in zip(cmin, cmax))
    return (1, 1, 1, z_size, y_size, x_size)
f"timepoint: {row_j['timepoint']}, setup: {row_j['view_setup']}" - - if self.file_type == 'zarr': - dim_other = self.load_image_metadata(self.prefix + row_j['file_path'] + f'/{0}') - elif self.file_type == 'tiff': - dim_other = self.load_image_metadata(self.prefix + row_j['file_path']) - - # get transforms matrix from both view_ids and downsampling matrices - matrix = self.transform_models.get(view_id) - matrix_other = self.transform_models.get(view_id_other) - - if self.file_type == 'zarr': - s = float(2 ** level) - mipmap_of_downsample_other = self.affine_with_half_pixel_shift(s, s, s) - elif self.file_type == 'tiff': - mipmap_of_downsample_other = self.create_mipmap_transform() - - inverse_mipmap_of_downsample_other = self.get_inverse_mipmap_transform(mipmap_of_downsample_other) - inverse_matrix = self.get_inverse_mipmap_transform(matrix) - - concatenated_matrix = np.dot(inverse_matrix, matrix_other) - t2 = np.dot(inverse_mipmap_of_downsample_other, concatenated_matrix) - - intervals = self.estimate_bounds(t1, dim_base) - intervals_other = self.estimate_bounds(t2, dim_other) - - bounding_boxes = tuple(map(lambda x: np.round(x).astype(int), intervals)) - bounding_boxes_other = tuple(map(lambda x: np.round(x).astype(int), intervals_other)) - - # find upper and lower bounds of intersection - if np.all((bounding_boxes[1] >= bounding_boxes_other[0]) & (bounding_boxes_other[1] >= bounding_boxes[0])): - intersected_boxes = self.calculate_intersection(bounding_boxes, bounding_boxes_other) - intersect = self.calculate_intersection(downsampled_dim_base, intersected_boxes) - intersect_dict = { - 'lower_bound': intersect[0], - 'upper_bound': intersect[1], - 'span': self.calculate_new_dims(intersect[0], intersect[1]) - } - - lb, ub = intersect[0], intersect[1] + t1 = self.get_inverse_mipmap_transform(mipmap_of_downsample) + + if self.overlapping_only: + # compare with all view_ids + for j, row_j in self.image_loader_df.iterrows(): + if i == j: continue + + view_id_other = 
f"timepoint: {row_j['timepoint']}, setup: {row_j['view_setup']}" + + if self.file_type == 'zarr': + if is_split: + dim_other = self._split_tile_shape(row_j) + else: + dim_other = self.load_image_metadata(os.path.join(self.prefix, row_j['file_path'], str(level))) + elif self.file_type == 'tiff': + dim_other = self.load_image_metadata(os.path.join(self.prefix, row_j['file_path'])) + + # get transforms matrix from both view_ids and downsampling matrices + matrix = self.transform_models.get(view_id) + matrix_other = self.transform_models.get(view_id_other) + + if self.file_type == 'zarr': + s = float(2 ** level) + mipmap_of_downsample_other = self.affine_with_half_pixel_shift(s, s, s) + elif self.file_type == 'tiff': + mipmap_of_downsample_other = self.create_mipmap_transform() + + inverse_mipmap_of_downsample_other = self.get_inverse_mipmap_transform(mipmap_of_downsample_other) + inverse_matrix = self.get_inverse_mipmap_transform(matrix) + + concatenated_matrix = np.dot(inverse_matrix, matrix_other) + t2 = np.dot(inverse_mipmap_of_downsample_other, concatenated_matrix) + + intervals = self.estimate_bounds(t1, dim_base) + intervals_other = self.estimate_bounds(t2, dim_other) + + bounding_boxes = tuple(map(lambda x: np.round(x).astype(int), intervals)) + bounding_boxes_other = tuple(map(lambda x: np.round(x).astype(int), intervals_other)) + + # find upper and lower bounds of intersection + if np.all((bounding_boxes[1] >= bounding_boxes_other[0]) & (bounding_boxes_other[1] >= bounding_boxes[0])): + intersected_boxes = self.calculate_intersection(bounding_boxes, bounding_boxes_other) + intersect = self.calculate_intersection(downsampled_dim_base, intersected_boxes) + intersect_dict = { + 'lower_bound': intersect[0], + 'upper_bound': intersect[1], + 'span': self.calculate_new_dims(intersect[0], intersect[1]) + } + + lb, ub = intersect[0], intersect[1] + sz = self.size_interval(lb, ub) + if sz > self.max_interval_size: + self.max_interval_size = sz + + # add max size + 
all_intervals.append(intersect_dict) + + # Single-view dataset: no pairwise overlaps exist, so use the + # full downsampled volume as the processing region. + if not all_intervals and len(self.image_loader_df) == 1: + lb = np.array(downsampled_dim_base[0]) + ub = np.array(downsampled_dim_base[1]) + all_intervals.append({ + 'lower_bound': lb, + 'upper_bound': ub, + 'span': self.calculate_new_dims(lb, ub), + }) sz = self.size_interval(lb, ub) if sz > self.max_interval_size: self.max_interval_size = sz - # add max size - all_intervals.append(intersect_dict) - + else: + # Full-volume mode: use the entire downsampled tile as the + # processing region (for registration, not stitching). + lb = np.array(downsampled_dim_base[0]) + ub = np.array(downsampled_dim_base[1]) + all_intervals.append({ + 'lower_bound': lb, + 'upper_bound': ub, + 'span': self.calculate_new_dims(lb, ub), + }) + sz = self.size_interval(lb, ub) + if sz > self.max_interval_size: + self.max_interval_size = sz + self.to_process[view_id] = all_intervals return dsxy, dsz, level, mipmap_of_downsample diff --git a/Rhapso/pipelines/ray/interest_point_detection.py b/Rhapso/pipelines/ray/interest_point_detection.py index 1af6aa9..9db2b5c 100644 --- a/Rhapso/pipelines/ray/interest_point_detection.py +++ b/Rhapso/pipelines/ray/interest_point_detection.py @@ -13,9 +13,9 @@ # This class implements the interest point detection pipeline class InterestPointDetection: - def __init__(self, dsxy, dsz, min_intensity, max_intensity, sigma, threshold, file_type, xml_file_path, - image_file_prefix, xml_output_file_path, n5_output_file_prefix, combine_distance, chunks_per_bound, run_type, - max_spots, median_filter): + def __init__(self, dsxy, dsz, min_intensity, max_intensity, sigma, threshold, file_type, xml_file_path, + image_file_prefix, xml_output_file_path, n5_output_file_prefix, combine_distance, chunks_per_bound, run_type, + max_spots, median_filter, overlapping_only=True): self.dsxy = dsxy self.dsz = dsz 
def fetch_local_xml(file_path):
    """
    Read XML content from a local file.

    Parameters
    ----------
    file_path : str
        Path to the XML file

    Returns
    -------
    str
        XML file contents, decoded as UTF-8

    Raises
    ------
    FileNotFoundError
        If no file exists at ``file_path``.
    RuntimeError
        If the file exists but cannot be opened or decoded; the underlying
        exception is attached as ``__cause__``.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            return file.read()
    except FileNotFoundError as err:
        # Re-raise with a message that names the path, keeping the original
        # error chained explicitly for debugging.
        raise FileNotFoundError(f"Could not find XML file at '{file_path}'") from err
    except Exception as err:
        raise RuntimeError(f"Error reading XML file at '{file_path}': {err}") from err
+ + + + 0 + Tile 0 + 500 500 100 + + um + 1.0 1.0 1.0 + + + 0 + + + + 1 + Tile 1 + 500 500 100 + + um + 1.0 1.0 1.0 + + + 1 + + + + 2 + Tile 2 + 500 500 100 + + um + 1.0 1.0 1.0 + + + 2 + + + + 3 + Tile 3 + 500 500 100 + + um + 1.0 1.0 1.0 + + + 3 + + + + + 0 + 0 + + + + + + + calibration + 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 + + + Image Splitting + 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 + + + + + calibration + 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 + + + Image Splitting + 1.0 0.0 0.0 300.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 + + + + + calibration + 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 + + + Image Splitting + 1.0 0.0 0.0 0.0 0.0 1.0 0.0 300.0 0.0 0.0 1.0 0.0 + + + + + calibration + 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 + + + Image Splitting + 1.0 0.0 0.0 300.0 0.0 1.0 0.0 300.0 0.0 0.0 1.0 0.0 + + + + + + s3://test-bucket/SPIM.ome.zarr/ + + + + + + + + 0 + Source Image + 800 800 100 + + um + 1.0 1.0 1.0 + + + + + + + 0 + 0 + 0 0 0 + 499 499 99 + + + 1 + 0 + 300 0 0 + 799 499 99 + + + 2 + 0 + 0 300 0 + 499 799 99 + + + 3 + 0 + 300 300 0 + 799 799 99 + + + + diff --git a/tests/test_data_prep/test_xml_to_dataframe.py b/tests/test_data_prep/test_xml_to_dataframe.py index 6118b84..fee6318 100644 --- a/tests/test_data_prep/test_xml_to_dataframe.py +++ b/tests/test_data_prep/test_xml_to_dataframe.py @@ -67,28 +67,26 @@ def test_parse_view_interest_points(self): xml_content = fetch_local_xml(self.xml_content_standard) self.parser = XMLToDataFrame(xml_content) root = ET.fromstring(xml_content) - df = self.parser.parse_view_interest_points(root, "data_prep") + df = self.parser.parse_view_interest_points(root) self.assertTrue(df.empty) def test_run(self): xml_content = fetch_local_xml(self.xml_content_standard) self.parser = XMLToDataFrame(xml_content) - result = self.parser.run("data_prep") + result = self.parser.run() self.assertIn("image_loader", result) self.assertIn("view_setups", result) self.assertIn("view_registrations", 
result) self.assertIn("view_interest_points", result) def test_interest_points_already_exist(self): + """Test that existing interest points are parsed correctly""" xml_content = fetch_local_xml(self.xml_content_interestPoints) self.parser = XMLToDataFrame(xml_content) root = ET.fromstring(xml_content) - with self.assertRaises(Exception) as context: - self.parser.parse_view_interest_points(root, "data_prep") - self.assertEqual( - str(context.exception), - "There should be no interest points in this file yet.", - ) + df = self.parser.parse_view_interest_points(root) + # Should parse existing interest points without raising an exception + self.assertIsInstance(df, pd.DataFrame) def test_no_labels(self): xml_content = fetch_local_xml(self.xml_content_no_tags) @@ -130,6 +128,58 @@ def test_no_file_mapping_exists(self): self.parser.parse_image_loader_tiff(root) self.assertEqual(str(context.exception), "There are no files in this XML") + def test_parse_image_loader_split_zarr(self): + """Test split zarr parsing with 4 tiles""" + xml_path = "tests/XML_test_data/dataset_split.xml" + xml_content = fetch_local_xml(xml_path) + self.parser = XMLToDataFrame(xml_content) + result = self.parser.run() + df = result['image_loader'] + + # Should have 4 rows (one per split tile) + self.assertEqual(len(df), 4) + + # Check required columns exist + expected_cols = {'view_setup', 'timepoint', 'crop_min', 'crop_max', 'zarr_base_path', 'file_path'} + self.assertTrue(expected_cols.issubset(set(df.columns))) + + # Check values for each tile + self.assertEqual(df.iloc[0]['view_setup'], '0') + self.assertEqual(df.iloc[0]['crop_min'], '0 0 0') + self.assertEqual(df.iloc[0]['crop_max'], '499 499 99') + self.assertEqual(df.iloc[0]['zarr_base_path'], 's3://test-bucket/SPIM.ome.zarr/') + + self.assertEqual(df.iloc[1]['view_setup'], '1') + self.assertEqual(df.iloc[1]['crop_min'], '300 0 0') + self.assertEqual(df.iloc[1]['crop_max'], '799 499 99') + + self.assertEqual(df.iloc[2]['view_setup'], '2') + 
self.assertEqual(df.iloc[2]['crop_min'], '0 300 0') + self.assertEqual(df.iloc[2]['crop_max'], '499 799 99') + + self.assertEqual(df.iloc[3]['view_setup'], '3') + self.assertEqual(df.iloc[3]['crop_min'], '300 300 0') + self.assertEqual(df.iloc[3]['crop_max'], '799 799 99') + + # All rows should have same file_path and timepoint + self.assertEqual(df.iloc[0]['file_path'], df.iloc[3]['file_path']) + self.assertEqual(df.iloc[0]['timepoint'], df.iloc[3]['timepoint']) + + def test_parse_view_setups_split(self): + """Test that outer ViewSetups are parsed correctly for split XML""" + xml_path = "tests/XML_test_data/dataset_split.xml" + xml_content = fetch_local_xml(xml_path) + self.parser = XMLToDataFrame(xml_content) + root = ET.fromstring(xml_content) + df = self.parser.parse_view_setups(root) + + # Should have 4 rows (outer ViewSetups only, not the inner one) + self.assertEqual(len(df), 4) + + # Check IDs + ids = sorted([df.iloc[i]['id'] for i in range(len(df))]) + self.assertEqual(ids, ['0', '1', '2', '3']) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_detection/test_image_reader.py b/tests/test_detection/test_image_reader.py new file mode 100644 index 0000000..85f193a --- /dev/null +++ b/tests/test_detection/test_image_reader.py @@ -0,0 +1,102 @@ +import unittest +from unittest.mock import patch, MagicMock +import dask.array as da +import numpy as np + +from Rhapso.detection.image_reader import ImageReader + + +class TestImageReader(unittest.TestCase): + def test_fetch_image_data_crop_applied_before_downsampling(self): + """Test that crop is applied after transpose, before downsampling""" + reader = ImageReader(file_type='zarr') + + # Create a mock record with crop bounds + record = { + 'view_id': 'timepoint: 0, setup: 0', + 'file_path': 's3://bucket/test.zarr/0', + 'interval_key': ((0, 0, 0), (50, 50, 25), (51, 51, 26)), + 'offset': 0, + 'lb': (0, 0, 0), + 'crop_min': [2, 2, 2], + 'crop_max': [7, 7, 7] + } + + # Mock the zarr opening to return 
a known dask array (10x10x10) + mock_array = da.ones((1, 1, 10, 10, 10), dtype=np.float32) + + with patch('zarr.open') as mock_zarr, \ + patch('s3fs.S3FileSystem'), \ + patch('s3fs.S3Map'), \ + patch('dask.array.from_zarr', return_value=mock_array): + + # Call fetch_image_data + view_id, interval_key, chunk, offset, lower_bound = reader.fetch_image_data( + record, dsxy=1, dsz=1 + ) + + # Verify crop was applied: array should be [2:8, 2:8, 2:8] = 6x6x6 + self.assertEqual(chunk.shape, (6, 6, 6)) + self.assertEqual(view_id, 'timepoint: 0, setup: 0') + + def test_fetch_image_data_without_crop(self): + """Test backward compatibility: records without crop fields work normally""" + reader = ImageReader(file_type='zarr') + + # Record without crop fields + record = { + 'view_id': 'timepoint: 0, setup: 0', + 'file_path': 's3://bucket/test.zarr/0', + 'interval_key': ((0, 0, 0), (50, 50, 25), (51, 51, 26)), + 'offset': 0, + 'lb': (0, 0, 0), + 'crop_min': None, + 'crop_max': None + } + + # Mock the zarr opening to return a known dask array + mock_array = da.ones((1, 1, 10, 10, 10), dtype=np.float32) + + with patch('zarr.open') as mock_zarr, \ + patch('s3fs.S3FileSystem'), \ + patch('s3fs.S3Map'), \ + patch('dask.array.from_zarr', return_value=mock_array): + + # Call fetch_image_data - should not raise an error + view_id, interval_key, chunk, offset, lower_bound = reader.fetch_image_data( + record, dsxy=1, dsz=1 + ) + + # Should succeed without crop + self.assertEqual(view_id, 'timepoint: 0, setup: 0') + + def test_fetch_image_data_tiff_no_crop_error(self): + """Test that tiff mode without crop works (no changes to tiff path)""" + reader = ImageReader(file_type='tiff') + + record = { + 'view_id': 'timepoint: 0, setup: 0', + 'file_path': '/path/to/test.tif', + 'interval_key': ((0, 0, 0), (50, 50, 25), (51, 51, 26)), + 'offset': 0, + 'lb': (0, 0, 0), + 'crop_min': None, + 'crop_max': None + } + + # Mock the BioImage reader + mock_bioimage = MagicMock() + mock_dask_array = 
da.ones((1, 1, 1, 10, 10, 10), dtype=np.float32) + mock_bioimage.get_dask_stack.return_value = mock_dask_array + + with patch('Rhapso.detection.image_reader.CustomBioImage', return_value=mock_bioimage): + # Should not raise an error + view_id, interval_key, chunk, offset, lower_bound = reader.fetch_image_data( + record, dsxy=1, dsz=1 + ) + + self.assertEqual(view_id, 'timepoint: 0, setup: 0') + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_detection/test_metadata_builder.py b/tests/test_detection/test_metadata_builder.py new file mode 100644 index 0000000..8602985 --- /dev/null +++ b/tests/test_detection/test_metadata_builder.py @@ -0,0 +1,152 @@ +import unittest +import pandas as pd +import numpy as np + +from Rhapso.detection.metadata_builder import MetadataBuilder + + +class TestMetadataBuilder(unittest.TestCase): + def test_build_paths_split_uses_zarr_base_path(self): + """Test that split mode uses zarr_base_path for file path construction""" + # Create mock image_loader_df with split columns + image_loader_df = pd.DataFrame({ + 'view_setup': ['0', '1'], + 'timepoint': ['0', '0'], + 'file_path': ['Tile_X_0000_Y_0000_Z_0000_ch_405.zarr', 'Tile_X_0000_Y_0000_Z_0000_ch_405.zarr'], + 'crop_min': ['0 0 0', '300 0 0'], + 'crop_max': ['499 499 99', '799 499 99'], + 'zarr_base_path': ['s3://bucket/SPIM.ome.zarr/', 's3://bucket/SPIM.ome.zarr/'] + }) + + # Mock overlapping_area + overlapping_area = { + 'timepoint: 0, setup: 0': [{'lower_bound': np.array([0, 0, 0]), 'upper_bound': np.array([100, 100, 50])}], + 'timepoint: 0, setup: 1': [{'lower_bound': np.array([50, 0, 0]), 'upper_bound': np.array([150, 100, 50])}] + } + + dataframes = {'image_loader': image_loader_df} + builder = MetadataBuilder( + dataframes=dataframes, + overlapping_area=overlapping_area, + image_file_prefix='s3://bucket/SPIM.ome.zarr/', + file_type='zarr', + dsxy=1.0, + dsz=1.0, + chunks_per_bound=1, + sigma=1.0, + run_type='ray', + level=0 + ) + builder.build_paths() + + # 
Check that file_path uses zarr_base_path + self.assertTrue( + 'zarr' in builder.metadata[0]['file_path'], + f"File path should contain zarr path: {builder.metadata[0]['file_path']}" + ) + + def test_build_paths_split_passes_crop_bounds(self): + """Test that crop bounds are included in metadata records""" + image_loader_df = pd.DataFrame({ + 'view_setup': ['0'], + 'timepoint': ['0'], + 'file_path': ['Tile_X_0000_Y_0000_Z_0000_ch_405.zarr'], + 'crop_min': ['0 0 0'], + 'crop_max': ['499 499 99'], + 'zarr_base_path': ['s3://bucket/SPIM.ome.zarr/'] + }) + + overlapping_area = { + 'timepoint: 0, setup: 0': [{'lower_bound': np.array([0, 0, 0]), 'upper_bound': np.array([100, 100, 50])}] + } + + dataframes = {'image_loader': image_loader_df} + builder = MetadataBuilder( + dataframes=dataframes, + overlapping_area=overlapping_area, + image_file_prefix='s3://bucket/SPIM.ome.zarr/', + file_type='zarr', + dsxy=1.0, + dsz=1.0, + chunks_per_bound=0, # No chunking + sigma=1.0, + run_type='ray', + level=0 + ) + builder.build_paths() + + # Check that crop_min and crop_max are in metadata + self.assertIn('crop_min', builder.metadata[0]) + self.assertIn('crop_max', builder.metadata[0]) + self.assertEqual(builder.metadata[0]['crop_min'], [0, 0, 0]) + self.assertEqual(builder.metadata[0]['crop_max'], [499, 499, 99]) + + def test_build_paths_split_scales_crop_bounds_by_level(self): + """Test that crop bounds are scaled by 2^level""" + image_loader_df = pd.DataFrame({ + 'view_setup': ['0'], + 'timepoint': ['0'], + 'file_path': ['Tile_X_0000_Y_0000_Z_0000_ch_405.zarr'], + 'crop_min': ['300 0 0'], + 'crop_max': ['799 499 99'], + 'zarr_base_path': ['s3://bucket/SPIM.ome.zarr/'] + }) + + overlapping_area = { + 'timepoint: 0, setup: 0': [{'lower_bound': np.array([0, 0, 0]), 'upper_bound': np.array([100, 100, 50])}] + } + + dataframes = {'image_loader': image_loader_df} + # level=2 means scale by 2^2 = 4 + builder = MetadataBuilder( + dataframes=dataframes, + overlapping_area=overlapping_area, 
+ image_file_prefix='s3://bucket/SPIM.ome.zarr/', + file_type='zarr', + dsxy=1.0, + dsz=1.0, + chunks_per_bound=0, + sigma=1.0, + run_type='ray', + level=2 + ) + builder.build_paths() + + # 300 // 4 = 75, 799 // 4 = 199, etc. + self.assertEqual(builder.metadata[0]['crop_min'], [75, 0, 0]) + self.assertEqual(builder.metadata[0]['crop_max'], [199, 124, 24]) + + def test_build_paths_regular_zarr_no_crop(self): + """Test backward compatibility: regular zarr has no crop fields""" + image_loader_df = pd.DataFrame({ + 'view_setup': ['0'], + 'timepoint': ['0'], + 'file_path': ['test.zarr'] + }) + + overlapping_area = { + 'timepoint: 0, setup: 0': [{'lower_bound': np.array([0, 0, 0]), 'upper_bound': np.array([100, 100, 50])}] + } + + dataframes = {'image_loader': image_loader_df} + builder = MetadataBuilder( + dataframes=dataframes, + overlapping_area=overlapping_area, + image_file_prefix='s3://bucket/', + file_type='zarr', + dsxy=1.0, + dsz=1.0, + chunks_per_bound=0, + sigma=1.0, + run_type='ray', + level=0 + ) + builder.build_paths() + + # Regular zarr should have None for crop fields + self.assertIsNone(builder.metadata[0]['crop_min']) + self.assertIsNone(builder.metadata[0]['crop_max']) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_detection/test_overlap_detection.py b/tests/test_detection/test_overlap_detection.py index e4f9e88..f6b7ad1 100644 --- a/tests/test_detection/test_overlap_detection.py +++ b/tests/test_detection/test_overlap_detection.py @@ -1,5 +1,4 @@ import unittest -<<<<<<< HEAD import numpy as np import pandas as pd @@ -55,14 +54,6 @@ def test_find_overlapping_area_empty_dataframe(self): with self.assertRaises(ValueError) as context: self.od.find_overlapping_area() self.assertEqual(str(context.exception), "Image Loader dataframe is empty.") -======= - - -class TestOverlapDetecttion(unittest.TestCase): - - def setUp(self): - pass ->>>>>>> main if __name__ == "__main__": From 97948dba42bf9f8aa1daf302475908d98659a816 Mon Sep 17 
00:00:00 2001 From: Sean McCulloch <86432671+seanmcculloch@users.noreply.github.com> Date: Sat, 14 Feb 2026 16:30:16 -0800 Subject: [PATCH 2/6] chore: typo Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- Rhapso/data_prep/xml_to_dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Rhapso/data_prep/xml_to_dataframe.py b/Rhapso/data_prep/xml_to_dataframe.py index f165cb8..5271829 100644 --- a/Rhapso/data_prep/xml_to_dataframe.py +++ b/Rhapso/data_prep/xml_to_dataframe.py @@ -2,7 +2,7 @@ import xml.etree.ElementTree as ET import re -# This component recieves an XML file containing Tiff or Zarr image metadata and converts +# This component receives an XML file containing Tiff or Zarr image metadata and converts # it into several Dataframes class XMLToDataFrame: From 080293391336be7c5d23cd94087a1b7264ed3d25 Mon Sep 17 00:00:00 2001 From: Sean McCulloch <86432671+seanmcculloch@users.noreply.github.com> Date: Sat, 14 Feb 2026 16:31:10 -0800 Subject: [PATCH 3/6] docs: inline comment on channel parsing Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- Rhapso/data_prep/xml_to_dataframe.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Rhapso/data_prep/xml_to_dataframe.py b/Rhapso/data_prep/xml_to_dataframe.py index 5271829..2b2f951 100644 --- a/Rhapso/data_prep/xml_to_dataframe.py +++ b/Rhapso/data_prep/xml_to_dataframe.py @@ -144,11 +144,14 @@ def parse_image_loader_split_zarr(self, root): ) tp, zgroup_path = zgroup_lookup[old_id] - # Extract channel using regex to handle both .zarr and .ome.zarr + # Attempt to extract the channel from the path, assuming filenames include '_ch_' + # (e.g. both '.zarr' and '.ome.zarr' variants). If this pattern is not present or is + # formatted differently, we deliberately fall back to channel 0 as a default. 
channel_match = re.search(r'_ch_(\d+)', zgroup_path) if channel_match: channel = channel_match.group(1) else: + # Default to channel 0 when channel information cannot be parsed from the path. channel = 0 image_loader_data.append({ From f867da6652a7fa724aa63ba01baac92cde10fc33 Mon Sep 17 00:00:00 2001 From: Sean McCulloch <86432671+seanmcculloch@users.noreply.github.com> Date: Sat, 14 Feb 2026 16:31:50 -0800 Subject: [PATCH 4/6] chore: .format to f string Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- Rhapso/data_prep/xml_to_dataframe.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Rhapso/data_prep/xml_to_dataframe.py b/Rhapso/data_prep/xml_to_dataframe.py index 2b2f951..1a13b4b 100644 --- a/Rhapso/data_prep/xml_to_dataframe.py +++ b/Rhapso/data_prep/xml_to_dataframe.py @@ -137,10 +137,8 @@ def parse_image_loader_split_zarr(self, root): if old_id not in zgroup_lookup: raise ValueError( - "SetupIdDefinition refers to OldId {!r} that is not present in the " - "inner loader's zgroups. Available setup ids: {}".format( - old_id, sorted(zgroup_lookup.keys()) - ) + f"SetupIdDefinition refers to OldId {old_id!r} that is not present in the " + f"inner loader's zgroups. 
Available setup ids: {sorted(zgroup_lookup.keys())}" ) tp, zgroup_path = zgroup_lookup[old_id] From 6dc5000a4f3da9451cfa750dc4351d5de8e29e37 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Sat, 14 Feb 2026 18:27:36 -0800 Subject: [PATCH 5/6] chore: Use os.path.join for path construction in metadata_builder (#168) * Initial plan * Use os.path.join for path construction in metadata_builder Co-authored-by: seanmcculloch <86432671+seanmcculloch@users.noreply.github.com> * Use os.path.join consistently for all path construction Co-authored-by: seanmcculloch <86432671+seanmcculloch@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: seanmcculloch <86432671+seanmcculloch@users.noreply.github.com> --- Rhapso/detection/metadata_builder.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Rhapso/detection/metadata_builder.py b/Rhapso/detection/metadata_builder.py index b03f126..b4ef913 100644 --- a/Rhapso/detection/metadata_builder.py +++ b/Rhapso/detection/metadata_builder.py @@ -1,3 +1,4 @@ +import os import numpy as np """ @@ -123,11 +124,11 @@ def build_paths(self): if self.file_type == 'zarr': if is_split: - file_path = row['zarr_base_path'] + row['file_path'] + f'/{self.level}' + file_path = os.path.join(row['zarr_base_path'], row['file_path'], str(self.level)) else: - file_path = self.image_file_prefix + row['file_path'] + f'/{self.level}' + file_path = os.path.join(self.image_file_prefix, row['file_path'], str(self.level)) elif self.file_type == 'tiff': - file_path = self.image_file_prefix + row['file_path'] + file_path = os.path.join(self.image_file_prefix, row['file_path']) else: raise ValueError(f"Unsupported file_type: {self.file_type!r}") From a82474c62a3aa6f745b3d8f3e71de5f4907eb8f0 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Sat, 14 Feb 2026 20:12:08 -0800 Subject: 
[PATCH 6/6] test: Add crop bounds validation for split XML IPD implementation (#169) * Initial plan * Add crop bounds validation with comprehensive tests Co-authored-by: seanmcculloch <86432671+seanmcculloch@users.noreply.github.com> * Enhance test to verify complete error message with shape Co-authored-by: seanmcculloch <86432671+seanmcculloch@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: seanmcculloch <86432671+seanmcculloch@users.noreply.github.com> --- Rhapso/detection/image_reader.py | 20 ++++++ tests/test_detection/test_image_reader.py | 83 +++++++++++++++++++++++ 2 files changed, 103 insertions(+) diff --git a/Rhapso/detection/image_reader.py b/Rhapso/detection/image_reader.py index b4d0ab5..c01640e 100644 --- a/Rhapso/detection/image_reader.py +++ b/Rhapso/detection/image_reader.py @@ -100,6 +100,26 @@ def fetch_image_data(self, record, dsxy, dsz): f"crop_min and crop_max must both be length 3 for 3D cropping; " f"got crop_min={crop_min}, crop_max={crop_max}" ) + + # Validate crop bounds are within array dimensions + array_shape = dask_array.shape + for i in range(3): + if crop_min[i] < 0: + raise ValueError( + f"crop_min[{i}]={crop_min[i]} is negative; " + f"crop bounds must be non-negative" + ) + if crop_max[i] >= array_shape[i]: + raise ValueError( + f"crop_max[{i}]={crop_max[i]} exceeds array dimension {i} " + f"(shape={array_shape[i]}); crop_max must be < array shape" + ) + if crop_min[i] > crop_max[i]: + raise ValueError( + f"crop_min[{i}]={crop_min[i]} > crop_max[{i}]={crop_max[i]}; " + f"crop_min must be <= crop_max" + ) + dask_array = dask_array[ crop_min[0]:crop_max[0] + 1, crop_min[1]:crop_max[1] + 1, diff --git a/tests/test_detection/test_image_reader.py b/tests/test_detection/test_image_reader.py index 85f193a..e0c10cc 100644 --- a/tests/test_detection/test_image_reader.py +++ b/tests/test_detection/test_image_reader.py @@ -97,6 +97,89 @@ def 
test_fetch_image_data_tiff_no_crop_error(self): self.assertEqual(view_id, 'timepoint: 0, setup: 0') + def test_fetch_image_data_crop_bounds_validation(self): + """Test that crop bounds exceeding array dimensions raise clear error""" + reader = ImageReader(file_type='zarr') + + # Record with crop_max exceeding array dimensions + record = { + 'view_id': 'timepoint: 0, setup: 0', + 'file_path': 's3://bucket/test.zarr/0', + 'interval_key': ((0, 0, 0), (50, 50, 25), (51, 51, 26)), + 'offset': 0, + 'lb': (0, 0, 0), + 'crop_min': [0, 0, 0], + 'crop_max': [15, 5, 5] # Exceeds dimension 0 (10x10x10 array) + } + + # Mock the zarr opening to return a known dask array (10x10x10) + mock_array = da.ones((1, 1, 10, 10, 10), dtype=np.float32) + + with patch('zarr.open') as mock_zarr, \ + patch('s3fs.S3FileSystem'), \ + patch('s3fs.S3Map'), \ + patch('dask.array.from_zarr', return_value=mock_array): + + # Should raise ValueError with clear message + with self.assertRaises(ValueError) as context: + reader.fetch_image_data(record, dsxy=1, dsz=1) + + error_msg = str(context.exception) + self.assertIn('crop_max[0]=15 exceeds array dimension 0', error_msg) + self.assertIn('(shape=10)', error_msg) + + def test_fetch_image_data_negative_crop_min(self): + """Test that negative crop_min values raise clear error""" + reader = ImageReader(file_type='zarr') + + record = { + 'view_id': 'timepoint: 0, setup: 0', + 'file_path': 's3://bucket/test.zarr/0', + 'interval_key': ((0, 0, 0), (50, 50, 25), (51, 51, 26)), + 'offset': 0, + 'lb': (0, 0, 0), + 'crop_min': [-1, 0, 0], + 'crop_max': [5, 5, 5] + } + + mock_array = da.ones((1, 1, 10, 10, 10), dtype=np.float32) + + with patch('zarr.open') as mock_zarr, \ + patch('s3fs.S3FileSystem'), \ + patch('s3fs.S3Map'), \ + patch('dask.array.from_zarr', return_value=mock_array): + + with self.assertRaises(ValueError) as context: + reader.fetch_image_data(record, dsxy=1, dsz=1) + + self.assertIn('crop_min[0]=-1 is negative', str(context.exception)) + + def 
test_fetch_image_data_crop_min_greater_than_crop_max(self): + """Test that crop_min > crop_max raises clear error""" + reader = ImageReader(file_type='zarr') + + record = { + 'view_id': 'timepoint: 0, setup: 0', + 'file_path': 's3://bucket/test.zarr/0', + 'interval_key': ((0, 0, 0), (50, 50, 25), (51, 51, 26)), + 'offset': 0, + 'lb': (0, 0, 0), + 'crop_min': [5, 0, 0], + 'crop_max': [3, 5, 5] # crop_min[0] > crop_max[0] + } + + mock_array = da.ones((1, 1, 10, 10, 10), dtype=np.float32) + + with patch('zarr.open') as mock_zarr, \ + patch('s3fs.S3FileSystem'), \ + patch('s3fs.S3Map'), \ + patch('dask.array.from_zarr', return_value=mock_array): + + with self.assertRaises(ValueError) as context: + reader.fetch_image_data(record, dsxy=1, dsz=1) + + self.assertIn('crop_min[0]=5 > crop_max[0]=3', str(context.exception)) + if __name__ == "__main__": unittest.main()