Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ ipython_config.py
# install all needed dependencies.
#Pipfile.lock

# uv
uv.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

Expand Down
95 changes: 89 additions & 6 deletions Rhapso/data_prep/xml_to_dataframe.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import pandas as pd
import xml.etree.ElementTree as ET
import re

# This component recieves an XML file containing Tiff or Zarr image metadata and converts
# This component receives an XML file containing Tiff or Zarr image metadata and converts
# it into several Dataframes

class XMLToDataFrame:
Expand Down Expand Up @@ -83,17 +84,99 @@ def parse_image_loader_tiff(self, root):
# Convert the list to a DataFrame and return
return pd.DataFrame(image_loader_data)

def parse_image_loader_split_zarr(self):
pass
def parse_image_loader_split_zarr(self, root):
    """
    Parses a split.viewerimgloader XML structure where a single source image is virtually
    subdivided into overlapping tiles via SetupIdDefinitions.

    Parameters
    ----------
    root : xml.etree.ElementTree.Element
        Root element of the parsed XML.

    Returns
    -------
    pd.DataFrame
        One row per split tile with columns: view_setup, timepoint, series, channel,
        file_path, crop_min, crop_max, zarr_base_path.

    Raises
    ------
    ValueError
        If the expected split.viewerimgloader structure is missing or malformed.
    """
    outer_loader = root.find(".//ImageLoader[@format='split.viewerimgloader']")
    if outer_loader is None:
        raise ValueError(
            "split.viewerimgloader ImageLoader node not found in XML; "
            "ensure the XML contains an ImageLoader with format='split.viewerimgloader'."
        )

    inner_loader = outer_loader.find("ImageLoader")
    if inner_loader is None:
        raise ValueError(
            "Nested ImageLoader node not found inside split.viewerimgloader configuration."
        )

    zarr_elem = inner_loader.find("zarr")
    if zarr_elem is None or zarr_elem.text is None:
        raise ValueError(
            "<zarr> node with base path is missing from split.viewerimgloader configuration."
        )

    zarr_base_path = zarr_elem.text.strip()

    # Build lookup from source setup id to (timepoint, zgroup_path)
    zgroup_lookup = {}
    for zg in inner_loader.findall(".//zgroups/zgroup"):
        setup = zg.get("setup")
        # Accept either attribute spelling for the timepoint ('tp' or 'timepoint').
        tp = zg.get("tp") or zg.get("timepoint")
        path = zg.get("path")
        if path is None:
            # A path-less zgroup cannot be resolved to a file; fail early with
            # a clear message instead of crashing later in the regex search.
            raise ValueError(f"zgroup for setup {setup!r} is missing the 'path' attribute.")
        zgroup_lookup[setup] = (tp, path)

    image_loader_data = []
    for sid in outer_loader.findall(".//SetupIds/SetupIdDefinition"):
        # Use findtext + explicit checks so a malformed definition raises a clear
        # ValueError rather than an AttributeError on a missing child element.
        fields = {}
        for tag in ("NewId", "OldId", "min", "max"):
            text = sid.findtext(tag)
            if text is None:
                raise ValueError(
                    f"SetupIdDefinition is missing required child element <{tag}>."
                )
            fields[tag] = text.strip()
        new_id = fields["NewId"]
        old_id = fields["OldId"]
        crop_min = fields["min"]
        crop_max = fields["max"]

        if old_id not in zgroup_lookup:
            raise ValueError(
                f"SetupIdDefinition refers to OldId {old_id!r} that is not present in the "
                f"inner loader's zgroups. Available setup ids: {sorted(zgroup_lookup.keys())}"
            )
        tp, zgroup_path = zgroup_lookup[old_id]

        # Attempt to extract the channel from the path, assuming filenames include '_ch_<number>'
        # (e.g. both '.zarr' and '.ome.zarr' variants). If this pattern is not present or is
        # formatted differently, we deliberately fall back to channel 0 as a default.
        # Cast to int so the 'channel' column has a consistent dtype (previously a str
        # was stored on match but an int 0 on fallback, mixing types in one column).
        channel_match = re.search(r'_ch_(\d+)', zgroup_path)
        channel = int(channel_match.group(1)) if channel_match else 0

        image_loader_data.append({
            "view_setup": new_id,
            "timepoint": tp,
            "series": 1,
            "channel": channel,
            "file_path": zgroup_path,
            "crop_min": crop_min,
            "crop_max": crop_max,
            "zarr_base_path": zarr_base_path,
        })

    return pd.DataFrame(image_loader_data)

def route_image_loader(self, root):
    """
    Directs the XML parsing process based on the image loader format specified in the XML.

    Parameters
    ----------
    root : xml.etree.ElementTree.Element
        Root element of the parsed XML.

    Returns
    -------
    pd.DataFrame
        Image-loader metadata produced by the format-specific parser.

    Raises
    ------
    ValueError
        If no <ImageLoader> element is present in the XML.
    """
    format_node = root.find(".//ImageLoader")
    # Check for the node's existence BEFORE reading its attributes so a missing
    # loader raises a clear ValueError instead of an AttributeError.
    if format_node is None:
        raise ValueError("No <ImageLoader> element found in XML; cannot determine image loader format.")

    # Normalize to lowercase for case-insensitive matching; a missing 'format'
    # attribute becomes "" and falls through to the generic zarr parser.
    format_type = (format_node.get("format") or "").lower()
    if "split" in format_type:
        return self.parse_image_loader_split_zarr(root)
    elif "filemap" in format_type:
        return self.parse_image_loader_tiff(root)
    else:
        return self.parse_image_loader_zarr(root)
Expand All @@ -104,7 +187,7 @@ def parse_view_setups(self, root):
"""
viewsetups_data = []

for vs in root.findall(".//ViewSetup"):
for vs in root.findall("./SequenceDescription/ViewSetups/ViewSetup"):
id_ = vs.find("id").text
# name = vs.find("name").text
name = vs.findtext("name")
Expand Down
35 changes: 35 additions & 0 deletions Rhapso/detection/image_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,41 @@ def fetch_image_data(self, record, dsxy, dsz):
dask_array = dask_array.astype(np.float32)
dask_array = dask_array.transpose()

# Apply split tile crop if present
crop_min = record.get('crop_min')
crop_max = record.get('crop_max')
if crop_min is not None and crop_max is not None:
if len(crop_min) != 3 or len(crop_max) != 3:
raise ValueError(
f"crop_min and crop_max must both be length 3 for 3D cropping; "
f"got crop_min={crop_min}, crop_max={crop_max}"
)

# Validate crop bounds are within array dimensions
array_shape = dask_array.shape
for i in range(3):
if crop_min[i] < 0:
raise ValueError(
f"crop_min[{i}]={crop_min[i]} is negative; "
f"crop bounds must be non-negative"
)
if crop_max[i] >= array_shape[i]:
raise ValueError(
f"crop_max[{i}]={crop_max[i]} exceeds array dimension {i} "
f"(shape={array_shape[i]}); crop_max must be < array shape"
)
if crop_min[i] > crop_max[i]:
raise ValueError(
f"crop_min[{i}]={crop_min[i]} > crop_max[{i}]={crop_max[i]}; "
f"crop_min must be <= crop_max"
)

dask_array = dask_array[
crop_min[0]:crop_max[0] + 1,
crop_min[1]:crop_max[1] + 1,
crop_min[2]:crop_max[2] + 1
]
Comment on lines +97 to +127
Copy link

Copilot AI Feb 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No validation that crop bounds are within the array dimensions. If crop_min or crop_max values exceed the array shape after transpose, Dask will raise an IndexError. Consider adding validation to provide a clearer error message, for example: check that crop_max[i] < dask_array.shape[i] for each dimension before applying the crop.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot open a new pull request to apply changes based on this feedback


# Downsample Dask array
downsampled_stack = self.interface_downsampling(dask_array, dsxy, dsz)

Expand Down
47 changes: 35 additions & 12 deletions Rhapso/detection/metadata_builder.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import numpy as np

"""
Expand All @@ -21,7 +22,7 @@ def __init__(self, dataframes, overlapping_area, image_file_prefix, file_type, d
self.sub_region_chunking = not chunks_per_bound == 0
self.metadata = []

def build_image_metadata(self, process_intervals, file_path, view_id):
def build_image_metadata(self, process_intervals, file_path, view_id, crop_min=None, crop_max=None):
"""
Builds list of metadata with optional sub-chunking
"""
Expand All @@ -41,7 +42,9 @@ def build_image_metadata(self, process_intervals, file_path, view_id):
'file_path': file_path,
'interval_key': interval_key,
'offset': 0,
'lb': lb_fixed
'lb': lb_fixed,
'crop_min': crop_min,
'crop_max': crop_max
})

# Apply sub-region chunking
Expand Down Expand Up @@ -73,8 +76,10 @@ def build_image_metadata(self, process_intervals, file_path, view_id):
'file_path': file_path,
'interval_key': interval_key,
'offset': z,
'lb' : lb
})
'lb' : lb,
'crop_min': crop_min,
'crop_max': crop_max
})

elif self.file_type == "zarr":

Expand Down Expand Up @@ -102,26 +107,44 @@ def build_image_metadata(self, process_intervals, file_path, view_id):
'file_path': file_path,
'interval_key': interval_key,
'offset': z,
'lb' : lb
})

'lb' : lb,
'crop_min': crop_min,
'crop_max': crop_max
})

def build_paths(self):
    """
    Iterates through views to interface metadata building.

    For each row of ``image_loader_df`` this resolves the on-disk path for the
    view and, for split tiles (rows carrying ``crop_min``/``crop_max`` columns),
    rescales the crop bounds to the current resolution level before delegating
    to ``build_image_metadata``.

    Raises
    ------
    ValueError
        If ``file_type`` or ``run_type`` is unsupported.
    """
    # Split datasets are identified by the presence of the crop columns.
    is_split = 'crop_min' in self.image_loader_df.columns

    for _, row in self.image_loader_df.iterrows():
        view_id = f"timepoint: {row['timepoint']}, setup: {row['view_setup']}"
        process_intervals = self.overlapping_area[view_id]

        if self.file_type == 'zarr':
            if is_split:
                # Split tiles carry their own zarr base path per row.
                file_path = os.path.join(row['zarr_base_path'], row['file_path'], str(self.level))
            else:
                file_path = os.path.join(self.image_file_prefix, row['file_path'], str(self.level))
        elif self.file_type == 'tiff':
            file_path = os.path.join(self.image_file_prefix, row['file_path'])
        else:
            raise ValueError(f"Unsupported file_type: {self.file_type!r}")

        # Extract and scale crop bounds for split tiles. Bounds are stored as
        # whitespace-separated full-resolution voxel coordinates; each pyramid
        # level is assumed to downsample by a factor of 2 per axis — TODO confirm.
        crop_min = None
        crop_max = None
        if is_split:
            scale = 2 ** self.level if self.level is not None else 1
            crop_min = [int(v) // scale for v in row['crop_min'].split()]
            # For inclusive bounds, use a ceil-style mapping for crop_max so the
            # scaled region never shrinks below the original coverage.
            crop_max = [int(np.ceil((int(v) + 1) / scale) - 1) for v in row['crop_max'].split()]

        if self.run_type == 'ray':
            self.build_image_metadata(process_intervals, file_path, view_id, crop_min, crop_max)
        else:
            raise ValueError(f"Unsupported run type: {self.run_type!r}")

Expand Down
Loading