Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ ipython_config.py
# install all needed dependencies.
#Pipfile.lock

# uv
uv.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

Expand Down
92 changes: 87 additions & 5 deletions Rhapso/data_prep/xml_to_dataframe.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pandas as pd
import xml.etree.ElementTree as ET
import re

# This component receives an XML file containing Tiff or Zarr image metadata and converts
Comment thread
seanmcculloch marked this conversation as resolved.
Outdated
# it into several DataFrames
Expand Down Expand Up @@ -83,17 +84,98 @@ def parse_image_loader_tiff(self, root):
# Convert the list to a DataFrame and return
return pd.DataFrame(image_loader_data)

def parse_image_loader_split_zarr(self):
pass
def parse_image_loader_split_zarr(self, root):
"""
Parses a split.viewerimgloader XML structure where a single source image is virtually
subdivided into overlapping tiles via SetupIdDefinitions.

Parameters
----------
root : xml.etree.ElementTree.Element
Root element of the parsed XML.

Returns
-------
pd.DataFrame
One row per split tile with columns: view_setup, timepoint, series, channel,
file_path, crop_min, crop_max, zarr_base_path.
"""
outer_loader = root.find(".//ImageLoader[@format='split.viewerimgloader']")
if outer_loader is None:
raise ValueError(
"split.viewerimgloader ImageLoader node not found in XML; "
"ensure the XML contains an ImageLoader with format='split.viewerimgloader'."
)

inner_loader = outer_loader.find("ImageLoader")
if inner_loader is None:
raise ValueError(
"Nested ImageLoader node not found inside split.viewerimgloader configuration."
)

zarr_elem = inner_loader.find("zarr")
if zarr_elem is None or zarr_elem.text is None:
raise ValueError(
"<zarr> node with base path is missing from split.viewerimgloader configuration."
)

zarr_base_path = zarr_elem.text.strip()
# Build lookup from source setup id to (timepoint, zgroup_path)
zgroup_lookup = {}
for zg in inner_loader.findall(".//zgroups/zgroup"):
setup = zg.get("setup")
tp = zg.get("tp") or zg.get("timepoint")
path = zg.get("path")
zgroup_lookup[setup] = (tp, path)

image_loader_data = []
for sid in outer_loader.findall(".//SetupIds/SetupIdDefinition"):
new_id = sid.find("NewId").text.strip()
old_id = sid.find("OldId").text.strip()
crop_min = sid.find("min").text.strip()
crop_max = sid.find("max").text.strip()

if old_id not in zgroup_lookup:
raise ValueError(
"SetupIdDefinition refers to OldId {!r} that is not present in the "
"inner loader's zgroups. Available setup ids: {}".format(
old_id, sorted(zgroup_lookup.keys())
)
Comment thread
seanmcculloch marked this conversation as resolved.
Outdated
)
tp, zgroup_path = zgroup_lookup[old_id]

# Extract channel using regex to handle both .zarr and .ome.zarr
channel_match = re.search(r'_ch_(\d+)', zgroup_path)
if channel_match:
channel = channel_match.group(1)
else:
Comment thread
seanmcculloch marked this conversation as resolved.
Outdated
channel = 0

image_loader_data.append({
"view_setup": new_id,
"timepoint": tp,
"series": 1,
"channel": channel,
"file_path": zgroup_path,
"crop_min": crop_min,
"crop_max": crop_max,
"zarr_base_path": zarr_base_path,
})

return pd.DataFrame(image_loader_data)

def route_image_loader(self, root):
"""
Directs the XML parsing process based on the image loader format specified in the XML.
"""
format_node = root.find(".//ImageLoader")
format_type = format_node.get("format")
if format_node is None:
raise ValueError("No <ImageLoader> element found in XML; cannot determine image loader format.")

if "filemap" in format_type:
format_type = (format_node.get("format") or "").lower()
if "split" in format_type:
return self.parse_image_loader_split_zarr(root)
elif "filemap" in format_type:
return self.parse_image_loader_tiff(root)
else:
return self.parse_image_loader_zarr(root)
Expand All @@ -104,7 +186,7 @@ def parse_view_setups(self, root):
"""
viewsetups_data = []

for vs in root.findall(".//ViewSetup"):
for vs in root.findall("./SequenceDescription/ViewSetups/ViewSetup"):
id_ = vs.find("id").text
# name = vs.find("name").text
name = vs.findtext("name")
Expand Down
15 changes: 15 additions & 0 deletions Rhapso/detection/image_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,21 @@ def fetch_image_data(self, record, dsxy, dsz):
dask_array = dask_array.astype(np.float32)
dask_array = dask_array.transpose()

# Apply split tile crop if present
crop_min = record.get('crop_min')
crop_max = record.get('crop_max')
if crop_min is not None and crop_max is not None:
if len(crop_min) != 3 or len(crop_max) != 3:
raise ValueError(
f"crop_min and crop_max must both be length 3 for 3D cropping; "
f"got crop_min={crop_min}, crop_max={crop_max}"
)
dask_array = dask_array[
crop_min[0]:crop_max[0] + 1,
crop_min[1]:crop_max[1] + 1,
crop_min[2]:crop_max[2] + 1
]
Comment on lines +97 to +127
Copy link

Copilot AI Feb 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No validation that crop bounds are within the array dimensions. If crop_min or crop_max values exceed the array shape after transpose, Dask will raise an IndexError. Consider adding validation to provide a clearer error message, for example: check that crop_max[i] < dask_array.shape[i] for each dimension before applying the crop.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot open a new pull request to apply changes based on this feedback


# Downsample Dask array
downsampled_stack = self.interface_downsampling(dask_array, dsxy, dsz)

Expand Down
46 changes: 34 additions & 12 deletions Rhapso/detection/metadata_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def __init__(self, dataframes, overlapping_area, image_file_prefix, file_type, d
self.sub_region_chunking = not chunks_per_bound == 0
self.metadata = []

def build_image_metadata(self, process_intervals, file_path, view_id):
def build_image_metadata(self, process_intervals, file_path, view_id, crop_min=None, crop_max=None):
"""
Builds list of metadata with optional sub-chunking
"""
Expand All @@ -41,7 +41,9 @@ def build_image_metadata(self, process_intervals, file_path, view_id):
'file_path': file_path,
'interval_key': interval_key,
'offset': 0,
'lb': lb_fixed
'lb': lb_fixed,
'crop_min': crop_min,
'crop_max': crop_max
})

# Apply sub-region chunking
Expand Down Expand Up @@ -73,8 +75,10 @@ def build_image_metadata(self, process_intervals, file_path, view_id):
'file_path': file_path,
'interval_key': interval_key,
'offset': z,
'lb' : lb
})
'lb' : lb,
'crop_min': crop_min,
'crop_max': crop_max
})

elif self.file_type == "zarr":

Expand Down Expand Up @@ -102,26 +106,44 @@ def build_image_metadata(self, process_intervals, file_path, view_id):
'file_path': file_path,
'interval_key': interval_key,
'offset': z,
'lb' : lb
})

'lb' : lb,
'crop_min': crop_min,
'crop_max': crop_max
})

def build_paths(self):
"""
Iterates through views to interface metadata building
"""
is_split = 'crop_min' in self.image_loader_df.columns

for _, row in self.image_loader_df.iterrows():
view_id = f"timepoint: {row['timepoint']}, setup: {row['view_setup']}"
process_intervals = self.overlapping_area[view_id]

if self.file_type == 'zarr':
file_path = self.image_file_prefix + row['file_path'] + f'/{self.level}'
if is_split:
file_path = row['zarr_base_path'] + row['file_path'] + f'/{self.level}'
Comment thread
seanmcculloch marked this conversation as resolved.
Outdated
else:
file_path = self.image_file_prefix + row['file_path'] + f'/{self.level}'
elif self.file_type == 'tiff':
file_path = self.image_file_prefix + row['file_path']
file_path = self.image_file_prefix + row['file_path']
else:
raise ValueError(f"Unsupported file_type: {self.file_type!r}")


# Extract and scale crop bounds for split tiles
crop_min = None
crop_max = None
if is_split:
scale = 2 ** self.level if self.level is not None else 1
cmin = [int(v) // scale for v in row['crop_min'].split()]
# For inclusive bounds, use a ceil-style mapping for crop_max to avoid shrinking coverage
cmax = [int(np.ceil((int(v) + 1) / scale) - 1) for v in row['crop_max'].split()]
crop_min = cmin
crop_max = cmax

if self.run_type == 'ray':
self.build_image_metadata(process_intervals, file_path, view_id)
self.build_image_metadata(process_intervals, file_path, view_id, crop_min, crop_max)
else:
raise ValueError(f"Unsupported run type: {self.run_type!r}")

Expand Down
Loading