diff --git a/Rhapso/split_dataset/compute_grid_rules.py b/Rhapso/split_dataset/compute_grid_rules.py index bfc611c..6346856 100644 --- a/Rhapso/split_dataset/compute_grid_rules.py +++ b/Rhapso/split_dataset/compute_grid_rules.py @@ -65,7 +65,8 @@ def run(self): Executes the entry point of the script. """ # image_sizes, min_size = self.collect_image_sizes() - min_step_size = self.find_min_step_size() + # min_step_size = self.find_min_step_size() + min_step_size = [1, 1, 1] sx = self.closest_larger_long_divisible_by(self.target_image_size[0], min_step_size[0]) sy = self.closest_larger_long_divisible_by(self.target_image_size[1], min_step_size[1]) diff --git a/Rhapso/split_dataset/save_xml.py b/Rhapso/split_dataset/save_xml.py index 03abba3..70f767b 100644 --- a/Rhapso/split_dataset/save_xml.py +++ b/Rhapso/split_dataset/save_xml.py @@ -1,6 +1,7 @@ import numpy as np import boto3 import re +import copy from xml.etree import ElementTree as ET class SaveXML: @@ -11,44 +12,358 @@ def __init__(self, data_global, new_split_interest_points, self_definition, xml_ self.xml_file = xml_file self.xml_output_path = xml_output_path + def save_tile_attributes_to_xml(self, xml): + """ + Ensure the *last* (the outer split one) has: + - old_tile_0..N + - ... + - with locations from Image Splitting + - + """ + root = ET.fromstring(xml) + + def tagname(el): + return el.tag.split('}')[-1] + + def find_one(tag): + el = root.find(f'.//{{*}}{tag}') + if el is None: + el = root.find(tag) + return el + + def _norm_id(raw): + if isinstance(raw, (tuple, list)): + return int(raw[1] if len(raw) > 1 else raw[0]) + return int(raw) + + # --- find ALL ViewSetups blocks --- + view_setups_all = root.findall('.//{*}ViewSetups') + if not view_setups_all: + return xml # nothing to do + + # Outer split ViewSetups = last, inner original = first non-outer + outer_vs = view_setups_all[-1] + inner_vs = None + for vs in view_setups_all: + if vs is not outer_vs: + inner_vs = vs + break + + # --- collect existing Attributes on OUTER --- + children = list(outer_vs) + attr_by_name = {} + for ch in children: + if tagname(ch) == 'Attributes': + nm = ch.get('name') + if nm: + attr_by_name[nm] = ch + + # --- ensure CHANNEL attributes (can still be cloned from inner) --- + if 'channel' not in attr_by_name and inner_vs is not None: + for ch in list(inner_vs): + if tagname(ch) != 'Attributes': + continue + nm = ch.get('name') + if nm == 'channel': + cloned = copy.deepcopy(ch) + outer_vs.append(cloned) + attr_by_name['channel'] = cloned + break + + # --- build/overwrite ILLUMINATION attributes: old_tile_0..N --- + illum_attrs = attr_by_name.get('illumination') + if illum_attrs is None: + illum_attrs = ET.Element('Attributes', {'name': 'illumination'}) + outer_vs.append(illum_attrs) + attr_by_name['illumination'] = illum_attrs + else: + # clear existing entries + for ch in list(illum_attrs): + illum_attrs.remove(ch) + + # unique original tile ids from old_view + orig_tile_ids = sorted({_norm_id(v['old_view']) for v in self.self_definition}) + + for tid in orig_tile_ids: + illum_el = ET.SubElement(illum_attrs, 'Illumination') + ET.SubElement(illum_el, 'id').text = str(tid) + ET.SubElement(illum_el, 'name').text = f"old_tile_{tid}" + + # --- ensure ANGLE attributes: a single Angle id=0/name=0 --- + angle_attrs = attr_by_name.get('angle') + if angle_attrs is None: + # try clone from inner if it exists + if inner_vs is not None: + for ch in list(inner_vs): + if tagname(ch) == 'Attributes' and ch.get('name') == 'angle': + angle_attrs = copy.deepcopy(ch) + outer_vs.append(angle_attrs) + break + # if no inner angle, synthesize default + if angle_attrs is None: + angle_attrs = ET.Element('Attributes', {'name': 'angle'}) + angle_el = ET.SubElement(angle_attrs, 'Angle') + ET.SubElement(angle_el, 'id').text = "0" + ET.SubElement(angle_el, 'name').text = "0" + outer_vs.append(angle_attrs) + else: + # if it exists but has no , make one + has_angle = any(tagname(ch) == 'Angle' for ch in angle_attrs) + if not has_angle: + angle_el = ET.SubElement(angle_attrs, 'Angle') + ET.SubElement(angle_el, 'id').text = "0" + ET.SubElement(angle_el, 'name').text = "0" + + attr_by_name['angle'] = angle_attrs + + # ---- find or create under OUTER ---- + children = list(outer_vs) + tile_attrs = None + insert_idx = len(children) # default: append at end + + for i, ch in enumerate(children): + if tagname(ch) == 'Attributes': + name_attr = ch.get('name') + # remember existing tile attributes if present + if name_attr == 'tile': + tile_attrs = ch + # prefer to insert tile after channel attributes if we create it + if name_attr == 'channel': + insert_idx = i + 1 + + if tile_attrs is None: + tile_attrs = ET.Element('Attributes', {'name': 'tile'}) + outer_vs.insert(insert_idx, tile_attrs) + + # ---- figure out which tile ids (new_view ids) we care about ---- + target_ids = {_norm_id(v['new_view']) for v in self.self_definition} + + # Remove existing Tile entries for those ids (so we can rewrite cleanly) + for child in list(tile_attrs): + if tagname(child) != 'Tile': + continue + id_el = child.find('id') or child.find('{*}id') + if id_el is None or not id_el.text: + continue + try: + if int(id_el.text.strip()) in target_ids: + tile_attrs.remove(child) + except Exception: + pass + + # ---- build a map: setup_id -> (tx, ty, tz) from 'Image Splitting' ---- + view_regs = find_one('ViewRegistrations') + tile_locations = {} + + if view_regs is not None: + # iterate over ViewRegistration elements, namespace-agnostic + for vr in view_regs.findall('.//{*}ViewRegistration'): + setup_attr = vr.get('setup') + if setup_attr is None: + continue + try: + setup_id = int(setup_attr) + except ValueError: + continue + + if setup_id not in target_ids: + continue + + # find the Image Splitting transform + for vt in vr.findall('./{*}ViewTransform'): + name_el = vt.find('Name') or vt.find('{*}Name') + if name_el is None: + continue + if (name_el.text or '').strip().lower() != 'image splitting': + continue + + aff_el = vt.find('affine') or vt.find('{*}affine') + if aff_el is None or not aff_el.text: + continue + + nums = re.findall( + r'[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?', + aff_el.text + ) + + # 3x4 affine: we expect at least 12 numbers + if len(nums) >= 12: + tx, ty, tz = map(float, (nums[3], nums[7], nums[11])) + elif len(nums) >= 3: + # fallback: last 3 numbers + tx, ty, tz = map(float, nums[-3:]) + else: + tx = ty = tz = 0.0 + + tile_locations[setup_id] = (tx, ty, tz) + break # stop after first 'Image Splitting' for this VR + + # ---- create Tile entries for each new_view ---- + for view in self.self_definition: + new_id = _norm_id(view['new_view']) + + if new_id in tile_locations: + loc = tile_locations[new_id] + else: + # Fallback: use min bound of interval if we didn't find an image splitting transform + mins = np.array(view['interval'][0], dtype=float) + loc = (float(mins[0]), float(mins[1]), float(mins[2])) + + tile_el = ET.SubElement(tile_attrs, 'Tile') + ET.SubElement(tile_el, 'id').text = str(new_id) + ET.SubElement(tile_el, 'name').text = str(new_id) + ET.SubElement(tile_el, 'location').text = f"{loc[0]:.1f} {loc[1]:.1f} {loc[2]:.1f}" + + # ---- reorder children in OUTER : + # all first, then in illumination, channel, tile, angle order ---- + children = list(outer_vs) + + viewsetup_children = [ch for ch in children if tagname(ch) == 'ViewSetup'] + attr_children = [ch for ch in children if tagname(ch) == 'Attributes'] + other_children = [ch for ch in children if tagname(ch) not in ('ViewSetup', 'Attributes')] + + # desired attributes order + attr_order = {'illumination': 0, 'channel': 1, 'tile': 2, 'angle': 3} + + def _attr_sort_key(el): + name = el.get('name', '') + return attr_order.get(name, 99) + + attr_children.sort(key=_attr_sort_key) + + # Clear existing children and re-append in desired order + for ch in children: + outer_vs.remove(ch) + + for ch in viewsetup_children + attr_children + other_children: + outer_vs.append(ch) + + try: + ET.indent(root, space=" ") + except Exception: + pass + + return ET.tostring(root, encoding='unicode') + def wrap_image_loader_for_split(self, xml: str) -> str: + """ + Wrap the top-level ImageLoader in + and move the ORIGINAL ViewSetups/Timepoints/MissingViews into an inner + inside that wrapper. + + Resulting structure: + + + + + + ... + + (ORIGINAL) + (ORIGINAL) + + + + + + + ... + + """ root = ET.fromstring(xml) - def tn(el): return el.tag.split('}')[-1] + def tn(el): + return el.tag.split('}')[-1] + def find_one(tag): el = root.find(f'.//{{*}}{tag}') return el if el is not None else root.find(tag) - seq = find_one('SequenceDescription') + seq = None + # Prefer the top-level SequenceDescription (direct child of root) + for ch in list(root): + if tn(ch) == 'SequenceDescription': + seq = ch + break + if seq is None: + seq = find_one('SequenceDescription') if seq is None: - return xml - - # find the first immediate ImageLoader under SequenceDescription - loaders = [ch for ch in list(seq) if tn(ch) == 'ImageLoader'] - if not loaders: return xml - inner = loaders[0] + children = list(seq) - fmt = (inner.get('format') or '').lower() - if fmt == 'split.viewerimgloader': + # Find the first immediate ImageLoader under SequenceDescription + base_loader = None + base_loader_idx = None + for i, ch in enumerate(children): + if tn(ch) == 'ImageLoader': + base_loader = ch + base_loader_idx = i + break + + if base_loader is None: return xml - - # handle the case where the *outer* wrapper already exists - if any(tn(ch) == 'ImageLoader' for ch in list(inner)) and fmt.startswith('bdv'): + + fmt = (base_loader.get('format') or '').lower() + # Already wrapped; assume layout is correct and do nothing + if fmt == 'split.viewerimgloader': return xml - # wrap the current loader - idx = list(seq).index(inner) - seq.remove(inner) + # Collect any other ImageLoader siblings (other sources) + other_imageloaders = [] + # Collect ORIGINAL ViewSetups / Timepoints / MissingViews that are siblings + orig_viewsetups = None + orig_timepoints = None + orig_missingviews = None + + for ch in children[base_loader_idx + 1:]: + name = tn(ch) + if name == 'ImageLoader': + other_imageloaders.append(ch) + elif name == 'ViewSetups': + orig_viewsetups = ch + elif name == 'Timepoints': + orig_timepoints = ch + elif name == 'MissingViews': + orig_missingviews = ch + + + # Remove them from the outer SequenceDescription + for node in (orig_viewsetups, orig_timepoints, orig_missingviews, *other_imageloaders): + if node is not None and node in seq: + seq.remove(node) + + # Remove the original loader from seq + seq.remove(base_loader) + + # Build wrapper wrapper = ET.Element('ImageLoader', {'format': 'split.viewerimgloader'}) - wrapper.append(inner) - seq.insert(idx, wrapper) + # First child: original loader + wrapper.append(base_loader) + for other_loader in other_imageloaders: + wrapper.append(other_loader) + print("Added multiple ImageLoader sources to split.viewerimgloader wrapper.") + + # Inner that holds the original ViewSetups/Timepoints/MissingViews + inner_seq = ET.Element('SequenceDescription') + + if orig_viewsetups is not None: + inner_seq.append(orig_viewsetups) + if orig_timepoints is not None: + inner_seq.append(orig_timepoints) + if orig_missingviews is not None: + inner_seq.append(orig_missingviews) + + wrapper.append(inner_seq) + + # Insert wrapper where the original loader was + seq.insert(base_loader_idx, wrapper) try: ET.indent(root, space=" ") except Exception: pass + return ET.tostring(root, encoding='unicode') def save_view_interest_points(self, xml): @@ -202,46 +517,66 @@ def find_one(tag): return ET.tostring(root, encoding='unicode') def save_setup_id_to_xml(self, xml): + """ + Create/overwrite the OUTER (split tiles) and ensure outer + and exist under the top-level SequenceDescription. + + Outer layout target: + + + + + + ... + (original) + ... (from save_setup_id_definition_to_xml) + + <-- created here (ids 0..499) + ... + ... + ... + + + 0 + + + + ... + + """ root = ET.fromstring(xml) - def tagname(el): + def tn(el): return el.tag.split('}')[-1] - def find_one(tag): - el = root.find(f'.//{{*}}{tag}') - if el is None: - el = root.find(tag) - return el - - seq = find_one('SequenceDescription') - regs = find_one('ViewRegistrations') - setup_ids = find_one('SetupIds') - if setup_ids is None: - setup_ids = ET.Element('SetupIds') - kids = list(root) - insert_idx = len(kids) - if regs is not None and regs in kids: - insert_idx = kids.index(regs) - elif seq is not None and seq in kids: - insert_idx = kids.index(seq) + 1 - root.insert(insert_idx, setup_ids) + # Find top-level SequenceDescription + outer_seq = None + for ch in list(root): + if tn(ch) == 'SequenceDescription': + outer_seq = ch + break + if outer_seq is None: + outer_seq = root.find('.//{*}SequenceDescription') + if outer_seq is None: + return xml + # Find or create OUTER under SequenceDescription view_setups = None - for ch in list(root): - if tagname(ch) == 'ViewSetups': + for ch in list(outer_seq): + if tn(ch) == 'ViewSetups': view_setups = ch break - if view_setups is None: - view_setups = find_one('ViewSetups') + if view_setups is None: view_setups = ET.Element('ViewSetups') - kids = list(root) - after_idx = -1 - for i, ch in enumerate(kids): - if tagname(ch) in ('ImageLoader', 'SequenceDescription'): - after_idx = i - root.insert(after_idx + 1 if after_idx >= 0 else len(kids), view_setups) - + children = list(outer_seq) + insert_idx = len(children) + for i, ch in enumerate(children): + if tn(ch) == 'ImageLoader': + insert_idx = i + 1 + outer_seq.insert(insert_idx, view_setups) + + # Helper to normalize ids def _norm_id(raw): if isinstance(raw, (tuple, list)): if len(raw) >= 2: @@ -249,11 +584,11 @@ def _norm_id(raw): return int(raw[0]) return int(raw) - target_ids = set(_norm_id(v['new_view']) for v in self.self_definition) + target_ids = {_norm_id(v['new_view']) for v in self.self_definition} - # ViewSetup cleanup + # Remove any existing ViewSetup with those ids (outer only) for child in list(view_setups): - if tagname(child) != 'ViewSetup': + if tn(child) != 'ViewSetup': continue id_el = child.find('id') or child.find('{*}id') if id_el is not None and id_el.text: @@ -263,40 +598,22 @@ def _norm_id(raw): except Exception: pass - # SetupIdDefinition cleanup - for sid in list(setup_ids): - if tagname(sid) != 'SetupIdDefinition': - continue - nid_el = sid.find('NewId') or sid.find('{*}NewId') - if nid_el is not None and nid_el.text: - try: - if int(nid_el.text.strip()) in target_ids: - setup_ids.remove(sid) - except Exception: - pass - + # (Re)build ViewSetups for each new split view for view in self.self_definition: new_id = _norm_id(view['new_view']) - old_id = _norm_id(view['old_view']) - angle = view['angle'] - channel = view['channel'] + # old_id = _norm_id(view['old_view']) # not strictly needed here + + angle = view['angle'] + channel = view['channel'] illumination = view['illumination'] - tile = new_id - voxel_unit = view['voxel_unit'] - voxel_size = view['voxel_dim'] + tile = new_id + voxel_unit = view['voxel_unit'] + voxel_size = view['voxel_dim'] mins = np.array(view["interval"][0], dtype=np.int64) maxs = np.array(view["interval"][1], dtype=np.int64) size = (maxs - mins + 1).tolist() - # / - def_el = ET.SubElement(setup_ids, 'SetupIdDefinition') - ET.SubElement(def_el, 'NewId').text = str(new_id) - ET.SubElement(def_el, 'OldId').text = str(old_id) - ET.SubElement(def_el, 'min').text = f"{int(mins[0])} {int(mins[1])} {int(mins[2])}" - ET.SubElement(def_el, 'max').text = f"{int(maxs[0])} {int(maxs[1])} {int(maxs[2])}" - - # / vs = ET.SubElement(view_setups, 'ViewSetup') ET.SubElement(vs, 'id').text = str(new_id) ET.SubElement(vs, 'size').text = f"{int(size[0])} {int(size[1])} {int(size[2])}" @@ -314,6 +631,31 @@ def _norm_id(raw): ET.SubElement(attrs, 'tile').text = str(int(tile)) ET.SubElement(attrs, 'angle').text = str(int(angle)) + # Ensure outer exists + outer_timepoints = None + for ch in list(outer_seq): + if tn(ch) == 'Timepoints': + outer_timepoints = ch + break + if outer_timepoints is None: + outer_timepoints = ET.Element('Timepoints', {'type': 'pattern'}) + ip = ET.SubElement(outer_timepoints, 'integerpattern') + ip.text = "0" + # place right after ViewSetups + children = list(outer_seq) + insert_idx = children.index(view_setups) + 1 if view_setups in children else len(children) + outer_seq.insert(insert_idx, outer_timepoints) + + # Ensure outer exists + outer_missing = None + for ch in list(outer_seq): + if tn(ch) == 'MissingViews': + outer_missing = ch + break + if outer_missing is None: + outer_missing = ET.Element('MissingViews') + outer_seq.append(outer_missing) + try: ET.indent(root, space=" ") except Exception: @@ -322,38 +664,92 @@ def _norm_id(raw): return ET.tostring(root, encoding='unicode') def save_setup_id_definition_to_xml(self, xml): + """ + Create/overwrite for the split views. + + In the desired final layout, SetupIds lives inside: + + + ... + ... + ... <-- here + + ... + + """ root = ET.fromstring(xml) - # find existing nodes (namespace-agnostic) - def tagname(el): return el.tag.split('}')[-1] - children = list(root) - regs_idx = next((i for i, ch in enumerate(children) if tagname(ch) == 'ViewRegistrations'), None) - seq_idx = next((i for i, ch in enumerate(children) if tagname(ch) == 'SequenceDescription'), None) - setup_ids = next((ch for ch in children if tagname(ch) == 'SetupIds'), None) + def tn(el): + return el.tag.split('}')[-1] + + # Find top-level SequenceDescription + outer_seq = None + for ch in list(root): + if tn(ch) == 'SequenceDescription': + outer_seq = ch + break + if outer_seq is None: + outer_seq = root.find('.//{*}SequenceDescription') + if outer_seq is None: + return xml + + # Find the wrapper ImageLoader format="split.viewerimgloader" + wrapper = None + for ch in list(outer_seq): + if tn(ch) == 'ImageLoader' and (ch.get('format') or '').lower() == 'split.viewerimgloader': + wrapper = ch + break + + # If wrapper not found, fall back to old behavior (root-level SetupIds) + parent_for_setupids = wrapper if wrapper is not None else root + children = list(parent_for_setupids) + + # Locate existing under the chosen parent + setup_ids = None + for ch in children: + if tn(ch) == 'SetupIds': + setup_ids = ch + break - # create/position if setup_ids is None: setup_ids = ET.Element('SetupIds') - insert_idx = regs_idx if regs_idx is not None else ((seq_idx + 1) if seq_idx is not None else len(children)) - root.insert(insert_idx, setup_ids) + if wrapper is not None: + # Under wrapper: insert after inner SequenceDescription if present + inner_children = list(wrapper) + inner_seq = None + for ich in inner_children: + if tn(ich) == 'SequenceDescription': + inner_seq = ich + break + insert_idx = inner_children.index(inner_seq) + 1 if inner_seq is not None else len(inner_children) + wrapper.insert(insert_idx, setup_ids) + else: + # Root-level fallback: put before if present + root_children = list(root) + regs_idx = next((i for i, ch in enumerate(root_children) if tn(ch) == 'ViewRegistrations'), None) + insert_idx = regs_idx if regs_idx is not None else len(root_children) + root.insert(insert_idx, setup_ids) else: - setup_ids.clear() + # Clear existing definitions so we can rewrite + setup_ids.clear() + # Now populate SetupIdDefinition from self.self_definition for view in self.self_definition: new_id = view['new_view'] old_id = view['old_view'] min_bound = view['interval'][0] max_bound = view['interval'][1] - + + # Normalize IDs (can be int or (tp, setup)) nid = int(new_id[1] if isinstance(new_id, (tuple, list)) else new_id) oid = int(old_id[1] if isinstance(old_id, (tuple, list)) else old_id) def_el = ET.SubElement(setup_ids, 'SetupIdDefinition') ET.SubElement(def_el, 'NewId').text = str(nid) ET.SubElement(def_el, 'OldId').text = str(oid) - ET.SubElement(def_el, 'min').text = f"{int(min_bound[0])} {int(min_bound[1])} {int(min_bound[2])}" - ET.SubElement(def_el, 'max').text = f"{int(max_bound[0])} {int(max_bound[1])} {int(max_bound[2])}" - + ET.SubElement(def_el, 'min').text = f"{int(min_bound[0])} {int(min_bound[1])} {int(min_bound[2])}" + ET.SubElement(def_el, 'max').text = f"{int(max_bound[0])} {int(max_bound[1])} {int(max_bound[2])}" + try: ET.indent(root, space=" ") except Exception: @@ -362,11 +758,13 @@ def tagname(el): return el.tag.split('}')[-1] return ET.tostring(root, encoding='unicode') def run(self): - xml = self.save_setup_id_definition_to_xml(self.xml_file) + xml = self.xml_file + xml = self.wrap_image_loader_for_split(xml) + xml = self.save_setup_id_definition_to_xml(xml) xml = self.save_setup_id_to_xml(xml) xml = self.save_view_registrations_to_xml(xml) + xml = self.save_tile_attributes_to_xml(xml) xml = self.save_view_interest_points(xml) - xml = self.wrap_image_loader_for_split(xml) if self.xml_output_path: if self.xml_output_path.startswith("s3://"): diff --git a/Rhapso/split_dataset/split_images.py b/Rhapso/split_dataset/split_images.py index 0126528..d0df2f1 100644 --- a/Rhapso/split_dataset/split_images.py +++ b/Rhapso/split_dataset/split_images.py @@ -8,16 +8,16 @@ import math class SplitImages: - def __init__(self, target_image_size, target_overlap, min_step_size, data_gloabl, n5_path, point_density, min_points, max_points, + def __init__(self, target_image_size, target_overlap, min_step_size, data_global, n5_path, point_density, min_points, max_points, error, excludeRadius): self.target_image_size = target_image_size self.target_overlap = target_overlap self.min_step_size = min_step_size - self.data_global = data_gloabl - self.image_loader_df = data_gloabl['image_loader'] - self.view_setups_df = data_gloabl['view_setups'] - self.view_registrations_df = data_gloabl['view_registrations'] - self.view_interest_points_df = data_gloabl['view_interest_points'] + self.data_global = data_global + self.image_loader_df = data_global['image_loader'] + self.view_setups_df = data_global['view_setups'] + self.view_registrations_df = data_global['view_registrations'] + self.view_interest_points_df = data_global['view_interest_points'] self.n5_path = n5_path self.point_density = point_density self.min_points = min_points @@ -96,21 +96,24 @@ def split_dims(self, input, i, final_size, overlap): to_val = 0 from_val = input_min[i] - while to_val < input[i]: + while to_val < input[i]-1: to_val = min(input[i], from_val + final_size - 1) dim_intervals.append((from_val, to_val)) from_val = to_val - overlap + 1 return dim_intervals - def last_image_size(self, l, s, o): - num = l - 2 * (s - o) - o - den = s - o - rem = num % den if num >= 0 else -((-num) % den) - size = o + rem - if size < 0: - size = l + size - return size + + def last_image_size(self, L, S, O): + stride = S - O + if not (0 <= O < S): + raise ValueError("Require 0 <= O < S") + if L <= 0: + raise ValueError("Require L > 0") + + start_last = ((max(L - S, 0)) // stride) * stride + return L - start_last # will be S when it fits perfectly + def distribute_intervals_fixed_overlap(self, input): input = list(map(int, input.split())) @@ -127,8 +130,8 @@ def distribute_intervals_fixed_overlap(self, input): length = input[i] if length <= self.target_image_size[i]: - pass - + dim_intervals.append((0, length - 1)) + else: l = length s = self.target_image_size[i] @@ -338,139 +341,143 @@ def split_images(self, timepoints, interest_points, fake_label): } new_registrations[(new_view_id_key)] = new_view_registration - - new_v_ip_l = [] - - old_v_ip_l = { - 'points': interest_points[old_view_id], - 'setup': old_id, - 'timepoint': t, - } - - id = 0 - new_ip1 = [] - old_ip_l1 = old_v_ip_l['points'] - old_ip_1 = deepcopy(old_ip_l1['points']) - - for ip in old_ip_1: - if self.contains(ip, interval): - l = deepcopy(ip) - for j in range(len(interval[0])): - l[j] -= interval[0][j] - - new_ip1.append((id, l)) - id += 1 - new_ip_l1 = { - 'base_directory': old_ip_l1['base_path'], - 'corresponding_interest_points': None, - 'interest_points': new_ip1, - 'modified_corresponding_interest_points': None, - 'modified_interest_points': None, - 'n5_path': f"interestpoints.n5/tpId_{t}_viewSetupId_{new_view_id['setup']}/beads_split", - 'xml_n5_path': f"tpId_{t}_viewSetupId_{new_view_id['setup']}/{fake_label}", - "parameters": old_ip_l1['parameters_split'] - } - - new_v_ip_l.append({ - 'label': "beads_split", - 'ip_list': new_ip_l1 - }) - - new_ip = [] - id = 0 - - for j in range(i): - other_interval = intervals[j] - intersection = self.intersect(interval, other_interval) + new_v_ip_l = [] + if old_view_id in interest_points: + old_v_ip_l = { + 'points': interest_points[old_view_id], + 'setup': old_id, + 'timepoint': t, + } + + id = 0 + new_ip1 = [] + old_ip_l1 = old_v_ip_l['points'] + old_ip_1 = deepcopy(old_ip_l1['points']) - if not self.is_empty(intersection): - other_setup = interval_to_view_setup[(tuple(other_interval[0]), tuple(other_interval[1]))] - other_view_id = f"timepoint: {t}, setup: {other_setup['id']}" - other_ip_list = new_interest_points[other_view_id] - - n = len(interval[0]) - num_pixels = 1 - - for k in range(n): - num_pixels *= (intersection[1][k] - intersection[0][k] + 1) - - num_points = min(self.max_points, max(self.min_points, math.ceil(self.point_density * num_pixels / (100.0*100.0*100.0)))) - other_points = (next((x for x in other_ip_list if x.get("label") == fake_label), {"ip_list": {}})["ip_list"].get("interest_points") or []) - other_id = len(other_points) - - tree2 = None - search2 = None + for ip in old_ip_1: + if self.contains(ip, interval): + l = deepcopy(ip) + for j in range(len(interval[0])): + l[j] -= interval[0][j] + + new_ip1.append((id, l)) + id += 1 + + new_ip_l1 = { + 'base_directory': old_ip_l1['base_path'], + 'corresponding_interest_points': None, + 'interest_points': new_ip1, + 'modified_corresponding_interest_points': None, + 'modified_interest_points': None, + 'n5_path': f"interestpoints.n5/tpId_{t}_viewSetupId_{new_view_id['setup']}/beads_split", + 'xml_n5_path': f"tpId_{t}_viewSetupId_{new_view_id['setup']}/{fake_label}", + "parameters": old_ip_l1['parameters_split'] + } + + new_v_ip_l.append({ + 'label': "beads_split", + 'ip_list': new_ip_l1 + }) + + if self.max_points > 0: + new_ip = [] + id = 0 - if self.exclude_radius > 0: - other_ip_global = [] + for j in range(i): + other_interval = intervals[j] + intersection = self.intersect(interval, other_interval) - for k, ip in enumerate(other_points): - l = deepcopy(ip[1]) - - for m in range(n): - l[m] = l[m] + other_interval[0][m] + if not self.is_empty(intersection): + other_setup = interval_to_view_setup[(tuple(other_interval[0]), tuple(other_interval[1]))] + other_view_id = f"timepoint: {t}, setup: {other_setup['id']}" + other_ip_list = new_interest_points[other_view_id] - other_ip_global.append((k, l)) + n = len(interval[0]) + num_pixels = 1 - if len(other_ip_global) > 0: - coords = np.vstack([l for _, l in other_ip_global]) - tree2 = cKDTree(coords) + for k in range(n): + num_pixels *= (intersection[1][k] - intersection[0][k] + 1) + + num_points = min(self.max_points, max(self.min_points, math.ceil(self.point_density * num_pixels / (100.0*100.0*100.0)))) + other_points = (next((x for x in other_ip_list if x.get("label") == fake_label), {"ip_list": {}})["ip_list"].get("interest_points") or []) + other_id = len(other_points) - def search2(q_point_global, radius=self.exclude_radius): - idxs = tree2.query_ball_point(np.asarray(q_point_global, float), radius) - return [other_ip_global[k] for k in idxs] - else: tree2 = None search2 = None - - else: - tree2 = None - search2 = None - - tmp = [0.0] * n - for k in range(num_points): - p = [0.0] * n - op = [0.0] * n - - for d in range(n): - l = rnd.random() * (intersection[1][d] - intersection[0][d] + 1) + intersection[0][d] - p[d] = (l + (rnd.random() - 0.5) * self.error) - interval[0][d] - op[d] = (l + (rnd.random() - 0.5) * self.error) - other_interval[0][d] - tmp[d] = l - - num_neighbors = 0 - if self.exclude_radius > 0: - tmp_ip = (0, np.asarray(tmp, dtype=float)) + if self.exclude_radius > 0: + other_ip_global = [] + + for k, ip in enumerate(other_points): + l = deepcopy(ip[1]) + + for m in range(n): + l[m] = l[m] + other_interval[0][m] + + other_ip_global.append((k, l)) + + if len(other_ip_global) > 0: + coords = np.vstack([l for _, l in other_ip_global]) + tree2 = cKDTree(coords) + + def search2(q_point_global, radius=self.exclude_radius): + idxs = tree2.query_ball_point(np.asarray(q_point_global, float), radius) + return [other_ip_global[k] for k in idxs] + else: + tree2 = None + search2 = None - if search2 is not None: - neighbors = search2(tmp_ip[1], self.exclude_radius) - num_neighbors += len(neighbors) - - if num_neighbors == 0: - new_ip.append((id, p)) - other_points.append((other_id, op)) - id += 1 - other_id += 1 + else: + tree2 = None + search2 = None + + tmp = [0.0] * n + + for k in range(num_points): + p = [0.0] * n + op = [0.0] * n + + for d in range(n): + l = rnd.random() * (intersection[1][d] - intersection[0][d] + 1) + intersection[0][d] + p[d] = (l + (rnd.random() - 0.5) * self.error) - interval[0][d] + op[d] = (l + (rnd.random() - 0.5) * self.error) - other_interval[0][d] + tmp[d] = l + + num_neighbors = 0 + if self.exclude_radius > 0: + tmp_ip = (0, np.asarray(tmp, dtype=float)) + + if search2 is not None: + neighbors = search2(tmp_ip[1], self.exclude_radius) + num_neighbors += len(neighbors) + + if num_neighbors == 0: + new_ip.append((id, p)) + other_points.append((other_id, op)) + id += 1 + other_id += 1 + + next(x for x in other_ip_list if x.get("label") == fake_label)["ip_list"]["interest_points"] = other_points - next(x for x in other_ip_list if x.get("label") == fake_label)["ip_list"]["interest_points"] = other_points - - new_ip_l = { - 'base_directory': old_ip_l1['base_path'], - 'corresponding_interest_points': None, - 'interest_points': new_ip, - 'modified_corresponding_interest_points': None, - 'modified_interest_points': None, - 'n5_path': f"interestpoints.n5/tpId_{t}_viewSetupId_{new_view_id['setup']}/{fake_label}", - 'xml_n5_path': f"tpId_{t}_viewSetupId_{new_view_id['setup']}/{fake_label}", - "parameters": old_ip_l1['parameters_fake'] - } - - new_v_ip_l.append({ - 'label': fake_label, - 'ip_list': new_ip_l - }) + new_ip_l = { + 'base_directory': old_ip_l1['base_path'], + 'corresponding_interest_points': None, + 'interest_points': new_ip, + 'modified_corresponding_interest_points': None, + 'modified_interest_points': None, + 'n5_path': f"interestpoints.n5/tpId_{t}_viewSetupId_{new_view_id['setup']}/{fake_label}", + 'xml_n5_path': f"tpId_{t}_viewSetupId_{new_view_id['setup']}/{fake_label}", + "parameters": old_ip_l1['parameters_fake'] + } + + new_v_ip_l.append({ + 'label': fake_label, + 'ip_list': new_ip_l + }) + + if len(new_v_ip_l) > 0: + new_interest_points[new_view_id_key] = new_v_ip_l self.setup_definition.append({ 'interval': interval, @@ -484,7 +491,6 @@ def search2(q_point_global, radius=self.exclude_radius): 'old_models': transform_list }) - new_interest_points[new_view_id_key] = new_v_ip_l new_id += 1 return new_interest_points @@ -493,6 +499,10 @@ def load_interest_points(self, fake_label): full_path = self.n5_path + "interestpoints.n5" interest_points = {} + # Skip loading interest points if dataframe is empty + if self.view_interest_points_df.empty: + return {} + if full_path.startswith("s3://"): path = full_path.rstrip("/") s3 = s3fs.S3FileSystem(anon=False) @@ -503,6 +513,7 @@ def load_interest_points(self, fake_label): store = zarr.N5Store(full_path) root = zarr.open(store, mode="r") + for _, row in self.view_interest_points_df.iterrows(): view_id = f"timepoint: {row['timepoint']}, setup: {row['setup']}" interestpoints_prefix = f"{row['path']}/interestpoints/loc/" diff --git a/Rhapso/split_dataset/xml_to_dataframe_split.py b/Rhapso/split_dataset/xml_to_dataframe_split.py index abe13aa..5d27dc1 100644 --- a/Rhapso/split_dataset/xml_to_dataframe_split.py +++ b/Rhapso/split_dataset/xml_to_dataframe_split.py @@ -20,8 +20,8 @@ def parse_image_loader_zarr(self, root): timepoint = il.get("timepoint") file_path = il.find("path").text if il.find("path") is not None else None - channel = file_path.split("_ch_", 1)[1].split(".ome.zarr", 1)[0] - + # channel = file_path.split("_ch_", 1)[1].split(".ome.zarr", 1)[0] + channel = 0 # Placeholder for channel extraction logic image_loader_data.append( { "view_setup": view_setup,