diff --git a/Rhapso/data_prep/xml_to_dataframe.py b/Rhapso/data_prep/xml_to_dataframe.py index ec105c7..ee1284c 100644 --- a/Rhapso/data_prep/xml_to_dataframe.py +++ b/Rhapso/data_prep/xml_to_dataframe.py @@ -18,8 +18,16 @@ def parse_image_loader_zarr(self, root): for il in root.findall(".//ImageLoader/zgroups/zgroup"): view_setup = il.get("setup") timepoint = il.get("timepoint") - file_path = il.find("path").text if il.find("path") is not None else None - channel = file_path.split("_ch_", 1)[1].split(".ome.zarr", 1)[0] + file_path = il.get("path") + if file_path is None: + element_string = ET.tostring(il, encoding='unicode') + raise ValueError(f"zgroup element missing 'path' attribute: {element_string}") + + # default to channel 0 if not parseable from the path name + try: + channel = file_path.split("_ch_", 1)[1].split(".ome.zarr", 1)[0] + except (IndexError, AttributeError): + channel = 0 image_loader_data.append( { diff --git a/Rhapso/detection/advanced_refinement.py b/Rhapso/detection/advanced_refinement.py index 1c9db5c..500c0d3 100644 --- a/Rhapso/detection/advanced_refinement.py +++ b/Rhapso/detection/advanced_refinement.py @@ -163,7 +163,9 @@ def filter(self): lb = row['lower_bound'] ub = row['upper_bound'] if vid == view_id: - to_process_interval = (lb, ub) + + ub_inclusive = (ub[0]+1, ub[1]+1, ub[2]+1) + to_process_interval = (lb, ub_inclusive) ips_block = [] intensities_block = [] diff --git a/Rhapso/detection/overlap_detection.py b/Rhapso/detection/overlap_detection.py index 2dca34c..20e748c 100644 --- a/Rhapso/detection/overlap_detection.py +++ b/Rhapso/detection/overlap_detection.py @@ -5,6 +5,7 @@ import s3fs import dask.array as da import math +import os """ Overlap Detection figures out where image tile overlap. @@ -50,7 +51,7 @@ def load_image_metadata(self, file_path): return self.image_shape_cache[file_path] if self.file_type == 'zarr': - s3 = s3fs.S3FileSystem(anon=False) + s3 = s3fs.S3FileSystem(anon=True) store = s3fs.S3Map(root=file_path, s3=s3) zarr_array = zarr.open(store, mode='r') dask_array = da.from_zarr(zarr_array) @@ -246,7 +247,8 @@ def find_overlapping_area(self): all_intervals = [] if self.file_type == 'zarr': level, leftovers = self.choose_zarr_level() - dim_base = self.load_image_metadata(self.prefix + row_i['file_path'] + f'/{0}') + + dim_base = self.load_image_metadata(os.path.join(self.prefix, row_i['file_path'])) # isotropic pyramid s = float(2 ** level) @@ -256,7 +258,7 @@ def find_overlapping_area(self): _, dsxy, dsz = leftovers elif self.file_type == 'tiff': - dim_base = self.load_image_metadata(self.prefix + row_i['file_path']) + dim_base = self.load_image_metadata(os.path.join(self.prefix, row_i['file_path'])) mipmap_of_downsample = self.create_mipmap_transform() dsxy, dsz = self.dsxy, self.dsz level = None diff --git a/Rhapso/split_dataset/compute_grid_rules.py b/Rhapso/split_dataset/compute_grid_rules.py index bfc611c..1f99429 100644 --- a/Rhapso/split_dataset/compute_grid_rules.py +++ b/Rhapso/split_dataset/compute_grid_rules.py @@ -23,11 +23,10 @@ def closest_larger_long_divisible_by(self, a, b): return int(a + b - (a % b)) - def find_min_step_size(self): + def find_min_step_size(self, lowest_resolution=(1.0, 1.0, 1.0)): """ Compute the minimal integer step size per axis (X,Y,Z) that is compatible with the chosen lowest resolution """ - lowest_resolution=(64.0, 64.0, 64.0) min_step_size = [1, 1, 1] for d, r in enumerate(lowest_resolution): diff --git a/Rhapso/split_dataset/save_xml.py b/Rhapso/split_dataset/save_xml.py index 5865a5b..d11df06 100644 --- a/Rhapso/split_dataset/save_xml.py +++ b/Rhapso/split_dataset/save_xml.py @@ -575,9 +575,12 @@ def _norm_id(raw): outer_timepoints = ch break if outer_timepoints is None: - outer_timepoints = ET.Element('Timepoints', {'type': 'pattern'}) - ip = ET.SubElement(outer_timepoints, 'integerpattern') - ip.text = "0" + tps = sorted({int(v['old_view'][0]) for v in self.self_definition if v['old_view'][0] is not None}) + first_tp = str(tps[0]) if tps else "0" + last_tp = str(tps[-1]) if tps else "0" + outer_timepoints = ET.Element('Timepoints', {'type': 'range'}) + ET.SubElement(outer_timepoints, 'first').text = first_tp + ET.SubElement(outer_timepoints, 'last').text = last_tp # place right after ViewSetups children = list(outer_seq) insert_idx = children.index(view_setups) + 1 if view_setups in children else len(children) diff --git a/Rhapso/split_dataset/split_images.py b/Rhapso/split_dataset/split_images.py index 0126528..6e27b71 100644 --- a/Rhapso/split_dataset/split_images.py +++ b/Rhapso/split_dataset/split_images.py @@ -8,16 +8,16 @@ import math class SplitImages: - def __init__(self, target_image_size, target_overlap, min_step_size, data_gloabl, n5_path, point_density, min_points, max_points, + def __init__(self, target_image_size, target_overlap, min_step_size, data_global, n5_path, point_density, min_points, max_points, error, excludeRadius): self.target_image_size = target_image_size self.target_overlap = target_overlap self.min_step_size = min_step_size - self.data_global = data_gloabl - self.image_loader_df = data_gloabl['image_loader'] - self.view_setups_df = data_gloabl['view_setups'] - self.view_registrations_df = data_gloabl['view_registrations'] - self.view_interest_points_df = data_gloabl['view_interest_points'] + self.data_global = data_global + self.image_loader_df = data_global['image_loader'] + self.view_setups_df = data_global['view_setups'] + self.view_registrations_df = data_global['view_registrations'] + self.view_interest_points_df = data_global['view_interest_points'] self.n5_path = n5_path self.point_density = point_density self.min_points = min_points @@ -96,21 +96,33 @@ def split_dims(self, input, i, final_size, overlap): to_val = 0 from_val = input_min[i] - while to_val < input[i]: + while to_val < input[i] - 1: to_val = min(input[i], from_val + final_size - 1) dim_intervals.append((from_val, to_val)) from_val = to_val - overlap + 1 return dim_intervals - def last_image_size(self, l, s, o): - num = l - 2 * (s - o) - o - den = s - o - rem = num % den if num >= 0 else -((-num) % den) - size = o + rem - if size < 0: - size = l + size - return size + def last_image_size(self, L, S, O): + """ + Parameters + ---------- + l : int + The length of the input dimension. + s : int + The target split-tile size in this dimension + o : int + The size of ovelap in this dimension. + """ + + stride = S - O + if not (0 <= O < S): + raise ValueError("Require 0 <= O < S") + if L <= 0: + raise ValueError("Require L > 0") + + start_last = ((max(L - S, 0)) // stride) * stride + return L - start_last # will be S when it fits perfectly def distribute_intervals_fixed_overlap(self, input): input = list(map(int, input.split())) @@ -127,7 +139,7 @@ def distribute_intervals_fixed_overlap(self, input): length = input[i] if length <= self.target_image_size[i]: - pass + dim_intervals.append((0, length - 1)) else: l = length @@ -338,139 +350,143 @@ def split_images(self, timepoints, interest_points, fake_label): } new_registrations[(new_view_id_key)] = new_view_registration - - new_v_ip_l = [] - - old_v_ip_l = { - 'points': interest_points[old_view_id], - 'setup': old_id, - 'timepoint': t, - } - - id = 0 - new_ip1 = [] - old_ip_l1 = old_v_ip_l['points'] - old_ip_1 = deepcopy(old_ip_l1['points']) - - for ip in old_ip_1: - if self.contains(ip, interval): - l = deepcopy(ip) - for j in range(len(interval[0])): - l[j] -= interval[0][j] - - new_ip1.append((id, l)) - id += 1 - new_ip_l1 = { - 'base_directory': old_ip_l1['base_path'], - 'corresponding_interest_points': None, - 'interest_points': new_ip1, - 'modified_corresponding_interest_points': None, - 'modified_interest_points': None, - 'n5_path': f"interestpoints.n5/tpId_{t}_viewSetupId_{new_view_id['setup']}/beads_split", - 'xml_n5_path': f"tpId_{t}_viewSetupId_{new_view_id['setup']}/{fake_label}", - "parameters": old_ip_l1['parameters_split'] - } - - new_v_ip_l.append({ - 'label': "beads_split", - 'ip_list': new_ip_l1 - }) - - new_ip = [] - id = 0 - - for j in range(i): - other_interval = intervals[j] - intersection = self.intersect(interval, other_interval) + new_v_ip_l = [] + if old_view_id in interest_points: + old_v_ip_l = { + 'points': interest_points[old_view_id], + 'setup': old_id, + 'timepoint': t, + } + + id = 0 + new_ip1 = [] + old_ip_l1 = old_v_ip_l['points'] + old_ip_1 = deepcopy(old_ip_l1['points']) - if not self.is_empty(intersection): - other_setup = interval_to_view_setup[(tuple(other_interval[0]), tuple(other_interval[1]))] - other_view_id = f"timepoint: {t}, setup: {other_setup['id']}" - other_ip_list = new_interest_points[other_view_id] - - n = len(interval[0]) - num_pixels = 1 - - for k in range(n): - num_pixels *= (intersection[1][k] - intersection[0][k] + 1) - - num_points = min(self.max_points, max(self.min_points, math.ceil(self.point_density * num_pixels / (100.0*100.0*100.0)))) - other_points = (next((x for x in other_ip_list if x.get("label") == fake_label), {"ip_list": {}})["ip_list"].get("interest_points") or []) - other_id = len(other_points) - - tree2 = None - search2 = None + for ip in old_ip_1: + if self.contains(ip, interval): + l = deepcopy(ip) + for j in range(len(interval[0])): + l[j] -= interval[0][j] + + new_ip1.append((id, l)) + id += 1 + + new_ip_l1 = { + 'base_directory': old_ip_l1['base_path'], + 'corresponding_interest_points': None, + 'interest_points': new_ip1, + 'modified_corresponding_interest_points': None, + 'modified_interest_points': None, + 'n5_path': f"interestpoints.n5/tpId_{t}_viewSetupId_{new_view_id['setup']}/beads_split", + 'xml_n5_path': f"tpId_{t}_viewSetupId_{new_view_id['setup']}/{fake_label}", + "parameters": old_ip_l1['parameters_split'] + } + + new_v_ip_l.append({ + 'label': "beads_split", + 'ip_list': new_ip_l1 + }) + + if self.max_points > 0: + new_ip = [] + id = 0 - if self.exclude_radius > 0: - other_ip_global = [] + for j in range(i): + other_interval = intervals[j] + intersection = self.intersect(interval, other_interval) - for k, ip in enumerate(other_points): - l = deepcopy(ip[1]) + if not self.is_empty(intersection): + other_setup = interval_to_view_setup[(tuple(other_interval[0]), tuple(other_interval[1]))] + other_view_id = f"timepoint: {t}, setup: {other_setup['id']}" + other_ip_list = new_interest_points[other_view_id] - for m in range(n): - l[m] = l[m] + other_interval[0][m] - - other_ip_global.append((k, l)) + n = len(interval[0]) + num_pixels = 1 - if len(other_ip_global) > 0: - coords = np.vstack([l for _, l in other_ip_global]) - tree2 = cKDTree(coords) + for k in range(n): + num_pixels *= (intersection[1][k] - intersection[0][k] + 1) + + num_points = min(self.max_points, max(self.min_points, math.ceil(self.point_density * num_pixels / (100.0*100.0*100.0)))) + other_points = (next((x for x in other_ip_list if x.get("label") == fake_label), {"ip_list": {}})["ip_list"].get("interest_points") or []) + other_id = len(other_points) - def search2(q_point_global, radius=self.exclude_radius): - idxs = tree2.query_ball_point(np.asarray(q_point_global, float), radius) - return [other_ip_global[k] for k in idxs] - else: tree2 = None search2 = None - - else: - tree2 = None - search2 = None - - tmp = [0.0] * n - for k in range(num_points): - p = [0.0] * n - op = [0.0] * n - - for d in range(n): - l = rnd.random() * (intersection[1][d] - intersection[0][d] + 1) + intersection[0][d] - p[d] = (l + (rnd.random() - 0.5) * self.error) - interval[0][d] - op[d] = (l + (rnd.random() - 0.5) * self.error) - other_interval[0][d] - tmp[d] = l - - num_neighbors = 0 - if self.exclude_radius > 0: - tmp_ip = (0, np.asarray(tmp, dtype=float)) + if self.exclude_radius > 0: + other_ip_global = [] + + for k, ip in enumerate(other_points): + l = deepcopy(ip[1]) + + for m in range(n): + l[m] = l[m] + other_interval[0][m] + + other_ip_global.append((k, l)) + + if len(other_ip_global) > 0: + coords = np.vstack([l for _, l in other_ip_global]) + tree2 = cKDTree(coords) + + def search2(q_point_global, radius=self.exclude_radius): + idxs = tree2.query_ball_point(np.asarray(q_point_global, float), radius) + return [other_ip_global[k] for k in idxs] + else: + tree2 = None + search2 = None - if search2 is not None: - neighbors = search2(tmp_ip[1], self.exclude_radius) - num_neighbors += len(neighbors) - - if num_neighbors == 0: - new_ip.append((id, p)) - other_points.append((other_id, op)) - id += 1 - other_id += 1 + else: + tree2 = None + search2 = None + + tmp = [0.0] * n + + for k in range(num_points): + p = [0.0] * n + op = [0.0] * n + + for d in range(n): + l = rnd.random() * (intersection[1][d] - intersection[0][d] + 1) + intersection[0][d] + p[d] = (l + (rnd.random() - 0.5) * self.error) - interval[0][d] + op[d] = (l + (rnd.random() - 0.5) * self.error) - other_interval[0][d] + tmp[d] = l + + num_neighbors = 0 + if self.exclude_radius > 0: + tmp_ip = (0, np.asarray(tmp, dtype=float)) + + if search2 is not None: + neighbors = search2(tmp_ip[1], self.exclude_radius) + num_neighbors += len(neighbors) + + if num_neighbors == 0: + new_ip.append((id, p)) + other_points.append((other_id, op)) + id += 1 + other_id += 1 + + next(x for x in other_ip_list if x.get("label") == fake_label)["ip_list"]["interest_points"] = other_points - next(x for x in other_ip_list if x.get("label") == fake_label)["ip_list"]["interest_points"] = other_points - - new_ip_l = { - 'base_directory': old_ip_l1['base_path'], - 'corresponding_interest_points': None, - 'interest_points': new_ip, - 'modified_corresponding_interest_points': None, - 'modified_interest_points': None, - 'n5_path': f"interestpoints.n5/tpId_{t}_viewSetupId_{new_view_id['setup']}/{fake_label}", - 'xml_n5_path': f"tpId_{t}_viewSetupId_{new_view_id['setup']}/{fake_label}", - "parameters": old_ip_l1['parameters_fake'] - } - - new_v_ip_l.append({ - 'label': fake_label, - 'ip_list': new_ip_l - }) + new_ip_l = { + 'base_directory': old_ip_l1['base_path'], + 'corresponding_interest_points': None, + 'interest_points': new_ip, + 'modified_corresponding_interest_points': None, + 'modified_interest_points': None, + 'n5_path': f"interestpoints.n5/tpId_{t}_viewSetupId_{new_view_id['setup']}/{fake_label}", + 'xml_n5_path': f"tpId_{t}_viewSetupId_{new_view_id['setup']}/{fake_label}", + "parameters": old_ip_l1['parameters_fake'] + } + + new_v_ip_l.append({ + 'label': fake_label, + 'ip_list': new_ip_l + }) + + if len(new_v_ip_l) > 0: + new_interest_points[new_view_id_key] = new_v_ip_l self.setup_definition.append({ 'interval': interval, @@ -484,7 +500,6 @@ def search2(q_point_global, radius=self.exclude_radius): 'old_models': transform_list }) - new_interest_points[new_view_id_key] = new_v_ip_l new_id += 1 return new_interest_points @@ -493,6 +508,9 @@ def load_interest_points(self, fake_label): full_path = self.n5_path + "interestpoints.n5" interest_points = {} + if self.view_interest_points_df.empty: + return {} + if full_path.startswith("s3://"): path = full_path.rstrip("/") s3 = s3fs.S3FileSystem(anon=False) diff --git a/Rhapso/split_dataset/xml_to_dataframe_split.py b/Rhapso/split_dataset/xml_to_dataframe_split.py index abe13aa..a4e9a23 100644 --- a/Rhapso/split_dataset/xml_to_dataframe_split.py +++ b/Rhapso/split_dataset/xml_to_dataframe_split.py @@ -17,10 +17,17 @@ def parse_image_loader_zarr(self, root): for il in root.findall(".//ImageLoader/zgroups/zgroup"): view_setup = il.get("setup") - timepoint = il.get("timepoint") - file_path = il.find("path").text if il.find("path") is not None else None - - channel = file_path.split("_ch_", 1)[1].split(".ome.zarr", 1)[0] + timepoint = il.get("tp") or il.get("timepoint") + file_path = il.get("path") + if file_path is None: + element_string = ET.tostring(il, encoding='unicode') + raise ValueError(f"zgroup element missing 'path' attribute: {element_string}") + + # default to channel 0 if not parseable from the path name + try: + channel = file_path.split("_ch_", 1)[1].split(".ome.zarr", 1)[0] + except (IndexError, AttributeError): + channel = 0 image_loader_data.append( {