Commit edf6bbd
Author: Suraj Mishra
new improved localization script
1 parent 223724d commit edf6bbd

Lines changed: 387 additions & 0 deletions
@@ -0,0 +1,387 @@
#####----------Importing Libraries----------#####

import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm
import quilt3 as q3
from shutil import rmtree

# 3d meshing libraries
import pyvista as pv
import trimesh
import point_cloud_utils as pcu
from scipy.spatial import Delaunay

from bioio import BioImage

from skimage.measure import regionprops

from EMT_data_analysis.tools import alignment, io

import argparse

#####----------Main Analysis Function----------#####

def nuclei_localization(
    df: pd.DataFrame,
    data_id: str,
    output_directory: str,
    align_segmentation: bool = True,
):
    '''
    This is the main function to localize nuclei inside a 3D mesh.

    Parameters
    ----------
    df: pd.DataFrame
        Manifest rows for the movie to process
    data_id: str
        Data ID from manifest for data to process
    output_directory: str
        Path to the output directory where the localized nuclei data will be saved.
    align_segmentation: bool
        Flag to enable alignment of the segmentation using the barcode of the movie.
        Default is True.
    '''
    # ensure output directory exists
    out_dir = Path(output_directory)
    out_dir.mkdir(exist_ok=True, parents=True)

    tmp_dir = Path("./emt_tmp/nuclei_localization/")
    tmp_dir.mkdir(exist_ok=True, parents=True)

    # load segmentations and meshes
    # First, check for local ZARR file in the reprocessed directory
    local_zarr_base = Path("/allen/aics/emt/all_cells_masks/ZARR_Conversion/August_24_H2B_reprocess_v2/main")
    local_zarr_path = local_zarr_base / f"{data_id}_H2B_nuclear_segmentation.ome.zarr"

    local_zarr_base_batch2v2 = Path("/allen/aics/emt/nuclear_segmentation/ZARR_Conversion/deliverable_2_v2")
    local_zarr_path_batch2v2 = local_zarr_base_batch2v2 / f"{data_id}_H2B_nuclear_segmentation.ome.zarr"

    if local_zarr_path.exists():
        seg_path = str(local_zarr_path)
        print(f"Using batch1 local ZARR: {seg_path}")
    elif local_zarr_path_batch2v2.exists():
        seg_path = str(local_zarr_path_batch2v2)
        print(f"Using batch2_v2 local ZARR: {seg_path}")
    elif df['Gene'].values[0] == 'HIST1H2BJ':
        seg_path = df['H2B Nuclear Segmentation URL'].values[0]
        print(f"Using H2B segmentation from quilt manifest: {seg_path}")
    else:
        raise ValueError(f"The movie {data_id} does not have H2B segmentations")

    segmentations = BioImage(seg_path)

    # Check for local mesh files first, then fall back to S3 bucket
    # Local mesh directory for resubmission data
    local_mesh_base = Path("//allen/aics/emt/basement_membrane_segmentation/Resubmission/compile")
    local_mesh_folder = local_mesh_base / f"{data_id}_collagenIV_segmentation_mesh"

    mesh_fn = None
    use_local_mesh = False

    # Check if local mesh folder exists with a VTM file
    if local_mesh_folder.exists():
        local_vtm_files = list(local_mesh_folder.glob('*.vtm'))
        if local_vtm_files:
            mesh_fn = local_vtm_files[0]
            use_local_mesh = True
            print(f"Using local mesh: {mesh_fn}")

    if not use_local_mesh:
        # Download meshes into temporary directory from s3 bucket
        mesh_path = df['CollagenIV Segmentation Mesh Folder'].values[0].replace('s3://allencell/', '')
        bucket = q3.Bucket("s3://allencell")
        try:
            bucket.fetch(
                mesh_path + '/',
                str(tmp_dir) + '/'
            )
        except Exception as e:
            print(f"Failed to download mesh for {data_id}: {e}")
            rmtree(tmp_dir, ignore_errors=True)
            return

        # load meshes - handle both naming conventions:
        # 1. DataID-prefixed: {data_id}_collagenIV_segmentation_mesh.vtm
        # 2. Generic: collagenIV_segmentation_mesh.vtm
        vtm_files = list(tmp_dir.glob('*.vtm'))
        if not vtm_files:
            print(f"No VTM mesh file found for {data_id} in {tmp_dir}")
            rmtree(tmp_dir, ignore_errors=True)
            return
        mesh_fn = vtm_files[0]  # Use the first (and typically only) VTM file
        print(f"Using S3 mesh: {mesh_fn}")

    # load meshes
    meshes = pv.read(mesh_fn)

    # localize nuclei for each timepoint
    num_timepoints = int(df['Image Size T'].values[0])
    nuclei = []
    for timepoint in tqdm(range(num_timepoints), desc=f"Movie {data_id}"):
        # check if mesh exists for this timepoint
        if f'{timepoint}' not in meshes.keys():
            print(f"Mesh for timepoint {timepoint} not found.")
            continue

        if align_segmentation:
            alignment_matrix = alignment.parse_rotation_matrix_from_string(df['Dual Camera Alignment Matrix Value'].values[0])
        else:
            alignment_matrix = np.zeros((3, 3))

        # localize nuclei
        nuclei_tp = localize_for_timepoint(
            mesh=meshes[f'{timepoint}'],
            seg=segmentations.get_image_data("ZYX", T=timepoint).squeeze(),
            align_segmentation=align_segmentation,
            alignment_matrix=alignment_matrix
        )

        nuclei_tp['Data ID'] = data_id
        nuclei_tp['Time hr'] = timepoint / 0.5
        nuclei.append(nuclei_tp)

    # save nuclei data: move the 'Data ID' and 'Time hr' columns to the front
    nuclei = pd.concat(nuclei)
    cols = nuclei.columns.tolist()
    newcols = cols[-2:]
    newcols.extend(cols[:-2])
    nuclei = nuclei[newcols]

    out_fn = out_dir / (data_id + "_localized_nuclei.csv")
    nuclei.to_csv(out_fn, index=False)
    rmtree(tmp_dir)


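# Output sketch: each {data_id}_localized_nuclei.csv starts with the two
# metadata columns moved to the front by the reordering above (values
# illustrative only):
#
#   Data ID        Time hr  Label  Inside    X    Y   Z
#   3500005548_43      0.0      1    True  412  388  14
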
#####----------Helper Functions----------#####

def fill_holes_flat_cap(mesh_pv: pv.PolyData) -> tuple:
    """
    Fill holes in a mesh by creating a flat cap at the boundary level.
    Uses point_cloud_utils make_mesh_watertight after adding cap geometry.

    This is an MIT-licensed alternative to PyMeshFix's GPL-licensed repair.

    Parameters
    ----------
    mesh_pv : pv.PolyData
        PyVista mesh with holes to fill.

    Returns
    -------
    tuple
        (vertices, faces) of the watertight mesh.
    """
    vert = mesh_pv.points.copy()
    faces = mesh_pv.faces.reshape(-1, 4)[:, 1:].copy()

    # Extract boundary edges (the hole outline)
    boundary = mesh_pv.extract_feature_edges(
        boundary_edges=True, feature_edges=False,
        manifold_edges=False, non_manifold_edges=False
    )
    boundary_points = boundary.points

    if len(boundary_points) == 0:
        # No holes found, make watertight anyway
        vw, fw = pcu.make_mesh_watertight(vert, faces, 10000)
        return vw, fw

    # Use median Z of boundary as cap level
    cap_z = np.percentile(boundary_points[:, 2], 50)

    # Map boundary points to vertex indices in the original mesh
    boundary_indices = []
    for bp in boundary_points:
        dists = np.linalg.norm(vert - bp, axis=1)
        idx = np.argmin(dists)
        if dists[idx] < 0.1:
            boundary_indices.append(idx)
    boundary_indices = np.unique(boundary_indices)

    # Create cap vertices (same XY as boundary, but at cap_z)
    cap_verts = vert[boundary_indices].copy()
    cap_verts[:, 2] = cap_z

    # Add cap vertices to mesh
    n_orig_verts = len(vert)
    new_vert = np.vstack([vert, cap_verts])
    cap_vert_indices = np.arange(n_orig_verts, n_orig_verts + len(cap_verts))
    boundary_to_cap = dict(zip(boundary_indices, cap_vert_indices))

    # Create side faces connecting boundary to cap
    boundary_edges = boundary.lines.reshape(-1, 3)[:, 1:]
    side_faces = []
    for edge in boundary_edges:
        p1, p2 = boundary_points[edge[0]], boundary_points[edge[1]]
        d1 = np.linalg.norm(vert - p1, axis=1)
        d2 = np.linalg.norm(vert - p2, axis=1)
        v1, v2 = np.argmin(d1), np.argmin(d2)
        if v1 in boundary_to_cap and v2 in boundary_to_cap:
            c1, c2 = boundary_to_cap[v1], boundary_to_cap[v2]
            side_faces.append([v1, v2, c2])
            side_faces.append([v1, c2, c1])
    side_faces = np.array(side_faces)

    # Create cap faces using Delaunay triangulation of the cap's XY footprint
    cap_xy = cap_verts[:, :2]
    tri = Delaunay(cap_xy)
    cap_faces = cap_vert_indices[tri.simplices]

    # Combine all faces
    new_faces = np.vstack([faces, side_faces, cap_faces])

    # Make watertight using point_cloud_utils
    vw, fw = pcu.make_mesh_watertight(new_vert, new_faces, 10000)

    return vw, fw

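# A minimal self-check sketch (illustrative, not part of the pipeline; the
# clipped sphere is an assumed stand-in for a real collagen-IV mesh with an
# open rim):
#
#   open_mesh = pv.Sphere().clip(normal='z').triangulate()  # hemisphere with a hole
#   vw_chk, fw_chk = fill_holes_flat_cap(open_mesh)
#   assert trimesh.Trimesh(vertices=vw_chk, faces=fw_chk).is_watertight
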

def localize_for_timepoint(
    mesh: pv.PolyData,
    seg: np.ndarray,
    align_segmentation: bool,
    alignment_matrix: np.ndarray
):
    '''
    This function localizes nuclei inside a 3D mesh for a given timepoint.

    Parameters
    ----------
    mesh: pv.PolyData
        3D mesh for the timepoint.
    seg: np.ndarray
        Nuclei segmentation for the timepoint.
    align_segmentation: bool
        Flag to enable alignment of the segmentation using the barcode of the movie.
    alignment_matrix: np.ndarray
        Camera alignment matrix parsed from the manifest.
    '''

    # align segmentation if required
    if align_segmentation:
        transform = alignment.get_alignment_matrix(alignment_matrix)
        transform = transform.inverse

    # raise the open rim of the mesh so each boundary vertex sits at (or above)
    # the 99th-percentile rim height before capping
    mf_holes = mesh.extract_feature_edges(boundary_edges=True, feature_edges=False, manifold_edges=False)
    outline_verts = mf_holes.points
    top = np.percentile(outline_verts[:, 2], 99)
    for i in range(outline_verts.shape[0]):
        vert = outline_verts[i]
        new_vert = np.array([vert[0], vert[1], max(vert[2], top)])

        v_idx = mesh.find_closest_point(vert)
        mesh.points[v_idx] = new_vert

    # transpose segmentation to XYZ coordinates and set z-scale for isotropic resolution
    seg = seg.transpose(2, 1, 0)
    scale = 2.88 / 0.271

    # Fill holes and create watertight mesh using custom flat cap approach
    vw, fw = fill_holes_flat_cap(mesh)
    mesh = trimesh.Trimesh(vertices=vw, faces=fw)

    # initialize ray caster (for checking if a point is inside the mesh)
    rayCaster = trimesh.ray.ray_triangle.RayMeshIntersector(mesh)

    # initialize nuclei data dictionary
    nucData = {}
    nucData["Label"] = []
    nucData["Inside"] = []
    nucData["X"] = []
    nucData["Y"] = []
    nucData["Z"] = []

    # localize nuclei
    props = regionprops(seg.astype(int))
    for prop in props:
        nucData['Label'].append(prop.label)
        nucData["X"].append(int(prop.centroid[0]))
        nucData["Y"].append(int(prop.centroid[1]))
        nucData["Z"].append(int(prop.centroid[2]))

        # get nuclei centroid (scaled to isotropic resolution)
        centroid = [
            prop.centroid[0],
            prop.centroid[1],
            prop.centroid[2] * scale
        ]

        # check if centroid is inside the mesh; if the ray cast fails, record
        # False so all columns stay the same length (a bare `continue` here
        # would leave 'Inside' shorter and crash pd.DataFrame below)
        try:
            contains = rayCaster.contains_points([centroid])
            nucData['Inside'].append(bool(contains[0]))
        except Exception:
            nucData['Inside'].append(False)

    return pd.DataFrame(nucData)


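# Output sketch: one row per labeled nucleus, with centroid voxel coordinates
# and an inside/outside call against the watertight mesh (values illustrative):
#
#   Label  Inside    X    Y   Z
#       1    True  412  388  14
#       2   False  120   95   3
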
#####----------Run Function Call----------#####

def run_nuclei_localization(
    df_manifest: pd.DataFrame,
    output_directory: str,
    align_segmentation: bool = True,
):
    '''
    This function runs nuclei localization for every movie in the analysis set.

    Parameters
    ----------
    df_manifest: pd.DataFrame
        Manifest of the full dataset
    output_directory: str
        Path to the output directory where the localized nuclei data will be saved.
    align_segmentation: bool
        Flag to enable alignment of the segmentation using the barcode of the movie.
        Default is True.
    '''
    # Filter to specific Data IDs for analysis
    ANALYSIS_DATA_IDS = [
        '3500005548_43', '3500005548_46', '3500005548_48',
        '3500005824_35', '3500005824_36', '3500005824_37', '3500005824_38',
        '3500005828_43', '3500005828_45', '3500005828_46', '3500005828_67', '3500005828_70',
        '3500006256_19', '3500006256_21',
        '3500007081_8',
        '3500007213_38',
        '3500007247_5',
        '3500007432_52', '3500007432_57', '3500007432_61', '3500007432_63',
    ]

    df_cond = df_manifest[df_manifest['Data ID'].isin(ANALYSIS_DATA_IDS)]

    print(f"Processing {len(df_cond)} movies with CollagenIV segmentations.")

    for data_id in tqdm(pd.unique(df_cond['Data ID']), desc="Movies"):
        df_id = df_manifest[df_manifest['Data ID'] == data_id]

        # nuclei_localization raises if the movie lacks the required H2B segmentations
        nuclei_localization(
            df=df_id,
            data_id=data_id,
            output_directory=output_directory,
            align_segmentation=align_segmentation
        )

#####----------Argument Parsing----------#####
if __name__ == '__main__':
    manifest = io.load_imaging_and_segmentation_dataset()
    output_dir = io.setup_base_directory_name("nuclei_localization")

    run_nuclei_localization(
        df_manifest=manifest,
        output_directory=output_dir
    )
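
# A single-movie sketch (assumptions: the manifest loads as above and has the
# 'Data ID' column used throughout; './localized_nuclei' is just an example
# output directory):
#
#   df = io.load_imaging_and_segmentation_dataset()
#   one_id = '3500005548_43'
#   nuclei_localization(df[df['Data ID'] == one_id], one_id, './localized_nuclei')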
