1+ #####----------Importing Libraries----------#####
2+
3+ import numpy as np
4+ import pandas as pd
5+ from pathlib import Path
6+ from tqdm import tqdm
7+ import quilt3 as q3
8+ from shutil import rmtree
9+
10+ # 3d meshing libraries
11+ import pyvista as pv
12+ import trimesh
13+ import point_cloud_utils as pcu
14+ from scipy .spatial import Delaunay
15+
16+ from bioio import BioImage
17+
18+ from skimage .measure import regionprops
19+
20+ from EMT_data_analysis .tools import alignment , io
21+
22+ import argparse
23+
24+ #####----------Main Analysis Function----------#####
25+
26+ def nuclei_localization (
27+ df :pd .DataFrame ,
28+ data_id :str ,
29+ output_directory :str ,
30+ align_segmentation :bool = True ,
31+ ):
32+ '''
33+ This is the main function to localize nuclei inside a 3D mesh.
34+
35+ Parameters
36+ ----------
37+ manifest_path: str
38+ Path to the csv manifest of the full dataset
39+ data_id: str
40+ Data ID from manifest for data to process
41+ output_directory: str
42+ Path to the output directory where the localized nuclei data will be saved.
43+ align_segmentation: bool
44+ Flag to enable alignment of the segmentation using the barcode of the movie.
45+ Default is True.
46+ '''
47+ # ensure output directory exists
48+ out_dir = Path (output_directory )
49+ out_dir .mkdir (exist_ok = True , parents = True )
50+
51+ tmp_dir = Path ("./emt_tmp/nuclei_localization/" )
52+ tmp_dir .mkdir (exist_ok = True , parents = True )
53+
54+ # load segmentations and meshes
55+ # First, check for local ZARR file in the reprocessed directory
56+ local_zarr_base = Path ("/allen/aics/emt/all_cells_masks/ZARR_Conversion/August_24_H2B_reprocess_v2/main" )
57+ local_zarr_path = local_zarr_base / f"{ data_id } _H2B_nuclear_segmentation.ome.zarr"
58+
59+ local_zarr_base_batch2v2 = Path ("/allen/aics/emt/nuclear_segmentation/ZARR_Conversion/deliverable_2_v2" )
60+ local_zarr_path_batch2v2 = local_zarr_base_batch2v2 / f"{ data_id } _H2B_nuclear_segmentation.ome.zarr"
61+
62+ if local_zarr_path .exists ():
63+ seg_path = str (local_zarr_path )
64+ print (f"Using batch1 local ZARR: { seg_path } " )
65+ elif local_zarr_path_batch2v2 .exists ():
66+ seg_path = str (local_zarr_path_batch2v2 )
67+ print (f"Using batch2_v2 local ZARR: { seg_path } " )
68+ elif df ['Gene' ].values [0 ] == 'HIST1H2BJ' :
69+ seg_path = df ['H2B Nuclear Segmentation URL' ].values [0 ]
70+ print (f"Using H2B segmentation from quilt manifest: { seg_path } " )
71+ else :
72+ raise ValueError (f"The move { data_id } does not have H2B segmentations" )
73+
74+ # import pdb; pdb.set_trace()
75+ segmentations = BioImage (seg_path )
76+
77+ # Check for local mesh files first, then fall back to S3 bucket
78+ # Local mesh directory for resubmission data
79+ local_mesh_base = Path ("//allen/aics/emt/basement_membrane_segmentation/Resubmission/compile" )
80+ local_mesh_folder = local_mesh_base / f"{ data_id } _collagenIV_segmentation_mesh"
81+
82+ mesh_fn = None
83+ use_local_mesh = False
84+
85+ # Check if local mesh folder exists with VTM file
86+ if local_mesh_folder .exists ():
87+ local_vtm_files = list (local_mesh_folder .glob ('*.vtm' ))
88+ if local_vtm_files :
89+ mesh_fn = local_vtm_files [0 ]
90+ use_local_mesh = True
91+ print (f"Using local mesh: { mesh_fn } " )
92+
93+ if not use_local_mesh :
94+ # Download meshes into temporary directory from s3 bucket
95+ mesh_path = df ['CollagenIV Segmentation Mesh Folder' ].values [0 ].replace ('s3://allencell/' , '' )
96+ bucket = q3 .Bucket ("s3://allencell" )
97+ try :
98+ bucket .fetch (
99+ mesh_path + '/' ,
100+ str (tmp_dir ) + '/'
101+ )
102+ except Exception as e :
103+ print (f"Failed to download mesh for { data_id } : { e } " )
104+ rmtree (tmp_dir , ignore_errors = True )
105+ return
106+
107+ # load meshes - handle both naming conventions:
108+ # 1. DataID-prefixed: {data_id}_collagenIV_segmentation_mesh.vtm
109+ # 2. Generic: collagenIV_segmentation_mesh.vtm
110+ vtm_files = list (tmp_dir .glob ('*.vtm' ))
111+ if not vtm_files :
112+ print (f"No VTM mesh file found for { data_id } in { tmp_dir } " )
113+ rmtree (tmp_dir , ignore_errors = True )
114+ return
115+ mesh_fn = vtm_files [0 ] # Use the first (and typically only) VTM file
116+ print (f"Using S3 mesh: { mesh_fn } " )
117+
118+
119+ # load meshes
120+ meshes = pv .read (mesh_fn )
121+
122+ # localize nuclei for each timepoint
123+ num_timepoints = int (df ['Image Size T' ].values [0 ])
124+ nuclei = []
125+ for timepoint in tqdm (range (num_timepoints ), desc = f"Movie { data_id } " ):
126+ # check if mesh exists for this timepoint
127+ if f'{ timepoint } ' not in meshes .keys ():
128+ print (f"Mesh for timepoint { timepoint } not found." )
129+ continue
130+
131+ if align_segmentation :
132+ alignment_matrix = alignment .parse_rotation_matrix_from_string (df ['Dual Camera Alignment Matrix Value' ].values [0 ])
133+ else :
134+ alignment_matrix = np .zeros ((3 ,3 ))
135+
136+ # localize nuclei
137+ nuclei_tp = localize_for_timepoint (
138+ mesh = meshes [f'{ timepoint } ' ],
139+ seg = segmentations .get_image_data ("ZYX" , T = timepoint ).squeeze (),
140+ align_segmentation = align_segmentation ,
141+ alignment_matrix = alignment_matrix
142+ )
143+
144+ nuclei_tp ['Data ID' ] = data_id
145+ nuclei_tp ['Time hr' ] = timepoint / 0.5
146+ nuclei .append (nuclei_tp )
147+
148+ # save nuclei data
149+ nuclei = pd .concat (nuclei )
150+ cols = nuclei .columns .tolist ()
151+ newcols = cols [- 2 :]
152+ newcols .extend (cols [:- 2 ])
153+ nuclei = nuclei [newcols ]
154+
155+ out_fn = out_dir / (data_id + "_localized_nuclei.csv" )
156+ nuclei .to_csv (out_fn , index = False )
157+ rmtree (tmp_dir )
158+
159+
160+
161+ #####----------Helper Functions----------#####
162+
163+ def fill_holes_flat_cap (mesh_pv : pv .PolyData ) -> tuple :
164+ """
165+ Fill holes in a mesh by creating a flat cap at the boundary level.
166+ Uses point_cloud_utils make_mesh_watertight after adding cap geometry.
167+
168+ This is an MIT-licensed alternative to PyMeshFix's GPL-licensed repair.
169+
170+ Parameters
171+ ----------
172+ mesh_pv : pv.PolyData
173+ PyVista mesh with holes to fill.
174+
175+ Returns
176+ -------
177+ tuple
178+ (vertices, faces) of the watertight mesh.
179+ """
180+ vert = mesh_pv .points .copy ()
181+ faces = mesh_pv .faces .reshape (- 1 , 4 )[:, 1 :].copy ()
182+
183+ # Extract boundary edges (the hole outline)
184+ boundary = mesh_pv .extract_feature_edges (
185+ boundary_edges = True , feature_edges = False ,
186+ manifold_edges = False , non_manifold_edges = False
187+ )
188+ boundary_points = boundary .points
189+
190+ if len (boundary_points ) == 0 :
191+ # No holes found, make watertight anyway
192+ vw , fw = pcu .make_mesh_watertight (vert , faces , 10000 )
193+ return vw , fw
194+
195+ # Use median Z of boundary as cap level
196+ cap_z = np .percentile (boundary_points [:, 2 ], 50 )
197+
198+ # Map boundary points to vertex indices in original mesh
199+ boundary_indices = []
200+ for bp in boundary_points :
201+ dists = np .linalg .norm (vert - bp , axis = 1 )
202+ idx = np .argmin (dists )
203+ if dists [idx ] < 0.1 :
204+ boundary_indices .append (idx )
205+ boundary_indices = np .unique (boundary_indices )
206+
207+ # Create cap vertices (same XY as boundary, but at cap_z)
208+ cap_verts = vert [boundary_indices ].copy ()
209+ cap_verts [:, 2 ] = cap_z
210+
211+ # Add cap vertices to mesh
212+ n_orig_verts = len (vert )
213+ new_vert = np .vstack ([vert , cap_verts ])
214+ cap_vert_indices = np .arange (n_orig_verts , n_orig_verts + len (cap_verts ))
215+ boundary_to_cap = dict (zip (boundary_indices , cap_vert_indices ))
216+
217+ # Create side faces connecting boundary to cap
218+ boundary_edges = boundary .lines .reshape (- 1 , 3 )[:, 1 :]
219+ side_faces = []
220+ for edge in boundary_edges :
221+ p1 , p2 = boundary_points [edge [0 ]], boundary_points [edge [1 ]]
222+ d1 = np .linalg .norm (vert - p1 , axis = 1 )
223+ d2 = np .linalg .norm (vert - p2 , axis = 1 )
224+ v1 , v2 = np .argmin (d1 ), np .argmin (d2 )
225+ if v1 in boundary_to_cap and v2 in boundary_to_cap :
226+ c1 , c2 = boundary_to_cap [v1 ], boundary_to_cap [v2 ]
227+ side_faces .append ([v1 , v2 , c2 ])
228+ side_faces .append ([v1 , c2 , c1 ])
229+ side_faces = np .array (side_faces )
230+
231+ # Create cap faces using Delaunay triangulation
232+ cap_xy = cap_verts [:, :2 ]
233+ tri = Delaunay (cap_xy )
234+ cap_faces = cap_vert_indices [tri .simplices ]
235+
236+ # Combine all faces
237+ new_faces = np .vstack ([faces , side_faces , cap_faces ])
238+
239+ # Make watertight using point_cloud_utils
240+ vw , fw = pcu .make_mesh_watertight (new_vert , new_faces , 10000 )
241+
242+ return vw , fw
243+
244+
245+ def localize_for_timepoint (
246+ mesh :pv .PolyData ,
247+ seg :np .ndarray ,
248+ align_segmentation :bool ,
249+ alignment_matrix :np .ndarray
250+ ):
251+ '''
252+ This function localizes nuclei inside a 3D mesh for a given timepoint.
253+
254+ Parameters
255+ ----------
256+ mesh: pv.PolyData
257+ 3D mesh for the timepoint.
258+ seg: np.ndarray
259+ Nuclei segmentation for the timepoint.
260+ align_segmentation: bool
261+ Flag to enable alignment of the segmentation using the barcode of the movie.
262+ barcode: str
263+ Barcode of the movie.
264+ '''
265+
266+ # align segmentation if required
267+ if align_segmentation :
268+ transform = alignment .get_alignment_matrix (alignment_matrix )
269+ transform = transform .inverse
270+
271+ mf_holes = mesh .extract_feature_edges (boundary_edges = True , feature_edges = False , manifold_edges = False )
272+ outline_verts = mf_holes .points
273+ top = np .percentile (outline_verts [:,2 ], 99 )
274+ for i in range (outline_verts .shape [0 ]):
275+ vert = outline_verts [i ]
276+ mesh .extract_feature_edges (boundary_edges = True , feature_edges = False , manifold_edges = False )
277+ new_vert = np .array ([vert [0 ], vert [1 ], max ([vert [2 ], top ])])
278+
279+ v_idx = mesh .find_closest_point (vert )
280+ mesh .points [v_idx ] = new_vert
281+
282+ # transpose segmentation to XYZ coordinates and set z-scale for isotropic resolution
283+ seg = seg .transpose (2 , 1 , 0 )
284+ scale = 2.88 / 0.271
285+
286+ # Fill holes and create watertight mesh using custom flat cap approach
287+ vw , fw = fill_holes_flat_cap (mesh )
288+ mesh = trimesh .Trimesh (vertices = vw , faces = fw )
289+
290+ # initialize ray caster (for checking if a point is inside the mesh)
291+ rayCaster = trimesh .ray .ray_triangle .RayMeshIntersector (mesh )
292+
293+ # initialize nuclei data dictionary
294+ nucData = {}
295+ nucData ["Label" ] = []
296+ nucData ["Inside" ] = []
297+ nucData ["X" ] = []
298+ nucData ["Y" ] = []
299+ nucData ["Z" ] = []
300+
301+ # localize nuclei
302+ props = regionprops (seg .astype (int ))
303+ for prop in props :
304+ nucData ['Label' ].append (prop .label )
305+ nucData ["X" ].append (int (prop .centroid [0 ]))
306+ nucData ["Y" ].append (int (prop .centroid [1 ]))
307+ nucData ["Z" ].append (int (prop .centroid [2 ]))
308+
309+ # get nuclei centroid (scales to isotropic resolution)
310+ centroid = [
311+ prop .centroid [0 ],
312+ prop .centroid [1 ],
313+ prop .centroid [2 ] * scale
314+ ]
315+
316+ try :
317+ contains = rayCaster .contains_points ([centroid ])
318+ except :
319+ continue
320+
321+ # check if centroid is inside the mesh
322+ if contains [0 ]:
323+ nucData ['Inside' ].append (True )
324+ else :
325+ nucData ['Inside' ].append (False )
326+
327+ return pd .DataFrame (nucData )
328+
329+
330+ #####----------Run Function Call----------#####
331+
332+ def run_nuclei_localization (
333+ df_manifest :pd .DataFrame ,
334+ output_directory :str ,
335+ align_segmentation :bool = True ,
336+ ):
337+ '''
338+ This is the main function to localize nuclei inside a 3D mesh.
339+
340+ Parameters
341+ ----------
342+ manifest_path: str
343+ Path to the csv manifest of the full dataset
344+ data_id: str
345+ Data ID from manifest for data to process
346+ output_directory: str
347+ Path to the output directory where the localized nuclei data will be saved.
348+ align_segmentation: bool
349+ Flag to enable alignment of the segmentation using the barcode of the movie.
350+ Default is True.
351+ '''
352+ # Filter to specific Data IDs for analysis
353+ ANALYSIS_DATA_IDS = [
354+ '3500005548_43' , '3500005548_46' , '3500005548_48' ,
355+ '3500005824_35' , '3500005824_36' , '3500005824_37' , '3500005824_38' ,
356+ '3500005828_43' , '3500005828_45' , '3500005828_46' , '3500005828_67' , '3500005828_70' ,
357+ '3500006256_19' , '3500006256_21' ,
358+ '3500007081_8' ,
359+ '3500007213_38' ,
360+ '3500007247_5' ,
361+ '3500007432_52' , '3500007432_57' , '3500007432_61' , '3500007432_63' ,
362+ ]
363+
364+ df_cond = df_manifest [df_manifest ['Data ID' ].isin (ANALYSIS_DATA_IDS )]
365+
366+ print (f"Processing { len (df_cond )} movies with CollagenIV segmentations." )
367+
368+ for data_id in tqdm (pd .unique (df_cond ['Data ID' ]), desc = "Movies" ):
369+ df_id = df_manifest [df_manifest ['Data ID' ] == data_id ]
370+
371+ # make sure the movie has the required segmentations
372+ nuclei_localization (
373+ df = df_id ,
374+ data_id = data_id ,
375+ output_directory = output_directory ,
376+ align_segmentation = align_segmentation
377+ )
378+
379+ #####----------Argument Parsing----------#####
380+ if __name__ == '__main__' :
381+ manifest = io .load_imaging_and_segmentation_dataset ()
382+ output_dir = io .setup_base_directory_name ("nuclei_localization" )
383+
384+ run_nuclei_localization (
385+ df_manifest = manifest ,
386+ output_directory = output_dir
387+ )
0 commit comments