Merge pull request #30 from AllenCell/reSub/August_updatefig

smishra3 · web-flow · commit 52cb0cd4df1f · 2025-11-13T09:43:59.000-08:00
Re sub/august updatefig
diff --git a/.gitignore b/.gitignore
@@ -171,4 +171,7 @@ cython_debug/
 pdm.toml
 
 #csv
-*.csv
+*.csv
+
+#mesh temporary directory
+emt_tmp/
diff --git a/EMT_data_analysis/analysis_scripts/Analysis_tools.py b/EMT_data_analysis/analysis_scripts/Analysis_tools.py
@@ -88,6 +88,7 @@ def load_io_data(df):
     ]]
 
     df_io = io.load_inside_outside_classification()
+    df_io = df_io[df_io['Z']<27]
 
     dfio_merged=pd.merge(df_io, df_info, on='Data ID', suffixes=['','_remove'])
     remove = [col for col in dfio_merged.columns if 'remove' in col]
diff --git a/EMT_data_analysis/analysis_scripts/Feature_extraction.py b/EMT_data_analysis/analysis_scripts/Feature_extraction.py
@@ -38,7 +38,7 @@ def compute_bf_colony_features_all_movies(output_folder, align=True):
         print(f"Movie: {movie_id}")
         
         print("Getting raw data...")
-        raw_path = df_movie["File URL"].values[0]
+        raw_path = df_movie["Raw File URL"].values[0]
         raw_reader = BioImage(raw_path)
         print(raw_path)
         print(raw_reader.shape)
diff --git a/EMT_data_analysis/analysis_scripts/Nuclei_localization.py b/EMT_data_analysis/analysis_scripts/Nuclei_localization.py
@@ -11,7 +11,6 @@
 import pyvista as pv
 import trimesh
 import point_cloud_utils as pcu
-import pymeshfix as mf
 
 from bioio import BioImage
 
@@ -25,7 +24,7 @@
 
 def nuclei_localization(
         df:pd.DataFrame, 
-        movie_id:str,
+        data_id:str,
         output_directory:str,
         align_segmentation:bool=True,
     ):
@@ -36,8 +35,8 @@ def nuclei_localization(
         ----------
         manifest_path: str
             Path to the csv manifest of the full dataset
-        movie_id: str
-            Movie ID from manifest for data to process
+        data_id: str
+            Data ID from manifest for data to process
         output_directory: str
             Path to the output directory where the localized nuclei data will be saved.
         align_segmentation: bool
@@ -57,7 +56,7 @@ def nuclei_localization(
     elif df['Gene'].values[0] == 'EOMES|TBR2':
         seg_path = df['EOMES Nuclear Segmentation URL'].values[0]
     else:
-        raise ValueError(f"The move {movie_id} does not have EOMES or H2B segmentations")
+        raise ValueError(f"The move {data_id} does not have EOMES or H2B segmentations")
         
     # import pdb; pdb.set_trace()
     segmentations = BioImage(df['CollagenIV Segmentation Probability URL'].values[0])
@@ -77,7 +76,7 @@ def nuclei_localization(
     # localize nuclei for each timepoint
     num_timepoints = int(df['Image Size T'].values[0])
     nuclei = []
-    for timepoint in tqdm(range(num_timepoints), desc=f"Movie {movie_id}"):
+    for timepoint in tqdm(range(num_timepoints), desc=f"Movie {data_id}"):
         # check if mesh exists for this timepoint
         if f'{timepoint}' not in meshes.keys():
             print(f"Mesh for timepoint {timepoint} not found.")
@@ -87,7 +86,7 @@ def nuclei_localization(
             break
         
         if align_segmentation:
-            alignment_matrix = alignment.parse_rotation_matrix_from_string(df['Camera Alignment Matrix'].values[0])
+            alignment_matrix = alignment.parse_rotation_matrix_from_string(df['Dual Camera Alignment Matrix Value'].values[0])
         else:
             alignment_matrix = np.zeros((3,3))
 
@@ -99,7 +98,7 @@ def nuclei_localization(
             alignment_matrix=alignment_matrix
         )
         
-        nuclei_tp['Movie ID'] = movie_id
+        nuclei_tp['Data ID'] = data_id
         nuclei_tp['Time hr'] = timepoint / 0.5
         nuclei.append(nuclei_tp)
         
@@ -110,7 +109,7 @@ def nuclei_localization(
     newcols.extend(cols[:-2])
     nuclei = nuclei[newcols]
 
-    out_fn = out_dir / (movie_id + "_localized_nuclei.csv")
+    out_fn = out_dir / (data_id + "_localized_nuclei.csv")
     nuclei.to_csv(out_fn, index=False)
     rmtree(tmp_dir)
 
@@ -230,8 +229,8 @@ def run_nuclei_localization(
         ----------
         manifest_path: str
             Path to the csv manifest of the full dataset
-        movie_id: str
-            Movie ID from manifest for data to process
+        data_id: str
+            Data ID from manifest for data to process
         output_directory: str
             Path to the output directory where the localized nuclei data will be saved.
         align_segmentation: bool
@@ -244,13 +243,13 @@ def run_nuclei_localization(
 
     print(f"Processing {len(df_cond)} movies with CollagenIV segmentations.")
 
-    for movie_id in tqdm(pd.unique(df_cond['Movie ID']), desc="Movies"):
-        df_id = df_manifest[df_manifest['Movie ID'] == movie_id]
+    for data_id in tqdm(pd.unique(df_cond['Data ID']), desc="Movies"):
+        df_id = df_manifest[df_manifest['Data ID'] == data_id]
 
         # make sure the movie has the required segmentations
         nuclei_localization(
             df=df_id,
-            movie_id=movie_id,
+            data_id=data_id,
             output_directory=output_directory,
             align_segmentation=align_segmentation
         )
diff --git a/EMT_data_analysis/figure_generation/colony_mask.py b/EMT_data_analysis/figure_generation/colony_mask.py
@@ -14,37 +14,37 @@
 from skimage.morphology import remove_small_objects
 import argparse
 from typing import List
-
+from EMT_data_analysis.tools import io, const
 
 def main(
-        dataset_manifest_path: str,
-        colony_feature_manifest_path: str,
-        movie_id: str,
+        data_id: str,
         out_dir: str,
     ):
     '''
         This function creates a visualization of the colony mask in 3D for 0, 16, 32, and 48 hours.
         
         Parameters
         ----------
-        dataset_manifest_path: str
-            Path to the csv manifest containing summary data of the entire dataset
-        colony_feature_manifest_path: str
-            Path to the csv manifest containing results from brightfield colony mask feature extraction.
-        movie_id: str
-            Movie Unique ID of the movie.
+        data_id: str
+            Data ID of the movie.
         out_dir: str
             Path to the output directory where the visualization will be saved.
     '''
+
+    if out_dir is None:
+        out_dir = io.setup_base_directory_name("figures/3D Renders")
+    else:
+        out_dir = Path(out_dir)
+        out_dir.mkdir(exist_ok=True, parents=True)
     
     # get bottom z layer
-    df_feature = pd.read_csv(colony_feature_manifest_path)
-    zbottom = df_feature.loc[df['Movie Unique ID'] == movie_id, 'z_bottom'].values[0]
+    df_feature = io.load_image_analysis_extracted_features()
+    zbottom = int(df_feature.loc[df_feature['Data ID'] == data_id, 'Bottom Z plane'].values[0])
     
     # get segmentation and base filename
-    df_manifest = pd.read_csv(dataset_manifest_path)
-    seg_fn = df_manifest.loc[df_manifest['Movie Unique ID'] == movie_id, 'All Cells Mask File Download'].values[0]
-    seg = BioIo(seg_fn)
+    df_manifest = io.load_imaging_and_segmentation_dataset()
+    seg_fn = df_manifest.loc[df_manifest['Data ID'] == data_id, 'All Cells Mask File Download'].values[0]
+    seg_file = BioImage(seg_fn)
     outname = Path(seg_fn).stem + '_figure'
     
     # lighting setup
@@ -65,6 +65,7 @@ def main(
     )
 
     # process frames for 0, 16, 32, and 48 hours
+    pv.start_xvfb()
     pl = pv.Plotter(off_screen=True, notebook=False, window_size=(1088, 1088))
     for tp in tqdm([0, 32, 64, 96]):
         # clear scene
@@ -231,30 +232,21 @@ def cgal_vertices_faces_triangle_mesh(Q: Polyhedron_3):
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Generate figures for colony mask segmentation.')
 
+    
     parser.add_argument(
-        '--manifest_path',
-        type=str,
-        required=True,
-        help='Path to the csv manifest containing summary data of the entire dataset.'
-    )
-    parser.add_argument(
-        '--feature_path',
-        type=str,
-        required=True,
-        help='Path to the csv manifest containing results from brightfield colony mask feature extraction.'
-    )
-    parser.add_argument(
-        '--movie_id',
+        '--data_id',
         type=str,
-        required=True,
         help='Movie Unique ID of the movie.'
     )
     parser.add_argument(
         '--output_directory',
         type=str,
-        required=True,
         help='Path to the output directory where the visualization will be saved.'
     )
 
     args = parser.parse_args()
-    main(args.manifest_path, args.feature_path, args.movie_id, args.output_directory)
+    if args.data_id is None:
+        for data_id in const.EXAMPLE_ACM_IDS:
+            main(data_id, args.output_directory)
+    else:
+        main(args.data_id, args.output_directory)
diff --git a/EMT_data_analysis/figure_generation/inside-outside_classification.py b/EMT_data_analysis/figure_generation/inside-outside_classification.py
@@ -11,38 +11,43 @@
 import pandas as pd
 import argparse
 import quilt3 as q3
+from typing import Optional
 
-from EMT_data_analysis.tools import alignment, io
+from EMT_data_analysis.tools import alignment, io, const
 
 
 def main(
-        data_id: str,
-        output: str
+        data_id: Optional[str]=None,
+        output: Optional[str]=None
     ):
     '''
         Generate three figures for the inside-outside classification of nuclei
         at 0, 16, and 32 hours.
         
         Parameters
         ----------
-        mesh_fn: str
-            Path to the .vtm file for the whole colony timelapse.
-        mid: str
+        data_id: str
             Data ID of the movie.
-        data_csv: str
-            Path to the CSV file containing the inside-outside classification data.
         output: str
             Path to the output directory where the figures will be saved.
     '''
     # ensure output directory exists
-    output = Path(output)
-    output.mkdir(exist_ok=True, parents=True)
+
+    if data_id is None:
+        data_id = const.EXAMPLE_IO_ID
+
+    if output is None:
+        output = io.setup_base_directory_name("figures/Inside-Outside/mesh-figures")
+    else:
+        output = Path(output)
+        output.mkdir(exist_ok=True, parents=True)
     
     # load data
     df_meta = io.load_imaging_and_segmentation_dataset()
     df_meta = df_meta[df_meta['Data ID'] == data_id]
     df = io.load_inside_outside_classification()
     df = df[df['Data ID'] == data_id]
+    df = df[df['Z']<27]
 
     tmp_dir = Path("./emt_tmp/nuclei_localization/")
     tmp_dir.mkdir(exist_ok=True, parents=True)
@@ -146,14 +151,12 @@ def create_nucleus_mesh(df_nucleus: pd.DataFrame):
     parser = argparse.ArgumentParser(description='Generate figures for inside-outside classification of nuclei.')
     parser.add_argument(
         '--data_id', 
-        type=str, 
-        default='3500005828_45',
-        help='FMS ID of the movie.'
+        type=str,
+        help='Data ID of the movie.'
     )
     parser.add_argument(
         '--output', 
         type=str,
-        required=True,
         help='Path to the output directory where the figures will be saved.'
     )
     
diff --git a/EMT_data_analysis/tools/const.py b/EMT_data_analysis/tools/const.py
@@ -42,4 +42,10 @@
     '3500005834_55']
 
 # Nuclues Fraction Inside/Outside Example
-EXAMPLE_IO_ID = '3500005828_45'
+EXAMPLE_IO_ID = '3500005828_45'
+
+# All Cells Mask Examples
+EXAMPLE_ACM_IDS = [
+    '3500005824_36',
+    '3500006256_12'
+]
diff --git a/README.md b/README.md
@@ -24,27 +24,55 @@ pip install -e .
 
 ## 1 - Feature extraction
 
-Run: `python Feature_extraction.py`
+Run: `python EMT_data_analysis/analysis_scripts/Feature_extraction.py`
 
 This will generate one CSV for each movie with the extracted features. CSVs are stored in the folder `EMT_data_analysis/results/feature_extraction`
 
 ## 2 - Metric computation
 
-Run: `python Metric_computation.py`
+Run: `python EMT_data_analysis/analysis_scripts/Metric_computation.py`
 
 This will generate a single CSV containing information about all the movies to be used for analysis. The manifest is saved as `EMT_data_analysis/results/metric_computation/Image_analysis_extracted_features.csv`.
 
 ## 3 - Nuclei localization
 
-Run: `python Nuclei_localization.py`
+Run: `python EMT_data_analysis/analysis_scripts/Nuclei_localization.py`
 
 This will generate CSV for individual nuclei classified as inside the basement memebrane or not over the course of the timelapse for EOMES and H2B movies. The manifest is saved as `EMT_data_analysis/results/nuclei_localization/Migration_timing_trough_mesh_extracted_feature.csv`.
 
 ## 4 - Analysis Plots
 
-Run: `python Analysis_tools.py`
+Run: `python EMT_data_analysis/analysis_scripts/Analysis_tools.py`
+
+This will generate the plots in the manuscript and store them in `results/figures` folder. The manifests used as inputs in this workflow are automatically downloaded from [AWS](https://open.quiltdata.com/b/allencell/tree/aics/emt_timelapse_dataset/manifests/) by default. 
+
+## 5 - [Optional] 3D Example Rendering
+
+The functions in `EMT_data_analysis/figure_generation` can be used to generate the 3D renderings shown in the paper. These functions have only been tested on Ubuntu 18.04/22.04.
+
+On Ubuntu or Debian:
+```bash
+sudo apt-get install xvfb libgl1-mesa-glx
+```
+On Windows: 
+Comment out any instance of `pv.start_xvfb()` in the code before running.
+
+### All Cells Mask
+Run
+```bash
+python EMT_data_analysis/figure_generation/colony_mask.py --data_id [Optional] --output_directory [Optional]
+```
+If no input arguments are provided, the code will default to the data shown in the paper and output results to `EMT_data_analysis/results/3D_all_cells_mask`.
+Data ID values are only valid inputs if they have a non-empty value for `All Cells Mask File Download` in the `image_and_segmentation_data.csv` manifest on [AWS](https://open.quiltdata.com/b/allencell/tree/aics/emt_timelapse_dataset/manifests/).
+
+### Inside-Outside Classification
+Run
+```bash
+python EMT_data_analysis/figure_generation/inside-outside_classification.py --data_id [Optional] --output [Optional]
+```
+If no input arguments are provided, the code will default to the data shown in the paper and output results to `EMT_data_analysis/results/Inside-Outside/mesh-figures`.
+Data ID values are only valid inputs if they have a non-empty value for `CollagenIV Segmentation Mesh Folder` in the `image_and_segmentation_data.csv` manifest on [AWS](https://open.quiltdata.com/b/allencell/tree/aics/emt_timelapse_dataset/manifests/).
 
-This will generate the plots in the manuscript and store them in `results/figures` folder. The manifests used as inputs in this workflow are automatically downloaded from [AWS](https://open.quiltdata.com/b/allencell/tree/aics/emt_timelapse_dataset/manifests/) by default. The user can opt to also use local version of these manifests if they produced locally by running the scripts `Feature_extraction.py`, `Metric_computation.py` and `Nuclei_localization.py`. To use local version of the manifests, please set `load_from_aws=False` everywhere in the script `Analysis_plots.py`.
 
 # Contact
 If you have questions about this code, please reach out to us at cells@alleninstitute.org.
diff --git a/pdm.lock b/pdm.lock
diff --git a/pyproject.toml b/pyproject.toml