Skip to content

Commit 52cb0cd

Browse files
authored
Merge pull request #30 from AllenCell/reSub/August_updatefig
Re sub/august updatefig
2 parents 3d87e66 + cbe8372 commit 52cb0cd

File tree

10 files changed

+499
-443
lines changed

10 files changed

+499
-443
lines changed

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,4 +171,7 @@ cython_debug/
171171
pdm.toml
172172

173173
#csv
174-
*.csv
174+
*.csv
175+
176+
#mesh temporary directory
177+
emt_tmp/

EMT_data_analysis/analysis_scripts/Analysis_tools.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ def load_io_data(df):
8888
]]
8989

9090
df_io = io.load_inside_outside_classification()
91+
df_io = df_io[df_io['Z']<27]
9192

9293
dfio_merged=pd.merge(df_io, df_info, on='Data ID', suffixes=['','_remove'])
9394
remove = [col for col in dfio_merged.columns if 'remove' in col]

EMT_data_analysis/analysis_scripts/Feature_extraction.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def compute_bf_colony_features_all_movies(output_folder, align=True):
3838
print(f"Movie: {movie_id}")
3939

4040
print("Getting raw data...")
41-
raw_path = df_movie["File URL"].values[0]
41+
raw_path = df_movie["Raw File URL"].values[0]
4242
raw_reader = BioImage(raw_path)
4343
print(raw_path)
4444
print(raw_reader.shape)

EMT_data_analysis/analysis_scripts/Nuclei_localization.py

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
import pyvista as pv
1212
import trimesh
1313
import point_cloud_utils as pcu
14-
import pymeshfix as mf
1514

1615
from bioio import BioImage
1716

@@ -25,7 +24,7 @@
2524

2625
def nuclei_localization(
2726
df:pd.DataFrame,
28-
movie_id:str,
27+
data_id:str,
2928
output_directory:str,
3029
align_segmentation:bool=True,
3130
):
@@ -36,8 +35,8 @@ def nuclei_localization(
3635
----------
3736
manifest_path: str
3837
Path to the csv manifest of the full dataset
39-
movie_id: str
40-
Movie ID from manifest for data to process
38+
data_id: str
39+
Data ID from manifest for data to process
4140
output_directory: str
4241
Path to the output directory where the localized nuclei data will be saved.
4342
align_segmentation: bool
@@ -57,7 +56,7 @@ def nuclei_localization(
5756
elif df['Gene'].values[0] == 'EOMES|TBR2':
5857
seg_path = df['EOMES Nuclear Segmentation URL'].values[0]
5958
else:
60-
raise ValueError(f"The move {movie_id} does not have EOMES or H2B segmentations")
59+
raise ValueError(f"The move {data_id} does not have EOMES or H2B segmentations")
6160

6261
# import pdb; pdb.set_trace()
6362
segmentations = BioImage(df['CollagenIV Segmentation Probability URL'].values[0])
@@ -77,7 +76,7 @@ def nuclei_localization(
7776
# localize nuclei for each timepoint
7877
num_timepoints = int(df['Image Size T'].values[0])
7978
nuclei = []
80-
for timepoint in tqdm(range(num_timepoints), desc=f"Movie {movie_id}"):
79+
for timepoint in tqdm(range(num_timepoints), desc=f"Movie {data_id}"):
8180
# check if mesh exists for this timepoint
8281
if f'{timepoint}' not in meshes.keys():
8382
print(f"Mesh for timepoint {timepoint} not found.")
@@ -87,7 +86,7 @@ def nuclei_localization(
8786
break
8887

8988
if align_segmentation:
90-
alignment_matrix = alignment.parse_rotation_matrix_from_string(df['Camera Alignment Matrix'].values[0])
89+
alignment_matrix = alignment.parse_rotation_matrix_from_string(df['Dual Camera Alignment Matrix Value'].values[0])
9190
else:
9291
alignment_matrix = np.zeros((3,3))
9392

@@ -99,7 +98,7 @@ def nuclei_localization(
9998
alignment_matrix=alignment_matrix
10099
)
101100

102-
nuclei_tp['Movie ID'] = movie_id
101+
nuclei_tp['Data ID'] = data_id
103102
nuclei_tp['Time hr'] = timepoint / 0.5
104103
nuclei.append(nuclei_tp)
105104

@@ -110,7 +109,7 @@ def nuclei_localization(
110109
newcols.extend(cols[:-2])
111110
nuclei = nuclei[newcols]
112111

113-
out_fn = out_dir / (movie_id + "_localized_nuclei.csv")
112+
out_fn = out_dir / (data_id + "_localized_nuclei.csv")
114113
nuclei.to_csv(out_fn, index=False)
115114
rmtree(tmp_dir)
116115

@@ -230,8 +229,8 @@ def run_nuclei_localization(
230229
----------
231230
manifest_path: str
232231
Path to the csv manifest of the full dataset
233-
movie_id: str
234-
Movie ID from manifest for data to process
232+
data_id: str
233+
Data ID from manifest for data to process
235234
output_directory: str
236235
Path to the output directory where the localized nuclei data will be saved.
237236
align_segmentation: bool
@@ -244,13 +243,13 @@ def run_nuclei_localization(
244243

245244
print(f"Processing {len(df_cond)} movies with CollagenIV segmentations.")
246245

247-
for movie_id in tqdm(pd.unique(df_cond['Movie ID']), desc="Movies"):
248-
df_id = df_manifest[df_manifest['Movie ID'] == movie_id]
246+
for data_id in tqdm(pd.unique(df_cond['Data ID']), desc="Movies"):
247+
df_id = df_manifest[df_manifest['Data ID'] == data_id]
249248

250249
# make sure the movie has the required segmentations
251250
nuclei_localization(
252251
df=df_id,
253-
movie_id=movie_id,
252+
data_id=data_id,
254253
output_directory=output_directory,
255254
align_segmentation=align_segmentation
256255
)

EMT_data_analysis/figure_generation/colony_mask.py

Lines changed: 23 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -14,37 +14,37 @@
1414
from skimage.morphology import remove_small_objects
1515
import argparse
1616
from typing import List
17-
17+
from EMT_data_analysis.tools import io, const
1818

1919
def main(
20-
dataset_manifest_path: str,
21-
colony_feature_manifest_path: str,
22-
movie_id: str,
20+
data_id: str,
2321
out_dir: str,
2422
):
2523
'''
2624
This function creates a visualization of the colony mask in 3D for 0, 16, 32, and 48 hours.
2725
2826
Parameters
2927
----------
30-
dataset_manifest_path: str
31-
Path to the csv manifest containing summary data of the entire dataset
32-
colony_feature_manifest_path: str
33-
Path to the csv manifest containing results from brightfield colony mask feature extraction.
34-
movie_id: str
35-
Movie Unique ID of the movie.
28+
data_id: str
29+
Data ID of the movie.
3630
out_dir: str
3731
Path to the output directory where the visualization will be saved.
3832
'''
33+
34+
if out_dir is None:
35+
out_dir = io.setup_base_directory_name("figures/3D Renders")
36+
else:
37+
out_dir = Path(out_dir)
38+
out_dir.mkdir(exist_ok=True, parents=True)
3939

4040
# get bottom z layer
41-
df_feature = pd.read_csv(colony_feature_manifest_path)
42-
zbottom = df_feature.loc[df['Movie Unique ID'] == movie_id, 'z_bottom'].values[0]
41+
df_feature = io.load_image_analysis_extracted_features()
42+
zbottom = int(df_feature.loc[df_feature['Data ID'] == data_id, 'Bottom Z plane'].values[0])
4343

4444
# get segmentation and base filename
45-
df_manifest = pd.read_csv(dataset_manifest_path)
46-
seg_fn = df_manifest.loc[df_manifest['Movie Unique ID'] == movie_id, 'All Cells Mask File Download'].values[0]
47-
seg = BioIo(seg_fn)
45+
df_manifest = io.load_imaging_and_segmentation_dataset()
46+
seg_fn = df_manifest.loc[df_manifest['Data ID'] == data_id, 'All Cells Mask File Download'].values[0]
47+
seg_file = BioImage(seg_fn)
4848
outname = Path(seg_fn).stem + '_figure'
4949

5050
# lighting setup
@@ -65,6 +65,7 @@ def main(
6565
)
6666

6767
# process frames for 0, 16, 32, and 48 hours
68+
pv.start_xvfb()
6869
pl = pv.Plotter(off_screen=True, notebook=False, window_size=(1088, 1088))
6970
for tp in tqdm([0, 32, 64, 96]):
7071
# clear scene
@@ -231,30 +232,21 @@ def cgal_vertices_faces_triangle_mesh(Q: Polyhedron_3):
231232
if __name__ == '__main__':
232233
parser = argparse.ArgumentParser(description='Generate figures for colony mask segmentation.')
233234

235+
234236
parser.add_argument(
235-
'--manifest_path',
236-
type=str,
237-
required=True,
238-
help='Path to the csv manifest containing summary data of the entire dataset.'
239-
)
240-
parser.add_argument(
241-
'--feature_path',
242-
type=str,
243-
required=True,
244-
help='Path to the csv manifest containing results from brightfield colony mask feature extraction.'
245-
)
246-
parser.add_argument(
247-
'--movie_id',
237+
'--data_id',
248238
type=str,
249-
required=True,
250239
help='Movie Unique ID of the movie.'
251240
)
252241
parser.add_argument(
253242
'--output_directory',
254243
type=str,
255-
required=True,
256244
help='Path to the output directory where the visualization will be saved.'
257245
)
258246

259247
args = parser.parse_args()
260-
main(args.manifest_path, args.feature_path, args.movie_id, args.output_directory)
248+
if args.data_id is None:
249+
for data_id in const.EXAMPLE_ACM_IDS:
250+
main(data_id, args.output_directory)
251+
else:
252+
main(args.data_id, args.output_directory)

EMT_data_analysis/figure_generation/inside-outside_classification.py

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,38 +11,43 @@
1111
import pandas as pd
1212
import argparse
1313
import quilt3 as q3
14+
from typing import Optional
1415

15-
from EMT_data_analysis.tools import alignment, io
16+
from EMT_data_analysis.tools import alignment, io, const
1617

1718

1819
def main(
19-
data_id: str,
20-
output: str
20+
data_id: Optional[str]=None,
21+
output: Optional[str]=None
2122
):
2223
'''
2324
Generate three figures for the inside-outside classification of nuclei
2425
at 0, 16, and 32 hours.
2526
2627
Parameters
2728
----------
28-
mesh_fn: str
29-
Path to the .vtm file for the whole colony timelapse.
30-
mid: str
29+
data_id: str
3130
Data ID of the movie.
32-
data_csv: str
33-
Path to the CSV file containing the inside-outside classification data.
3431
output: str
3532
Path to the output directory where the figures will be saved.
3633
'''
3734
# ensure output directory exists
38-
output = Path(output)
39-
output.mkdir(exist_ok=True, parents=True)
35+
36+
if data_id is None:
37+
data_id = const.EXAMPLE_IO_ID
38+
39+
if output is None:
40+
output = io.setup_base_directory_name("figures/Inside-Outside/mesh-figures")
41+
else:
42+
output = Path(output)
43+
output.mkdir(exist_ok=True, parents=True)
4044

4145
# load data
4246
df_meta = io.load_imaging_and_segmentation_dataset()
4347
df_meta = df_meta[df_meta['Data ID'] == data_id]
4448
df = io.load_inside_outside_classification()
4549
df = df[df['Data ID'] == data_id]
50+
df = df[df['Z']<27]
4651

4752
tmp_dir = Path("./emt_tmp/nuclei_localization/")
4853
tmp_dir.mkdir(exist_ok=True, parents=True)
@@ -146,14 +151,12 @@ def create_nucleus_mesh(df_nucleus: pd.DataFrame):
146151
parser = argparse.ArgumentParser(description='Generate figures for inside-outside classification of nuclei.')
147152
parser.add_argument(
148153
'--data_id',
149-
type=str,
150-
default='3500005828_45',
151-
help='FMS ID of the movie.'
154+
type=str,
155+
help='Data ID of the movie.'
152156
)
153157
parser.add_argument(
154158
'--output',
155159
type=str,
156-
required=True,
157160
help='Path to the output directory where the figures will be saved.'
158161
)
159162

EMT_data_analysis/tools/const.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,10 @@
4242
'3500005834_55']
4343

4444
# Nucleus Fraction Inside/Outside Example
45-
EXAMPLE_IO_ID = '3500005828_45'
45+
EXAMPLE_IO_ID = '3500005828_45'
46+
47+
# All Cells Mask Examples
48+
EXAMPLE_ACM_IDS = [
49+
'3500005824_36',
50+
'3500006256_12'
51+
]

README.md

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,27 +24,55 @@ pip install -e .
2424

2525
## 1 - Feature extraction
2626

27-
Run: `python Feature_extraction.py`
27+
Run: `python EMT_data_analysis/analysis_scripts/Feature_extraction.py`
2828

2929
This will generate one CSV for each movie with the extracted features. CSVs are stored in the folder `EMT_data_analysis/results/feature_extraction`
3030

3131
## 2 - Metric computation
3232

33-
Run: `python Metric_computation.py`
33+
Run: `python EMT_data_analysis/analysis_scripts/Metric_computation.py`
3434

3535
This will generate a single CSV containing information about all the movies to be used for analysis. The manifest is saved as `EMT_data_analysis/results/metric_computation/Image_analysis_extracted_features.csv`.
3636

3737
## 3 - Nuclei localization
3838

39-
Run: `python Nuclei_localization.py`
39+
Run: `python EMT_data_analysis/analysis_scripts/Nuclei_localization.py`
4040

4141
This will generate a CSV of individual nuclei classified as inside the basement membrane or not over the course of the timelapse for EOMES and H2B movies. The manifest is saved as `EMT_data_analysis/results/nuclei_localization/Migration_timing_trough_mesh_extracted_feature.csv`.
4242

4343
## 4 - Analysis Plots
4444

45-
Run: `python Analysis_tools.py`
45+
Run: `python EMT_data_analysis/analysis_scripts/Analysis_tools.py`
46+
47+
This will generate the plots in the manuscript and store them in the `results/figures` folder. The manifests used as inputs in this workflow are automatically downloaded from [AWS](https://open.quiltdata.com/b/allencell/tree/aics/emt_timelapse_dataset/manifests/) by default.
48+
49+
## 5 - [Optional] 3D Example Rendering
50+
51+
The functions in `EMT_data_analysis/figure_generation` can be used to generate the 3D renderings shown in the paper. Functions have only been tested on Ubuntu 18.04/22.04.
52+
53+
On Ubuntu or Debian:
54+
```bash
55+
sudo apt-get install xvfb libgl1-mesa-glx
56+
```
57+
On Windows:
58+
Comment out any instance of `pv.start_xvfb()` in the code before running.
59+
60+
### All Cells Mask
61+
Run
62+
```bash
63+
python EMT_data_analysis/figure_generation/colony_mask.py --data_id [Optional] --output_directory [Optional]
64+
```
65+
If no input arguments are provided, the code will default to the data shown in the paper and output results to `EMT_data_analysis/results/3D_all_cells_mask`.
66+
Data ID values are only valid inputs if they have a non-empty value for `All Cells Mask File Download` in the `image_and_segmentation_data.csv` manifest on [AWS](https://open.quiltdata.com/b/allencell/tree/aics/emt_timelapse_dataset/manifests/).
67+
68+
### Inside-Outside Classification
69+
Run
70+
```bash
71+
python EMT_data_analysis/figure_generation/inside-outside_classification.py --data_id [Optional] --output [Optional]
72+
```
73+
If no input arguments are provided, the code will default to the data shown in the paper and output results to `EMT_data_analysis/results/Inside-Outside/mesh-figures`.
74+
Data ID values are only valid inputs if they have a non-empty value for `CollagenIV Segmentation Mesh Folder` in the `image_and_segmentation_data.csv` manifest on [AWS](https://open.quiltdata.com/b/allencell/tree/aics/emt_timelapse_dataset/manifests/).
4675

47-
This will generate the plots in the manuscript and store them in `results/figures` folder. The manifests used as inputs in this workflow are automatically downloaded from [AWS](https://open.quiltdata.com/b/allencell/tree/aics/emt_timelapse_dataset/manifests/) by default. The user can opt to also use local version of these manifests if they produced locally by running the scripts `Feature_extraction.py`, `Metric_computation.py` and `Nuclei_localization.py`. To use local version of the manifests, please set `load_from_aws=False` everywhere in the script `Analysis_plots.py`.
4876

4977
# Contact
5078
If you have questions about this code, please reach out to us at [email protected].

0 commit comments

Comments
 (0)