From 6dd62d34baa050f7b6b89d007f937c79688564e9 Mon Sep 17 00:00:00 2001 From: Chantelle Leveille Date: Wed, 13 Nov 2024 11:01:31 -0800 Subject: [PATCH 01/34] remove unused whole colony LRM feats --- .../lib/preprocessing/add_neighborhood_avg_features_lrm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nuc_morph_analysis/lib/preprocessing/add_neighborhood_avg_features_lrm.py b/nuc_morph_analysis/lib/preprocessing/add_neighborhood_avg_features_lrm.py index 36525e5f..8cdee8ca 100644 --- a/nuc_morph_analysis/lib/preprocessing/add_neighborhood_avg_features_lrm.py +++ b/nuc_morph_analysis/lib/preprocessing/add_neighborhood_avg_features_lrm.py @@ -7,8 +7,8 @@ from nuc_morph_analysis.lib.preprocessing import filter_data from nuc_morph_analysis.lib.preprocessing.filter_data import all_timepoints_minimal_filtering -LOCAL_RADIUS_LIST = [90, -1] -LOCAL_RADIUS_STR_LIST = ["90um", "whole_colony"] +LOCAL_RADIUS_LIST = [90] +LOCAL_RADIUS_STR_LIST = ["90um"] NEIGHBOR_FEATURE_LIST = ["volume"] NEIGHBOR_PREFIX = "neighbor_avg_lrm_" From dc643ab8badd2b4818bea2a3e33fb7e1aa728294 Mon Sep 17 00:00:00 2001 From: Chantelle Leveille Date: Wed, 13 Nov 2024 11:20:56 -0800 Subject: [PATCH 02/34] optionally add mother or sister features for full tracks --- .../lib/preprocessing/add_features.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/nuc_morph_analysis/lib/preprocessing/add_features.py b/nuc_morph_analysis/lib/preprocessing/add_features.py index 440f1ab8..d1d09a8b 100644 --- a/nuc_morph_analysis/lib/preprocessing/add_features.py +++ b/nuc_morph_analysis/lib/preprocessing/add_features.py @@ -474,7 +474,7 @@ def get_sister(df, pid, current_tid): sister_id = [tid for tid in tids if tid != current_tid] return sister_id -def add_lineage_features(df, feature_list): +def add_lineage_features(df, feature_list, relationship_list=['mother', 'sister']): """ If the full track has a full track sister or mother, add the given relative's feature 
as a single track feature column in the dataframe. @@ -484,6 +484,8 @@ def add_lineage_features(df, feature_list): The dataframe feature_list: list List of column names + relationship_list: list + List of relationships to add Returns ------- @@ -492,17 +494,19 @@ def add_lineage_features(df, feature_list): """ for feature in feature_list: - df[f"mothers_{feature}"] = np.nan - df[f"sisters_{feature}"] = np.nan + if 'mother' in relationship_list: + df[f"mothers_{feature}"] = np.nan + if 'sister' in relationship_list: + df[f"sisters_{feature}"] = np.nan df_lineage = df[df['colony'].isin(['small', 'medium'])] for tid, dft in df_lineage.groupby("track_id"): parent_id = dft.parent_id.values[0] - if parent_id != -1 and parent_id in df_lineage.track_id.unique(): + if 'mother' in relationship_list and parent_id != -1 and parent_id in df_lineage.track_id.unique(): for feature in feature_list: df.loc[df.track_id == tid, f"mothers_{feature}"] = df_lineage.loc[df_lineage.track_id == parent_id, feature].values[0] - if parent_id != -1: + if 'sister' in relationship_list and parent_id != -1: sister_id = get_sister(df_lineage, parent_id, tid) if len(sister_id) > 0: for feature in feature_list: From 72c23bf72f535e0a2e186ae9c9794b4a7d1fa2a5 Mon Sep 17 00:00:00 2001 From: Chantelle Leveille Date: Wed, 13 Nov 2024 11:21:12 -0800 Subject: [PATCH 03/34] only add sister features used in LRM --- .../lib/preprocessing/global_dataset_filtering.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py b/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py index 30eb05cf..9198ac6f 100644 --- a/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py +++ b/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py @@ -263,7 +263,7 @@ def process_full_tracks(df_all, thresh, pix_size, interval): df_full = add_growth_features.fit_tracks_to_time_powerlaw(df_full, "volume", interval) # For LRM - 
df_full = add_features.add_lineage_features(df_full, feature_list=['volume_at_B', 'duration_BC', 'volume_at_C', 'delta_volume_BC']) + df_full = add_features.add_lineage_features(df_full, ['volume_at_B', 'duration_BC', 'delta_volume_BC'], ['sister']) df_full = add_features.add_feature_at(df_full, "frame_transition", 'height', 'height_percentile', pix_size) df_full = add_features.add_features_at_transition(df_full) df_full = add_features.get_early_transient_gr_of_neighborhood(df_full, scale=get_plot_labels_for_metric('neighbor_avg_dxdt_48_volume_90um')[0]) From e846f20853a241e4bbd7f4e0255c38ad7adb9357 Mon Sep 17 00:00:00 2001 From: Chantelle Leveille Date: Wed, 13 Nov 2024 15:54:46 -0800 Subject: [PATCH 04/34] dont calc neighborhood avg for dxdt feats automatically --- .../lib/preprocessing/add_neighborhood_avg_features_lrm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nuc_morph_analysis/lib/preprocessing/add_neighborhood_avg_features_lrm.py b/nuc_morph_analysis/lib/preprocessing/add_neighborhood_avg_features_lrm.py index 8cdee8ca..b8fae481 100644 --- a/nuc_morph_analysis/lib/preprocessing/add_neighborhood_avg_features_lrm.py +++ b/nuc_morph_analysis/lib/preprocessing/add_neighborhood_avg_features_lrm.py @@ -143,7 +143,7 @@ def run_script( dfi = df[df["colony"] == colony] pass_cols = ["index_sequence", "colony", "track_id", "centroid_x", "centroid_y"] - columns = feature_list + [x for x in dfi.columns if "dxdt" in x] + columns = feature_list # first find the unique index_sequence values index_sequences = dfi["index_sequence"].unique() From 89fe2b74c77fd3783aa81891b1208f8b8893f997 Mon Sep 17 00:00:00 2001 From: Chantelle Leveille Date: Fri, 15 Nov 2024 11:11:59 -0800 Subject: [PATCH 05/34] remove unused lrm ft --- nuc_morph_analysis/lib/preprocessing/add_features.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nuc_morph_analysis/lib/preprocessing/add_features.py b/nuc_morph_analysis/lib/preprocessing/add_features.py index d1d09a8b..8722e0c3 
100644 --- a/nuc_morph_analysis/lib/preprocessing/add_features.py +++ b/nuc_morph_analysis/lib/preprocessing/add_features.py @@ -638,7 +638,6 @@ def add_features_at_transition(df, 'neighbor_avg_lrm_height_90um', 'neighbor_avg_lrm_xy_aspect_90um', 'neighbor_avg_lrm_mesh_sa_90um', - 'neighbor_avg_dxdt_48_volume_90um', 'neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um'] ): """ From 0fd10a8cc6191efd1ad4750b5fc9aa40ec6df4a3 Mon Sep 17 00:00:00 2001 From: Chantelle Leveille Date: Tue, 19 Nov 2024 10:32:03 -0800 Subject: [PATCH 06/34] temp sorting through old vs new columns --- .../preprocessing/save_datasets_for_quilt.py | 170 +++++++++++++++++- 1 file changed, 168 insertions(+), 2 deletions(-) diff --git a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py index 04e6685a..19eb6ad3 100644 --- a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py +++ b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py @@ -52,7 +52,173 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): # %% Load baseline colonies df_all_baseline = global_dataset_filtering.load_dataset_with_features() -print(*[col for col in df_all_baseline.columns if "NUC_sh" not in col], sep="\n") +# print(*[col for col in df_all_baseline.columns if "NUC_sh" not in col], sep="\n") +#%% old and new col lists +previous_cols = [ + "CellId", + "label_img", + "track_id", + "colony", + "index_sequence", + "roi", + "centroid_x", + "centroid_y", + "centroid_z", + "volume", + "height", + "mesh_vol", + "mesh_sa", + "SA_vol_ratio", + "transform_params", + "NUC_", + "length", + "width", + "xz_aspect", + "xy_aspect", + "zy_aspect", + "fov_edge", + "predicted_formation", + "predicted_breakdown", + "Ff", + "Fb", + "after_breakdown_outlier", + "before_formation_outlier", + "is_after_breakdown_before_formation_outlier", + "termination", + "entering_mitosis", + "exiting_mitosis", + "entering_or_exiting_division", + "neighbors", 
+ "neigh_distance", + "density", + "is_tp_outlier", + "track_length", + "is_outlier_by_short_track", + "is_outlier_curated_by_id", + "is_growth_outlier", + "is_outlier_track", + "is_outlier", + "parent_id", + "family_id", + "distance_from_centroid", + "colony_depth", + "normalized_colony_depth", + "normalized_distance_from_centroid", + "colony_edge_in_fov", + "colony_time", + "non_interphase_volume", + "non_interphase_mesh_sa", + "non_interphase_SA_vol_ratio", + "non_interphase_size_shape", + "dxdt_48_volume", + "neighbor_avg_volume_90um", + "neighbor_avg_dxdt_48_volume_90um", + "neighbor_avg_volume_whole_colony", + "neighbor_avg_dxdt_48_volume_whole_colony", + "normalized_time", + "frame_transition", + "sync_time_Ff", + "volume_at_A", + "location_x_at_A", + "location_y_at_A", + "time_at_A", + "colony_time_at_A", + "volume_at_B", + "location_x_at_B", + "location_y_at_B", + "time_at_B", + "colony_time_at_B", + "volume_at_C", + "location_x_at_C", + "location_y_at_C", + "time_at_C", + "colony_time_at_C", + "duration_AB", + "duration_BC", + "duration_AC", + "delta_volume_BC", + "volume_fold_change_BC", + "SA_at_B", + "SA_at_C", + "delta_SA_BC", + "SA_fold_change_BC", + "volume_fold_change_fromB", + "SA_fold_change_fromB", + "growth_rate_AB", + "late_growth_rate_by_endpoints", + "tscale_linearityfit_volume", + "atB_linearityfit_volume", + "rate_linearityfit_volume", + "RMSE_linearityfit_volume", + "is_full_track", + "exploratory_dataset", + "baseline_colonies_dataset", + "full_interphase_dataset", + "lineage_annotated_dataset" +] +columns_list = [col for col in df_all_baseline.columns if "NUC_sh" not in col] + +#%% Get differences +previous_not_in_current = [col for col in previous_cols if col not in columns_list] +current_not_in_previous = [col for col in columns_list if col not in previous_cols] +print("Columns in previous but not in current:") +print(previous_not_in_current) +print("\nColumns in current but not in previous:") +print(current_not_in_previous) + +#%% 
139 NEW COLUMNS!! +new_cols = current_not_in_previous +keep_list = [ + #needed to calc linear reg model feats + "has_mitotic_neighbor", + "has_dying_neighbor", + "sum_has_dying_neighbor", + "sum_has_mitotic_neighbor", + "neighbor_avg_lrm_volume_90um", + "neighbor_avg_lrm_height_90um", + "neighbor_avg_lrm_xy_aspect_90um", + "neighbor_avg_lrm_mesh_sa_90um", + "neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um", + "neighbor_avg_lrm_dxdt_48_volume_90um", + + #used in lrm + "sisters_volume_at_B", + "sisters_duration_BC", + "sisters_delta_volume_BC", + "height_at_B", + "xy_aspect_at_B", + "SA_vol_ratio_at_B", + "neighbor_avg_lrm_volume_90um_at_B", + "neighbor_avg_lrm_height_90um_at_B", + "neighbor_avg_lrm_xy_aspect_90um_at_B", + "neighbor_avg_lrm_mesh_sa_90um_at_B", + "neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um_at_B", + "early_neighbor_avg_dxdt_48_volume_90um", + "mean_neighbor_avg_dxdt_48_volume_90um", + "mean_neighbor_avg_lrm_volume_90um", + "mean_neighbor_avg_lrm_height_90um", + "mean_neighbor_avg_lrm_xy_aspect_90um", + "mean_neighbor_avg_lrm_mesh_sa_90um", + "mean_neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um", + "normalized_sum_has_mitotic_neighbor", + "normalized_sum_has_dying_neighbor",] + +new_cols = [col for col in new_cols if col not in keep_list] + +print(len(keep_list)) +print(len(new_cols)) +#%% +for col in new_cols: + print(col) + +#%% Suggest we remove +drop_list = [ + 'level_0', # there is code in global datasets filtering and lebeling neighbors helper that is checking for this and there is now warning printed?? + 'index', # cell id should be the index column.. why is this here? + + # Loads of 2d columns are unused... work with chris to identify which ones we actually need. 
+ ] + # %% Filter baseline colonies df_baseline = filter_data.all_timepoints_minimal_filtering(df_all_baseline) @@ -103,4 +269,4 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): save_dataset_for_quilt(df_full_feeding_control, "feeding_control_analysis_dataset") save_dataset_for_quilt(df_aphidicolin, "dna_replication_inhibitor_analysis_dataset") save_dataset_for_quilt(df_importazole, "nuclear_import_inhibitor_analysis_dataset") -# %% + From 0e5827be7c3bf85c3d8c73adddf3d866301294e3 Mon Sep 17 00:00:00 2001 From: Chantelle Leveille Date: Thu, 5 Dec 2024 11:14:51 -0800 Subject: [PATCH 07/34] update --- .../lib/preprocessing/save_datasets_for_quilt.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py index 19eb6ad3..449fb5ba 100644 --- a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py +++ b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py @@ -203,7 +203,12 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): "normalized_sum_has_mitotic_neighbor", "normalized_sum_has_dying_neighbor",] -new_cols = [col for col in new_cols if col not in keep_list] +drop_list = [ + 'level_0', + 'index', + ] + +new_cols = [col for col in new_cols if col not in keep_list and col not in drop_list] print(len(keep_list)) print(len(new_cols)) @@ -211,13 +216,6 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): for col in new_cols: print(col) -#%% Suggest we remove -drop_list = [ - 'level_0', # there is code in global datasets filtering and lebeling neighbors helper that is checking for this and there is now warning printed?? - 'index', # cell id should be the index column.. why is this here? - - # Loads of 2d columns are unused... work with chris to identify which ones we actually need. 
- ] # %% Filter baseline colonies From d3d57281b1e1f40b3fa0db59194f82748917023d Mon Sep 17 00:00:00 2001 From: Chris Frick Date: Thu, 5 Dec 2024 11:37:10 -0800 Subject: [PATCH 08/34] inv_cyto and others removed --- ...l_density_at_different_zarr_resolutions.py | 7 +- .../lib/preprocessing/filter_data.py | 4 +- .../preprocessing/save_datasets_for_quilt.py | 141 +++++++++++++++++- .../twoD_zMIP_area/pseudo_cell_helper.py | 16 +- .../lib/visualization/label_tables.py | 7 - 5 files changed, 149 insertions(+), 26 deletions(-) diff --git a/nuc_morph_analysis/analyses/density/extra_checks/confirm_identical_density_at_different_zarr_resolutions.py b/nuc_morph_analysis/analyses/density/extra_checks/confirm_identical_density_at_different_zarr_resolutions.py index b33db501..cd9b0f24 100644 --- a/nuc_morph_analysis/analyses/density/extra_checks/confirm_identical_density_at_different_zarr_resolutions.py +++ b/nuc_morph_analysis/analyses/density/extra_checks/confirm_identical_density_at_different_zarr_resolutions.py @@ -30,7 +30,7 @@ os.makedirs(str(fig_dir), exist_ok=True) # now plot correlation -feature_list = ['2d_area_nucleus','2d_area_pseudo_cell','2d_area_cyto','inv_cyto_density','2d_area_nuc_cell_ratio'] +feature_list = ['2d_area_nucleus','2d_area_pseudo_cell','2d_area_nuc_cell_ratio'] for feature in feature_list: fig,ax = plt.subplots(figsize=(2.5,2.5),layout = 'constrained') @@ -38,10 +38,7 @@ xunits = 1.0 yunits = 1.0 unitstr = '' - elif 'inv_cyto' in feature: # density features - xunits = 1/((0.108)**2) - yunits = 1/((0.108*2.5)**2) - unitstr = '(1/um^2)' + else: # area features xunits = (0.108)**2 yunits = (0.108*2.5)**2 diff --git a/nuc_morph_analysis/lib/preprocessing/filter_data.py b/nuc_morph_analysis/lib/preprocessing/filter_data.py index bd83c69d..0ff547d5 100644 --- a/nuc_morph_analysis/lib/preprocessing/filter_data.py +++ b/nuc_morph_analysis/lib/preprocessing/filter_data.py @@ -1012,7 +1012,7 @@ def remove_expected_pseudo_cell_artifacts(dfm, 
apply_to_nucleus_too=False, verbo compiled_log = log1 | log2 | log3 | log4 | log5 # define the columns to apply the filter to - extra_cols = ['inv_cyto_density','density'] + extra_cols = ['density'] # cols = [x for x in cols if '2d_' in dfm.columns] if apply_to_nucleus_too: cols = [x for x in dfm.columns if ('2d_' in x) & ('label' not in x)] + extra_cols @@ -1064,7 +1064,7 @@ def remove_uncaught_pseudo_cell_artifacts(df, apply_to_nucleus_too=False, verbos compiled_log = log1 | log2 | log3 # define the columns to apply the filter to - extra_cols = ['inv_cyto_density','density'] + extra_cols = ['density'] # cols = [x for x in cols if '2d_' in dfm.columns] if apply_to_nucleus_too: cols = [x for x in df.columns if ('2d_' in x) & ('label' not in x)] + extra_cols diff --git a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py index 449fb5ba..eb095125 100644 --- a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py +++ b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py @@ -201,11 +201,150 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): "mean_neighbor_avg_lrm_mesh_sa_90um", "mean_neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um", "normalized_sum_has_mitotic_neighbor", - "normalized_sum_has_dying_neighbor",] + "normalized_sum_has_dying_neighbor", + + # density features + '2d_area_nucleus', + '2d_area_pseudo_cell', + '2d_area_nuc_cell_ratio', + + + ] drop_list = [ 'level_0', 'index', + 'source_manifest_x', + 'source_manifest_y', + + '2d_label_true_nucleus', + '2d_area_true_nucleus', + '2d_total_area_true_nucleus', + '2d_label_nucleus', + # '2d_area_nucleus', #KEEPING + '2d_bbox',-0_nucleus + '2d_bbox',-1_nucleus + '2d_bbox',-2_nucleus + '2d_bbox',-3_nucleus + '2d_centroid',-0_nucleus + '2d_centroid',-1_nucleus + '2d_convex_area_nucleus', + '2d_eccentricity_nucleus', + '2d_equivalent_diameter_nucleus', + '2d_extent_nucleus', + '2d_filled_area_nucleus', + 
'2d_major_axis_length_nucleus', + '2d_minor_axis_length_nucleus', + '2d_orientation_nucleus', + '2d_perimeter_nucleus', #can be dropped at end of global dataset filterig, only used in load_dataset_with_features. + '2d_solidity_nucleus', + '2d_img_shape_nucleus', + 'resolution_level_dup1', + '2d_label_true_pseudo_cell', + '2d_area_true_pseudo_cell', + '2d_total_area_true_pseudo_cell', + '2d_label_pseudo_cell', + # '2d_area_pseudo_cell', # KEEPING + '2d_bbox',-0_pseudo_cell + '2d_bbox',-1_pseudo_cell + '2d_bbox',-2_pseudo_cell + '2d_bbox',-3_pseudo_cell + '2d_centroid',-0_pseudo_cell + '2d_centroid',-1_pseudo_cell + '2d_convex_area_pseudo_cell', + '2d_eccentricity_pseudo_cell', + '2d_equivalent_diameter_pseudo_cell', + '2d_extent_pseudo_cell', + '2d_filled_area_pseudo_cell', + '2d_major_axis_length_pseudo_cell', + '2d_minor_axis_length_pseudo_cell', + '2d_orientation_pseudo_cell', + '2d_perimeter_pseudo_cell', # can be dropped at end of global dataset filtering, only used in load_dataset_with_features + '2d_solidity_pseudo_cell', + '2d_img_shape_pseudo_cell', + 'resolution_level_dup2', + '2d_label_true_edge', + '2d_area_true_edge', + '2d_total_area_true_edge', + '2d_label_edge', + '2d_intensity_max_edge', + '2d_intensity_mean_edge', + '2d_intensity_min_edge', # this one is fun, its distance to nearest nucleus edge (different than centroid distance) + '2d_img_shape_edge', + 'resolution_level', + # '2d_area_nuc_cell_ratio', # KEEPING + '2d_area_cyto', + 'inv_cyto_density', + # 'frame_of_breakdown', + # 'frame_of_formation', + # 'has_mitotic_neighbor_breakdown', + # 'number_of_frame_of_breakdown_neighbors', + # 'has_mitotic_neighbor_formation', + # 'number_of_frame_of_formation_neighbors', + # 'has_mitotic_neighbor_breakdown_forward_dilated', + # 'has_mitotic_neighbor_formation_backward_dilated', + # 'has_mitotic_neighbor_dilated', + # 'identified_death', + # 'frame_of_death', + # 'number_of_frame_of_death_neighbors', + # 'has_dying_neighbor_forward_dilated', + # 
'dxdt_5_volume_end', + # 'volume_change_over_25_minutes', + # 'neighbor_avg_lrm_dxdt_5_volume_end_90um', + # 'neighbor_avg_lrm_neighbor_avg_dxdt_48_volume_90um_90um', + # 'neighbor_avg_lrm_neighbor_avg_dxdt_48_volume_whole_colony_90um', + # 'neighbor_avg_lrm_volume_whole_colony', + # 'neighbor_avg_lrm_height_whole_colony', + # 'neighbor_avg_lrm_xy_aspect_whole_colony', + # 'neighbor_avg_lrm_mesh_sa_whole_colony', + # 'neighbor_avg_lrm_2d_area_nuc_cell_ratio_whole_colony', + # 'neighbor_avg_lrm_dxdt_48_volume_whole_colony', + # 'neighbor_avg_lrm_dxdt_5_volume_end_whole_colony', + # 'neighbor_avg_lrm_neighbor_avg_dxdt_48_volume_90um_whole_colony', + # 'neighbor_avg_lrm_neighbor_avg_dxdt_48_volume_whole_colony_whole_colony', + # '2d_perimeter_nuc_cell_ratio', + # 'bad_pseudo_cells_segmentation', + # 'uncaught_pseudo_cell_artifact', + # 'power_fit_volume', + # 'tscale_exponentialfit_volume', + # 'atB_exponentialfit_volume', + # 'rate_exponentialfit_volume', + # 'RMSE_exponentialfit_volume', + # 'tscale_linearfit_volume', + # 'atB_linearfit_volume', + # 'rate_linearfit_volume', + # 'RMSE_linearfit_volume', + # 'mothers_volume_at_B', + # 'mothers_duration_BC', + # 'mothers_volume_at_C', + # 'sisters_volume_at_C', + # 'mothers_delta_volume_BC', + # 'neighbor_avg_dxdt_48_volume_90um_at_B', + # 'volume_dips_peak_mask_at_region', + # 'volume_dips_peak_mask_at_center', + # 'volume_dips_has_peak', + # 'volume_dips_volume_change_at_center', + # 'volume_dips_volume_change_at_region', + # 'volume_dips_width_at_center', + # 'volume_dips_width_at_region', + # 'volume_dips_max_volume_change', + # 'volume_dips_peak_id_at_center', + # 'volume_dips_peak_id_at_region', + # 'volume_dips_total_number', + # 'volume_dips_removed_um_unfilled', + # 'dxdt_48_volume_dips_removed_um_unfilled', + # 'neighbor_avg_dxdt_48_volume_dips_removed_um_unfilled_90um', + # 'neighbor_avg_dxdt_48_volume_dips_removed_um_unfilled_whole_colony', + # 'sum_number_of_frame_of_breakdown_neighbors', + # 
'sum_number_of_frame_of_formation_neighbors', + # 'sum_has_mitotic_neighbor_breakdown', + # 'sum_has_mitotic_neighbor_formation', + # 'sum_has_mitotic_neighbor_breakdown_forward_dilated', + # 'sum_has_mitotic_neighbor_formation_backward_dilated', + # 'sum_has_mitotic_neighbor_dilated', + # 'sum_has_dying_neighbor_forward_dilated', + # 'sum_number_of_frame_of_death_neighbors', + ] new_cols = [col for col in new_cols if col not in keep_list and col not in drop_list] diff --git a/nuc_morph_analysis/lib/preprocessing/twoD_zMIP_area/pseudo_cell_helper.py b/nuc_morph_analysis/lib/preprocessing/twoD_zMIP_area/pseudo_cell_helper.py index 5eb88a81..378c23b1 100644 --- a/nuc_morph_analysis/lib/preprocessing/twoD_zMIP_area/pseudo_cell_helper.py +++ b/nuc_morph_analysis/lib/preprocessing/twoD_zMIP_area/pseudo_cell_helper.py @@ -34,11 +34,9 @@ # for the nucleus all features will take the form of 2d_{feature}_nucleus, such as 2d_area_nucleus # for the pseudo cell all features will take the form of 2d_{feature}_pseudo_cell, such as 2d_area_pseudo_cell -# some specific new features are computed from these +# specific new feature computed from these [ '2d_area_nuc_cell_ratio', # ratio of nucleus area to pseudo cell area -'2d_area_cyto', # cytoplasmic area (pseudo cell area - nucleus area) -'inv_cyto_density', # inverse of cytoplasmic area (1/cytoplasmic area) ] # some features come from measuring the true area of each object without using skimage.measure.regionprops_table @@ -294,11 +292,9 @@ def merge_2d_features(dfleft, dfright, suffixes=('_dup1','_dup2')): def define_density_features(df_2d): """ - define density features in the 2D dataframe - the first density feature (2d_area_nuc_cell_ratio) is defined as the area of a nucleus divided by the area of the (pseudo) cell - the second density feature (inv_cyto_density) is defined as the inverse of the cytoplasmic area - a sub feature is the cytoplasmic area (2d_area_cyto) which is defined as the difference between the pseudo cell 
area and the nucleus area - + define density feature in the 2D dataframe, 2d_area_nuc_cell_ratio, defined as the area + of a nucleus divided by the area of the (pseudo) cell + Parameters ---------- df_2d : pd.DataFrame @@ -307,11 +303,9 @@ def define_density_features(df_2d): Returns ------- df_2d : pd.DataFrame - the 2D dataframe with the density features added (2d_area_nuc_cell_ratio, inv_cyto_density) + the 2D dataframe with the density features added (2d_area_nuc_cell_ratio) """ df_2d['2d_area_nuc_cell_ratio'] = df_2d['2d_area_nucleus'] / df_2d['2d_area_pseudo_cell'] # unitless - df_2d['2d_area_cyto'] = df_2d['2d_area_pseudo_cell'] - df_2d['2d_area_nucleus'] # units of pixel_area - df_2d['inv_cyto_density'] = 1/df_2d['2d_area_cyto'] # units of 1/pixel_area return df_2d def choose_columns(df_2d): diff --git a/nuc_morph_analysis/lib/visualization/label_tables.py b/nuc_morph_analysis/lib/visualization/label_tables.py index d6b07f08..bad35734 100644 --- a/nuc_morph_analysis/lib/visualization/label_tables.py +++ b/nuc_morph_analysis/lib/visualization/label_tables.py @@ -78,15 +78,8 @@ def get_scale_factor_table(dataset="all_baseline"): ( "2d_area_nucleus", "2d_area_pseudo_cell", - "2d_area_cyto", ): (pix_size/2.5)**2, # resolution_level 1 is 2.5x downsampled ("2d_area_pseudo_cell"): (pix_size/2.5)**2, # resolution_level 1 is 2.5x downsampled - ("inv_cyto_density"): 1 / (pix_size/2.5)**2, # resolution level =1 is 2.5x downsampled - ( - "2d_intensity_min_edge", - "2d_intensity_mean_edge", - "2d_intensity_max_edge", - ): pix_size/2.5, # resolution level =1 is 2.5x downsampled ("2d_area_nuc_cell_ratio"): 1, } From e3705b867b6b5e20c04c575cd6354e9c0851cab8 Mon Sep 17 00:00:00 2001 From: Chris Frick Date: Thu, 5 Dec 2024 11:50:33 -0800 Subject: [PATCH 09/34] determine neighbor_of_X columns to keep --- .../preprocessing/save_datasets_for_quilt.py | 57 +++++++++++-------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git 
a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py index eb095125..8a121d6c 100644 --- a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py +++ b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py @@ -209,6 +209,22 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): '2d_area_nuc_cell_ratio', + # neighbor_of_X features + 'frame_of_breakdown', # used in figure_mitotic_filtering_examples.py + 'frame_of_formation',# used in figure_mitotic_filtering_examples.py + 'has_mitotic_neighbor_breakdown', # used in validation/illustration code, useful to keep + 'number_of_frame_of_breakdown_neighbors', # used in neighbor_of_X/example_timepoint_numbers_of_mitotic.py + 'has_mitotic_neighbor_formation', # used in validation/illustration code, useful to keep + 'number_of_frame_of_formation_neighbors', # used in neighbor_of_X/example_timepoint_numbers_of_mitotic.py + 'has_mitotic_neighbor_breakdown_forward_dilated', # used in neighbor_of_X/example + 'has_mitotic_neighbor_formation_backward_dilated', # used in neighbor_of_X/example + 'has_mitotic_neighbor_dilated', # used in figure_mitotic_filtering_examples.py + 'identified_death', # used in neighbor_of_X/example + 'frame_of_death', # used in neighbor_of_X/example + 'number_of_frame_of_death_neighbors', # used + 'has_dying_neighbor_forward_dilated', #used + + ] drop_list = [ @@ -222,12 +238,12 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): '2d_total_area_true_nucleus', '2d_label_nucleus', # '2d_area_nucleus', #KEEPING - '2d_bbox',-0_nucleus - '2d_bbox',-1_nucleus - '2d_bbox',-2_nucleus - '2d_bbox',-3_nucleus - '2d_centroid',-0_nucleus - '2d_centroid',-1_nucleus + '2d_bbox-0_nucleus', + '2d_bbox-1_nucleus', + '2d_bbox-2_nucleus', + '2d_bbox-3_nucleus', + '2d_centroid-0_nucleus', + '2d_centroid-1_nucleus', '2d_convex_area_nucleus', '2d_eccentricity_nucleus', '2d_equivalent_diameter_nucleus', @@ -245,12 
+261,12 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): '2d_total_area_true_pseudo_cell', '2d_label_pseudo_cell', # '2d_area_pseudo_cell', # KEEPING - '2d_bbox',-0_pseudo_cell - '2d_bbox',-1_pseudo_cell - '2d_bbox',-2_pseudo_cell - '2d_bbox',-3_pseudo_cell - '2d_centroid',-0_pseudo_cell - '2d_centroid',-1_pseudo_cell + '2d_bbox-0_pseudo_cell', + '2d_bbox-1_pseudo_cell', + '2d_bbox-2_pseudo_cell', + '2d_bbox-3_pseudo_cell', + '2d_centroid-0_pseudo_cell', + '2d_centroid-1_pseudo_cell', '2d_convex_area_pseudo_cell', '2d_eccentricity_pseudo_cell', '2d_equivalent_diameter_pseudo_cell', @@ -275,19 +291,10 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): # '2d_area_nuc_cell_ratio', # KEEPING '2d_area_cyto', 'inv_cyto_density', - # 'frame_of_breakdown', - # 'frame_of_formation', - # 'has_mitotic_neighbor_breakdown', - # 'number_of_frame_of_breakdown_neighbors', - # 'has_mitotic_neighbor_formation', - # 'number_of_frame_of_formation_neighbors', - # 'has_mitotic_neighbor_breakdown_forward_dilated', - # 'has_mitotic_neighbor_formation_backward_dilated', - # 'has_mitotic_neighbor_dilated', - # 'identified_death', - # 'frame_of_death', - # 'number_of_frame_of_death_neighbors', - # 'has_dying_neighbor_forward_dilated', + + + + # 'dxdt_5_volume_end', # 'volume_change_over_25_minutes', # 'neighbor_avg_lrm_dxdt_5_volume_end_90um', From 85a9f0e97bba2284427e3e01e377b432edd515e6 Mon Sep 17 00:00:00 2001 From: Chantelle Leveille Date: Thu, 5 Dec 2024 12:05:04 -0800 Subject: [PATCH 10/34] more feature pruning --- .../preprocessing/save_datasets_for_quilt.py | 83 +++++-------------- 1 file changed, 22 insertions(+), 61 deletions(-) diff --git a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py index 8a121d6c..4233fa0e 100644 --- a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py +++ b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py @@ -223,7 +223,18 
@@ def save_dataset_for_quilt(df, dataset_name, destdir=None): 'frame_of_death', # used in neighbor_of_X/example 'number_of_frame_of_death_neighbors', # used 'has_dying_neighbor_forward_dilated', #used - + + # volume_dips features + 'volume_change_over_25_minutes', #used + '2d_perimeter_nuc_cell_ratio', # used within filter data + 'bad_pseudo_cells_segmentation', + 'uncaught_pseudo_cell_artifact', + 'power_fit_volume', #used and could go to tfe + + + # new fitting paramaters features + 'RMSE_exponentialfit_volume', #used + 'RMSE_linearfit_volume', #used ] @@ -291,66 +302,16 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): # '2d_area_nuc_cell_ratio', # KEEPING '2d_area_cyto', 'inv_cyto_density', - - - - - # 'dxdt_5_volume_end', - # 'volume_change_over_25_minutes', - # 'neighbor_avg_lrm_dxdt_5_volume_end_90um', - # 'neighbor_avg_lrm_neighbor_avg_dxdt_48_volume_90um_90um', - # 'neighbor_avg_lrm_neighbor_avg_dxdt_48_volume_whole_colony_90um', - # 'neighbor_avg_lrm_volume_whole_colony', - # 'neighbor_avg_lrm_height_whole_colony', - # 'neighbor_avg_lrm_xy_aspect_whole_colony', - # 'neighbor_avg_lrm_mesh_sa_whole_colony', - # 'neighbor_avg_lrm_2d_area_nuc_cell_ratio_whole_colony', - # 'neighbor_avg_lrm_dxdt_48_volume_whole_colony', - # 'neighbor_avg_lrm_dxdt_5_volume_end_whole_colony', - # 'neighbor_avg_lrm_neighbor_avg_dxdt_48_volume_90um_whole_colony', - # 'neighbor_avg_lrm_neighbor_avg_dxdt_48_volume_whole_colony_whole_colony', - # '2d_perimeter_nuc_cell_ratio', - # 'bad_pseudo_cells_segmentation', - # 'uncaught_pseudo_cell_artifact', - # 'power_fit_volume', - # 'tscale_exponentialfit_volume', - # 'atB_exponentialfit_volume', - # 'rate_exponentialfit_volume', - # 'RMSE_exponentialfit_volume', - # 'tscale_linearfit_volume', - # 'atB_linearfit_volume', - # 'rate_linearfit_volume', - # 'RMSE_linearfit_volume', - # 'mothers_volume_at_B', - # 'mothers_duration_BC', - # 'mothers_volume_at_C', - # 'sisters_volume_at_C', - # 'mothers_delta_volume_BC', - # 
'neighbor_avg_dxdt_48_volume_90um_at_B', - # 'volume_dips_peak_mask_at_region', - # 'volume_dips_peak_mask_at_center', - # 'volume_dips_has_peak', - # 'volume_dips_volume_change_at_center', - # 'volume_dips_volume_change_at_region', - # 'volume_dips_width_at_center', - # 'volume_dips_width_at_region', - # 'volume_dips_max_volume_change', - # 'volume_dips_peak_id_at_center', - # 'volume_dips_peak_id_at_region', - # 'volume_dips_total_number', - # 'volume_dips_removed_um_unfilled', - # 'dxdt_48_volume_dips_removed_um_unfilled', - # 'neighbor_avg_dxdt_48_volume_dips_removed_um_unfilled_90um', - # 'neighbor_avg_dxdt_48_volume_dips_removed_um_unfilled_whole_colony', - # 'sum_number_of_frame_of_breakdown_neighbors', - # 'sum_number_of_frame_of_formation_neighbors', - # 'sum_has_mitotic_neighbor_breakdown', - # 'sum_has_mitotic_neighbor_formation', - # 'sum_has_mitotic_neighbor_breakdown_forward_dilated', - # 'sum_has_mitotic_neighbor_formation_backward_dilated', - # 'sum_has_mitotic_neighbor_dilated', - # 'sum_has_dying_neighbor_forward_dilated', - # 'sum_number_of_frame_of_death_neighbors', + 'dxdt_5_volume_end', # can be dropped using code, duplicate column + # 'volume_change_over_25_minutes', # KEEPING + # '2d_perimeter_nuc_cell_ratio', # KEEPING + # 'bad_pseudo_cells_segmentation', # KEEPing + 'tscale_exponentialfit_volume', + 'atB_exponentialfit_volume', + 'rate_exponentialfit_volume', + 'tscale_linearfit_volume', + 'atB_linearfit_volume', + 'rate_linearfit_volume', ] From 1ab84ea281666602497325fa1fa90d6201ad5306 Mon Sep 17 00:00:00 2001 From: Chris Frick Date: Thu, 5 Dec 2024 12:17:26 -0800 Subject: [PATCH 11/34] first pass going through new features to keep and drop --- .../preprocessing/save_datasets_for_quilt.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py index 4233fa0e..b9c14de4 100644 --- 
a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py +++ b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py @@ -230,6 +230,13 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): 'bad_pseudo_cells_segmentation', 'uncaught_pseudo_cell_artifact', 'power_fit_volume', #used and could go to tfe + 'volume_dips_peak_mask_at_region', #u sed for S10 C right and nice for TFE + 'volume_dips_peak_mask_at_center', # used for S10C right + 'volume_dips_volume_change_at_center', # used for S10D thresholding + 'volume_dips_removed_um_unfilled', # used for S10E abd S10B + 'dxdt_48_volume_dips_removed_um_unfilled', # used for S10E abd S10B + 'neighbor_avg_dxdt_48_volume_dips_removed_um_unfilled_90um', # used for S10 G + 'neighbor_avg_dxdt_48_volume_dips_removed_um_unfilled_whole_colony', # used for S10 F # new fitting paramaters features @@ -313,6 +320,25 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): 'atB_linearfit_volume', 'rate_linearfit_volume', + 'volume_dips_has_peak', + 'volume_dips_volume_change_at_region', + 'volume_dips_width_at_center', + 'volume_dips_width_at_region', + 'volume_dips_max_volume_change', + 'volume_dips_peak_id_at_center', + 'volume_dips_peak_id_at_region', + 'volume_dips_total_number', + + 'sum_number_of_frame_of_breakdown_neighbors', + 'sum_number_of_frame_of_formation_neighbors', + 'sum_has_mitotic_neighbor_breakdown', + 'sum_has_mitotic_neighbor_formation', + 'sum_has_mitotic_neighbor_breakdown_forward_dilated', + 'sum_has_mitotic_neighbor_formation_backward_dilated', + 'sum_has_mitotic_neighbor_dilated', + 'sum_has_dying_neighbor_forward_dilated', + 'sum_number_of_frame_of_death_neighbors', + ] new_cols = [col for col in new_cols if col not in keep_list and col not in drop_list] From f9bdd0be988df5e0e5fefe3c8888d7f16f79d097 Mon Sep 17 00:00:00 2001 From: Chris Frick Date: Thu, 5 Dec 2024 13:07:46 -0800 Subject: [PATCH 12/34] clean up keep and drop lists --- 
.../lib/preprocessing/save_datasets_for_quilt.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py index b9c14de4..323de379 100644 --- a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py +++ b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py @@ -207,6 +207,10 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): '2d_area_nucleus', '2d_area_pseudo_cell', '2d_area_nuc_cell_ratio', + '2d_perimeter_nucleus', #can be dropped at end of global dataset filterig, only used in load_dataset_with_features. + '2d_perimeter_pseudo_cell', # can be dropped at end of global dataset filtering, only used in load_dataset_with_features + + # neighbor_of_X features @@ -255,7 +259,6 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): '2d_area_true_nucleus', '2d_total_area_true_nucleus', '2d_label_nucleus', - # '2d_area_nucleus', #KEEPING '2d_bbox-0_nucleus', '2d_bbox-1_nucleus', '2d_bbox-2_nucleus', @@ -270,7 +273,6 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): '2d_major_axis_length_nucleus', '2d_minor_axis_length_nucleus', '2d_orientation_nucleus', - '2d_perimeter_nucleus', #can be dropped at end of global dataset filterig, only used in load_dataset_with_features. 
'2d_solidity_nucleus', '2d_img_shape_nucleus', 'resolution_level_dup1', @@ -278,7 +280,6 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): '2d_area_true_pseudo_cell', '2d_total_area_true_pseudo_cell', '2d_label_pseudo_cell', - # '2d_area_pseudo_cell', # KEEPING '2d_bbox-0_pseudo_cell', '2d_bbox-1_pseudo_cell', '2d_bbox-2_pseudo_cell', @@ -293,7 +294,6 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): '2d_major_axis_length_pseudo_cell', '2d_minor_axis_length_pseudo_cell', '2d_orientation_pseudo_cell', - '2d_perimeter_pseudo_cell', # can be dropped at end of global dataset filtering, only used in load_dataset_with_features '2d_solidity_pseudo_cell', '2d_img_shape_pseudo_cell', 'resolution_level_dup2', @@ -306,13 +306,9 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): '2d_intensity_min_edge', # this one is fun, its distance to nearest nucleus edge (different than centroid distance) '2d_img_shape_edge', 'resolution_level', - # '2d_area_nuc_cell_ratio', # KEEPING '2d_area_cyto', 'inv_cyto_density', 'dxdt_5_volume_end', # can be dropped using code, duplicate column - # 'volume_change_over_25_minutes', # KEEPING - # '2d_perimeter_nuc_cell_ratio', # KEEPING - # 'bad_pseudo_cells_segmentation', # KEEPing 'tscale_exponentialfit_volume', 'atB_exponentialfit_volume', 'rate_exponentialfit_volume', @@ -345,6 +341,10 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): print(len(keep_list)) print(len(new_cols)) +overlap = [x for x in keep_list if x in drop_list] +overlap2 = [x for x in drop_list if x in keep_list] + +print(len(overlap),len(overlap2)) #%% for col in new_cols: print(col) From 1b4b9698343e8be1cae9b8df9bb762c54d09b383 Mon Sep 17 00:00:00 2001 From: Chris Frick Date: Thu, 5 Dec 2024 13:38:40 -0800 Subject: [PATCH 13/34] add code to remove columns to be dropped --- .../lib/preprocessing/add_features.py | 21 ++--- .../preprocessing/global_dataset_filtering.py | 81 +++++++++++++++++++ 
.../preprocessing/save_datasets_for_quilt.py | 29 ++++--- .../lib/visualization/label_tables.py | 12 --- .../visualization/write_data_for_colorizer.py | 29 +------ 5 files changed, 114 insertions(+), 58 deletions(-) diff --git a/nuc_morph_analysis/lib/preprocessing/add_features.py b/nuc_morph_analysis/lib/preprocessing/add_features.py index 90684631..1b3d8d95 100644 --- a/nuc_morph_analysis/lib/preprocessing/add_features.py +++ b/nuc_morph_analysis/lib/preprocessing/add_features.py @@ -574,17 +574,18 @@ def sum_mitotic_events_along_full_track(df0, feature_list=[]): """ mitotic_event_features = [ - 'number_of_frame_of_breakdown_neighbors', - 'number_of_frame_of_formation_neighbors', - 'has_mitotic_neighbor_breakdown', - 'has_mitotic_neighbor_formation', - 'has_mitotic_neighbor_breakdown_forward_dilated', - 'has_mitotic_neighbor_formation_backward_dilated', 'has_mitotic_neighbor', - 'has_mitotic_neighbor_dilated', 'has_dying_neighbor', - 'has_dying_neighbor_forward_dilated', - 'number_of_frame_of_death_neighbors' + + # 'number_of_frame_of_breakdown_neighbors', + # 'number_of_frame_of_formation_neighbors', + # 'has_mitotic_neighbor_breakdown', + # 'has_mitotic_neighbor_formation', + # 'has_mitotic_neighbor_breakdown_forward_dilated', + # 'has_mitotic_neighbor_formation_backward_dilated', + # 'has_mitotic_neighbor_dilated', + # 'has_dying_neighbor_forward_dilated', + # 'number_of_frame_of_death_neighbors' ] if len(feature_list) == 0: @@ -712,6 +713,8 @@ def add_volume_change_over_25_minute_window(df, bin_interval=5): # run the compute_change_over_time workflow for a given bin_interval dfm = run_script(dfm,['volume'], [bin_interval], time_location='end') dfm['volume_change_over_25_minutes'] = dfm['dxdt_5_volume_end']*5 + # drop the dxdt_5_volume_end column + dfm = dfm.drop(columns=['dxdt_5_volume_end']) # now check that all columns in df have the same dtype as columns in dfm for col in df.columns: diff --git 
a/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py b/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py index cb4a866f..feebf348 100644 --- a/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py +++ b/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py @@ -330,6 +330,87 @@ def merge_datasets(df_all, df_full): "height_percentile", "raw_full_zstack_path", "seg_full_zstack_path", + + # not sure where these come from + 'level_0', + 'index', + 'source_manifest_x', + 'source_manifest_y', + + + # this set is defined in psuedo_cell_helper, not used after. + # could be removed in pseudo_cell_helper.choose_columns() but that would require rerunning generate_main_manifest + '2d_label_true_nucleus', + '2d_area_true_nucleus', + '2d_total_area_true_nucleus', + '2d_label_nucleus', + '2d_bbox-0_nucleus', + '2d_bbox-1_nucleus', + '2d_bbox-2_nucleus', + '2d_bbox-3_nucleus', + '2d_centroid-0_nucleus', + '2d_centroid-1_nucleus', + '2d_convex_area_nucleus', + '2d_eccentricity_nucleus', + '2d_equivalent_diameter_nucleus', + '2d_extent_nucleus', + '2d_filled_area_nucleus', + '2d_major_axis_length_nucleus', + '2d_minor_axis_length_nucleus', + '2d_orientation_nucleus', + '2d_solidity_nucleus', + '2d_img_shape_nucleus', + 'resolution_level_dup1', + '2d_label_true_pseudo_cell', + '2d_area_true_pseudo_cell', + '2d_total_area_true_pseudo_cell', + '2d_label_pseudo_cell', + '2d_bbox-0_pseudo_cell', + '2d_bbox-1_pseudo_cell', + '2d_bbox-2_pseudo_cell', + '2d_bbox-3_pseudo_cell', + '2d_centroid-0_pseudo_cell', + '2d_centroid-1_pseudo_cell', + '2d_convex_area_pseudo_cell', + '2d_eccentricity_pseudo_cell', + '2d_equivalent_diameter_pseudo_cell', + '2d_extent_pseudo_cell', + '2d_filled_area_pseudo_cell', + '2d_major_axis_length_pseudo_cell', + '2d_minor_axis_length_pseudo_cell', + '2d_orientation_pseudo_cell', + '2d_solidity_pseudo_cell', + '2d_img_shape_pseudo_cell', + 'resolution_level_dup2', + '2d_label_true_edge', + '2d_area_true_edge', + 
'2d_total_area_true_edge', + '2d_label_edge', + '2d_intensity_max_edge', + '2d_intensity_mean_edge', + '2d_intensity_min_edge', # this one is fun, its distance to nearest nucleus edge (different than centroid distance) + '2d_img_shape_edge', + 'resolution_level', + '2d_area_cyto', + 'inv_cyto_density', + + # created in add_groth_features.fit_tracks_to_model() + 'tscale_exponentialfit_volume', + 'atB_exponentialfit_volume', + 'rate_exponentialfit_volume', + 'tscale_linearfit_volume', + 'atB_linearfit_volume', + 'rate_linearfit_volume', + + # created in volume/filter_out_dips.find_and_remove_from_pivot() + 'volume_dips_has_peak', + 'volume_dips_volume_change_at_region', + 'volume_dips_width_at_center', + 'volume_dips_width_at_region', + 'volume_dips_max_volume_change', + 'volume_dips_peak_id_at_center', + 'volume_dips_peak_id_at_region', + 'volume_dips_total_number', ] diff --git a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py index 323de379..681babda 100644 --- a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py +++ b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py @@ -209,7 +209,9 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): '2d_area_nuc_cell_ratio', '2d_perimeter_nucleus', #can be dropped at end of global dataset filterig, only used in load_dataset_with_features. 
'2d_perimeter_pseudo_cell', # can be dropped at end of global dataset filtering, only used in load_dataset_with_features - + '2d_perimeter_nuc_cell_ratio', # used within filter data + 'bad_pseudo_cells_segmentation', + 'uncaught_pseudo_cell_artifact', @@ -230,9 +232,6 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): # volume_dips features 'volume_change_over_25_minutes', #used - '2d_perimeter_nuc_cell_ratio', # used within filter data - 'bad_pseudo_cells_segmentation', - 'uncaught_pseudo_cell_artifact', 'power_fit_volume', #used and could go to tfe 'volume_dips_peak_mask_at_region', #u sed for S10 C right and nice for TFE 'volume_dips_peak_mask_at_center', # used for S10C right @@ -250,15 +249,19 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): ] drop_list = [ + # not sure where these come from + # dropping in global_dataset_filtering (DONE) 'level_0', 'index', 'source_manifest_x', 'source_manifest_y', - '2d_label_true_nucleus', - '2d_area_true_nucleus', - '2d_total_area_true_nucleus', - '2d_label_nucleus', + # this set is defined in psuedo_cell_helper, not used after. 
Remove in gloabl_dataset_filtering (DONE) + # could be removed in pseudo_cell_helper.choose_columns() but that would require rerunning generate_main_manifest + '2d_label_true_nucleus', + '2d_area_true_nucleus', + '2d_total_area_true_nucleus', + '2d_label_nucleus', '2d_bbox-0_nucleus', '2d_bbox-1_nucleus', '2d_bbox-2_nucleus', @@ -308,7 +311,11 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): 'resolution_level', '2d_area_cyto', 'inv_cyto_density', - 'dxdt_5_volume_end', # can be dropped using code, duplicate column + + # can be dropped using code, duplicate column (DONE) + 'dxdt_5_volume_end', + + # drop in global_dataset_filtering (DONE) 'tscale_exponentialfit_volume', 'atB_exponentialfit_volume', 'rate_exponentialfit_volume', @@ -316,6 +323,8 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): 'atB_linearfit_volume', 'rate_linearfit_volume', + # created in volume/filter_out_dips.find_and_remove_from_pivot() and not used after. + # Remove in global_dataset_filtering (DONE) 'volume_dips_has_peak', 'volume_dips_volume_change_at_region', 'volume_dips_width_at_center', @@ -325,6 +334,8 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): 'volume_dips_peak_id_at_region', 'volume_dips_total_number', + # created in add_features.sum_mitotic_events_along_full_track() + # can remove in code (DONE) 'sum_number_of_frame_of_breakdown_neighbors', 'sum_number_of_frame_of_formation_neighbors', 'sum_has_mitotic_neighbor_breakdown', diff --git a/nuc_morph_analysis/lib/visualization/label_tables.py b/nuc_morph_analysis/lib/visualization/label_tables.py index bad35734..66a3896c 100644 --- a/nuc_morph_analysis/lib/visualization/label_tables.py +++ b/nuc_morph_analysis/lib/visualization/label_tables.py @@ -272,12 +272,6 @@ def get_scale_factor_table(dataset="all_baseline"): "2d_area_nuc_cell_ratio": "Nucleus area/(Pseudo)cell area", "2d_area_nucleus": "Nuclear area", "2d_area_pseudo_cell": "(Pseudo)cell area", - "2d_area_cyto": "Cytoplasmic area", - 
"inv_cyto_density": "1 / Cytoplasmic area", - "2d_eccentricity_nucleus": "Nuclear eccentricity (2d)", - "2d_intensity_min_edge" : "Min distance to (pseudo)cell edge", - "2d_intensity_mean_edge" : "Average distance to (pseudo)cell edge", - "2d_intensity_max_edge" : "Max distance to (pseudo)cell edge", # dip event features "volume_change_over_25_minutes": "Change in volume in 25 minute window", @@ -418,12 +412,6 @@ def convert_to_hr(bin_interval, dataset="all_baseline"): "2d_area_nuc_cell_ratio": "Nuclear area to (pseudo)cell area ratio", "2d_area_nucleus": "Nuclear area", "2d_area_pseudo_cell": "(Pseudo)cell area", - "2d_area_cyto": "Cytoplasmic area", - "inv_cyto_density": "Cytoplasmic density", - "2d_eccentricity_nucleus": "Nuclear eccentricity (2d)", - "2d_intensity_min_edge" : "Min distance to (pseudo)cell edge", - "2d_intensity_mean_edge" : "Average distance to (pseudo)cell edge", - "2d_intensity_max_edge" : "Max distance to (pseudo)cell edge", # dip event features "volume_change_over_25_minutes": "Change in volume in 25 minute window", diff --git a/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py b/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py index 73956992..dc405978 100644 --- a/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py +++ b/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py @@ -308,38 +308,11 @@ class NucMorphFeatureSpec: NucMorphFeatureSpec("2d_area_nuc_cell_ratio"), NucMorphFeatureSpec("2d_area_nucleus"), NucMorphFeatureSpec("2d_area_pseudo_cell"), - NucMorphFeatureSpec("2d_area_cyto"), - NucMorphFeatureSpec("2d_intensity_min_edge"), - NucMorphFeatureSpec("2d_intensity_max_edge"), - NucMorphFeatureSpec("2d_intensity_mean_edge"), + # potentially useful, but likely not needed in final dataset - NucMorphFeatureSpec("2d_convex_area_nucleus"), - NucMorphFeatureSpec("2d_eccentricity_nucleus"), - NucMorphFeatureSpec("2d_equivalent_diameter_nucleus"), - 
NucMorphFeatureSpec("2d_extent_nucleus"), - NucMorphFeatureSpec("2d_filled_area_nucleus"), - NucMorphFeatureSpec("2d_major_axis_length_nucleus"), - NucMorphFeatureSpec("2d_minor_axis_length_nucleus"), - NucMorphFeatureSpec("2d_orientation_nucleus"), NucMorphFeatureSpec("2d_perimeter_nucleus"), - NucMorphFeatureSpec("2d_solidity_nucleus"), - # potentially useful, but likely not needed in final dataset - NucMorphFeatureSpec("2d_convex_area_pseudo_cell"), - NucMorphFeatureSpec("2d_eccentricity_pseudo_cell"), - NucMorphFeatureSpec("2d_equivalent_diameter_pseudo_cell"), - NucMorphFeatureSpec("2d_extent_pseudo_cell"), - NucMorphFeatureSpec("2d_filled_area_pseudo_cell"), - NucMorphFeatureSpec("2d_major_axis_length_pseudo_cell"), - NucMorphFeatureSpec("2d_minor_axis_length_pseudo_cell"), - NucMorphFeatureSpec("2d_orientation_pseudo_cell"), NucMorphFeatureSpec("2d_perimeter_pseudo_cell"), - NucMorphFeatureSpec("2d_solidity_pseudo_cell"), - # extra - NucMorphFeatureSpec('inv_cyto_density'), NucMorphFeatureSpec('2d_perimeter_nuc_cell_ratio'), - NucMorphFeatureSpec('2d_eccentricity_nuc_cell_ratio'), - NucMorphFeatureSpec('label_pseudo_cell'), - # volume dip columns NucMorphFeatureSpec('colony_depth', type=FeatureType.DISCRETE), From 171998766983896b14078e85a2d3d17fd6aa4030 Mon Sep 17 00:00:00 2001 From: Chris Frick Date: Thu, 5 Dec 2024 13:52:39 -0800 Subject: [PATCH 14/34] move extra neighbor_of_X code into examples folder --- .../analyze_effect_of_mitotic_neighbors.py | 64 ------------------- .../example_dying_track_neighbors.py | 0 .../example_mitotic_track_neighbors.py | 0 ..._tracks_with_multiple_mitotic_neighbors.py | 0 .../example_timepoint_neighbors_of_dying.py | 0 .../example_timepoint_numbers_of_mitotic.py | 0 ...ample_timepoint_of_neighbors_of_mitotic.py | 0 7 files changed, 64 deletions(-) delete mode 100644 nuc_morph_analysis/analyses/neighbor_of_X/analyze_effect_of_mitotic_neighbors.py rename nuc_morph_analysis/analyses/neighbor_of_X/{ => 
examples}/example_dying_track_neighbors.py (100%) rename nuc_morph_analysis/analyses/neighbor_of_X/{ => examples}/example_mitotic_track_neighbors.py (100%) rename nuc_morph_analysis/analyses/neighbor_of_X/{ => examples}/example_of_tracks_with_multiple_mitotic_neighbors.py (100%) rename nuc_morph_analysis/analyses/neighbor_of_X/{ => examples}/example_timepoint_neighbors_of_dying.py (100%) rename nuc_morph_analysis/analyses/neighbor_of_X/{ => examples}/example_timepoint_numbers_of_mitotic.py (100%) rename nuc_morph_analysis/analyses/neighbor_of_X/{ => examples}/example_timepoint_of_neighbors_of_mitotic.py (100%) diff --git a/nuc_morph_analysis/analyses/neighbor_of_X/analyze_effect_of_mitotic_neighbors.py b/nuc_morph_analysis/analyses/neighbor_of_X/analyze_effect_of_mitotic_neighbors.py deleted file mode 100644 index e53edd5a..00000000 --- a/nuc_morph_analysis/analyses/neighbor_of_X/analyze_effect_of_mitotic_neighbors.py +++ /dev/null @@ -1,64 +0,0 @@ -#%% -# this script attempts to correlate the number of mitotic -from nuc_morph_analysis.lib.preprocessing import global_dataset_filtering,filter_data -from nuc_morph_analysis.lib.visualization.plotting_tools import get_plot_labels_for_metric -from nuc_morph_analysis.lib.visualization.notebook_tools import save_and_show_plot -import numpy as np -import os -from pathlib import Path -import matplotlib.pyplot as plt -from sklearn.linear_model import LinearRegression -#%% -df = global_dataset_filtering.load_dataset_with_features(dataset='all_baseline') -#%% -dfc = df[df['colony'] == 'medium'] -dft = filter_data.track_level_features(dfc) - -#%% -# ask how correlated number of mitotic neighbors is with other features -mitotic_event_features = [ - 'number_of_frame_of_breakdown_neighbors', - 'has_mitotic_neighbor_breakdown', - 'has_dying_neighbor', - 'number_of_frame_of_death_neighbors' -] - -# set figure directory -resolution_level = 1 -figdir = Path(__file__).parent / "figures" / "analysis_of_neighbor_events_with_features" 
-os.makedirs(figdir,exist_ok=True) - - -ycol_list = ['volume_fold_change_BC','duration_BC'] -for ycol in ycol_list: - nrows = 1 - ncols = len(mitotic_event_features) - fig,ax = plt.subplots(nrows,ncols,figsize=(ncols*3,nrows*3), layout='constrained') - assert type(ax) == np.ndarray # for mypy - for fi,feature in enumerate(mitotic_event_features): - xcol = f"sum_{feature}" - xscale,xlabel,xunit,_ = get_plot_labels_for_metric(xcol) - yscale,ylabel,yunit,_ = get_plot_labels_for_metric(ycol) - - x = dft[xcol] * xscale - y = dft[ycol] - - # fit a linear regression - model = LinearRegression() - model.fit(x.values.reshape(-1,1),y) - y_pred = model.predict(x.values.reshape(-1,1)) - r2 = model.score(x.values.reshape(-1,1),y) - - curr_ax = ax[fi] - assert type(curr_ax) == plt.Axes # for mypy - curr_ax.scatter(x,y) - curr_ax.set_xlabel(f'{xlabel} {xunit}') - curr_ax.set_ylabel(f'{ylabel} {yunit}') - curr_ax.set_title(f'{feature}\nvs\n{ycol}') - curr_ax.plot(x,y_pred,'--') - curr_ax.text(0.05,0.95,f'R^2 = {r2:.2f}',transform=curr_ax.transAxes, - va='top',ha='left',fontsize=8) - savename = figdir / f'{ycol}_vs_neighbor_event_features.png' - save_and_show_plot(str(savename),file_extension='.png',figure=fig,transparent=False) - plt.show() - diff --git a/nuc_morph_analysis/analyses/neighbor_of_X/example_dying_track_neighbors.py b/nuc_morph_analysis/analyses/neighbor_of_X/examples/example_dying_track_neighbors.py similarity index 100% rename from nuc_morph_analysis/analyses/neighbor_of_X/example_dying_track_neighbors.py rename to nuc_morph_analysis/analyses/neighbor_of_X/examples/example_dying_track_neighbors.py diff --git a/nuc_morph_analysis/analyses/neighbor_of_X/example_mitotic_track_neighbors.py b/nuc_morph_analysis/analyses/neighbor_of_X/examples/example_mitotic_track_neighbors.py similarity index 100% rename from nuc_morph_analysis/analyses/neighbor_of_X/example_mitotic_track_neighbors.py rename to 
nuc_morph_analysis/analyses/neighbor_of_X/examples/example_mitotic_track_neighbors.py diff --git a/nuc_morph_analysis/analyses/neighbor_of_X/example_of_tracks_with_multiple_mitotic_neighbors.py b/nuc_morph_analysis/analyses/neighbor_of_X/examples/example_of_tracks_with_multiple_mitotic_neighbors.py similarity index 100% rename from nuc_morph_analysis/analyses/neighbor_of_X/example_of_tracks_with_multiple_mitotic_neighbors.py rename to nuc_morph_analysis/analyses/neighbor_of_X/examples/example_of_tracks_with_multiple_mitotic_neighbors.py diff --git a/nuc_morph_analysis/analyses/neighbor_of_X/example_timepoint_neighbors_of_dying.py b/nuc_morph_analysis/analyses/neighbor_of_X/examples/example_timepoint_neighbors_of_dying.py similarity index 100% rename from nuc_morph_analysis/analyses/neighbor_of_X/example_timepoint_neighbors_of_dying.py rename to nuc_morph_analysis/analyses/neighbor_of_X/examples/example_timepoint_neighbors_of_dying.py diff --git a/nuc_morph_analysis/analyses/neighbor_of_X/example_timepoint_numbers_of_mitotic.py b/nuc_morph_analysis/analyses/neighbor_of_X/examples/example_timepoint_numbers_of_mitotic.py similarity index 100% rename from nuc_morph_analysis/analyses/neighbor_of_X/example_timepoint_numbers_of_mitotic.py rename to nuc_morph_analysis/analyses/neighbor_of_X/examples/example_timepoint_numbers_of_mitotic.py diff --git a/nuc_morph_analysis/analyses/neighbor_of_X/example_timepoint_of_neighbors_of_mitotic.py b/nuc_morph_analysis/analyses/neighbor_of_X/examples/example_timepoint_of_neighbors_of_mitotic.py similarity index 100% rename from nuc_morph_analysis/analyses/neighbor_of_X/example_timepoint_of_neighbors_of_mitotic.py rename to nuc_morph_analysis/analyses/neighbor_of_X/examples/example_timepoint_of_neighbors_of_mitotic.py From 06169b583ece2e016eadff7ece2ea6232e8b5e6f Mon Sep 17 00:00:00 2001 From: Chris Frick Date: Thu, 5 Dec 2024 13:53:46 -0800 Subject: [PATCH 15/34] clarify var names --- .../lib/preprocessing/save_datasets_for_quilt.py | 
8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py index 681babda..32921cf5 100644 --- a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py +++ b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py @@ -179,7 +179,7 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): "neighbor_avg_lrm_xy_aspect_90um", "neighbor_avg_lrm_mesh_sa_90um", "neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um", - "neighbor_avg_lrm_dxdt_48_volume_90um", + # "neighbor_avg_lrm_dxdt_48_volume_90um", #used in lrm "sisters_volume_at_B", @@ -348,16 +348,16 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): ] -new_cols = [col for col in new_cols if col not in keep_list and col not in drop_list] +unnacounted_cols = [col for col in new_cols if col not in keep_list and col not in drop_list] print(len(keep_list)) -print(len(new_cols)) +print(len(unnacounted_cols)) overlap = [x for x in keep_list if x in drop_list] overlap2 = [x for x in drop_list if x in keep_list] print(len(overlap),len(overlap2)) #%% -for col in new_cols: +for col in unnacounted_cols: print(col) From e8080d98969f1ec012ee800b1ca9392604f06cd5 Mon Sep 17 00:00:00 2001 From: Chris Frick Date: Thu, 5 Dec 2024 14:05:02 -0800 Subject: [PATCH 16/34] remove unused number_of_frame_X_neighbor columns and corresponding example code --- .../example_timepoint_numbers_of_mitotic.py | 70 ------------------- .../preprocessing/save_datasets_for_quilt.py | 12 ++-- 2 files changed, 8 insertions(+), 74 deletions(-) delete mode 100644 nuc_morph_analysis/analyses/neighbor_of_X/examples/example_timepoint_numbers_of_mitotic.py diff --git a/nuc_morph_analysis/analyses/neighbor_of_X/examples/example_timepoint_numbers_of_mitotic.py b/nuc_morph_analysis/analyses/neighbor_of_X/examples/example_timepoint_numbers_of_mitotic.py deleted file mode 100644 index ad097c89..00000000 
--- a/nuc_morph_analysis/analyses/neighbor_of_X/examples/example_timepoint_numbers_of_mitotic.py +++ /dev/null @@ -1,70 +0,0 @@ -#%% -from nuc_morph_analysis.lib.visualization.notebook_tools import save_and_show_plot -from nuc_morph_analysis.lib.preprocessing import global_dataset_filtering -from pathlib import Path -import pandas as pd -import os -import matplotlib.pyplot as plt -from nuc_morph_analysis.lib.preprocessing import load_data -from nuc_morph_analysis.lib.visualization.plotting_tools import plot_colorized_img_with_labels - -# TEMP: loading local for testing and speed - -# for testing only use a subset of timepoints - -def run_example(df:pd.DataFrame, colony:str = 'medium', timepoint:int = 57, resolution_level:int =1): - """ - this code will plot the number of mitotic neighbors for cells at a specific timepoint - - Parameters - ---------- - df : pd.DataFrame - the dataframe containing the data - colony : str - the colony to analyze - timepoint : int - the timepoint to analyze - resolution_level : int - the resolution level to use for the images (OME-ZARR) - - Returns - ------- - None - - Outputs - ------- - Saves a figure to the figures/example_timepoint_numbers_of_mitotic directory - """ - # set figure directory - figdir = Path(__file__).parent / "figures" / "example_timepoint_numbers_of_mitotic" - os.makedirs(figdir,exist_ok=True) - - # color by number of mitotic neighbors - # now plot the image with the mitotic neighbors - reader = load_data.get_dataset_segmentation_file_reader(colony) - if resolution_level>0: - reader.set_resolution_level(resolution_level) - - lazy_img = reader.get_image_dask_data("ZYX",T=timepoint) - img= lazy_img.compute() - dfm = df.loc[df['colony']==colony].copy() - dft = dfm[dfm['index_sequence']==timepoint] - colormap_dict = {} - cmap1 = plt.get_cmap('Dark2_r') - col = 'number_of_frame_of_breakdown_neighbors' - colormap_dict.update({f'{col}_{i}':(col,i,i+1,cmap1.colors[i],f"{i} mitotic neighbors") for i in 
range(dft[col].max()+1)}) # type: ignore - colormap_dict.update({'frame_of_breakdown':('frame_of_breakdown',True,8,(1,0,0),f"breakdown event")}) - fig,ax = plt.subplots(figsize=(5,5),layout='constrained') - _ = plot_colorized_img_with_labels(ax,img,dft,colormap_dict) - savename = figdir / f'{colony}-{timepoint}-{col}_number_of_neighbors.png' - savepath = figdir / savename - save_and_show_plot(savepath.as_posix(), - file_extension='.png', - figure=fig, - transparent=False, - ) - plt.show() - -if __name__ == "__main__": - df = global_dataset_filtering.load_dataset_with_features(dataset='all_baseline') - run_example(df) \ No newline at end of file diff --git a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py index 32921cf5..13fd937b 100644 --- a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py +++ b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py @@ -219,16 +219,15 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): 'frame_of_breakdown', # used in figure_mitotic_filtering_examples.py 'frame_of_formation',# used in figure_mitotic_filtering_examples.py 'has_mitotic_neighbor_breakdown', # used in validation/illustration code, useful to keep - 'number_of_frame_of_breakdown_neighbors', # used in neighbor_of_X/example_timepoint_numbers_of_mitotic.py 'has_mitotic_neighbor_formation', # used in validation/illustration code, useful to keep - 'number_of_frame_of_formation_neighbors', # used in neighbor_of_X/example_timepoint_numbers_of_mitotic.py 'has_mitotic_neighbor_breakdown_forward_dilated', # used in neighbor_of_X/example 'has_mitotic_neighbor_formation_backward_dilated', # used in neighbor_of_X/example 'has_mitotic_neighbor_dilated', # used in figure_mitotic_filtering_examples.py 'identified_death', # used in neighbor_of_X/example 'frame_of_death', # used in neighbor_of_X/example - 'number_of_frame_of_death_neighbors', # used - 
'has_dying_neighbor_forward_dilated', #used + + + 'has_dying_neighbor_forward_dilated', #used, must keep # volume_dips features 'volume_change_over_25_minutes', #used @@ -346,6 +345,11 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): 'sum_has_dying_neighbor_forward_dilated', 'sum_number_of_frame_of_death_neighbors', + # created in labeling_neighbors_helper.find_neighbors_of_cells() + # remove in global_dataset_filtering (DONE) + 'number_of_frame_of_breakdown_neighbors', # CAN BE DROPPED, not used + 'number_of_frame_of_death_neighbors', # CAN BE DROPPED, not used + 'number_of_frame_of_formation_neighbors', # CAN BE DROPPED, not used ] unnacounted_cols = [col for col in new_cols if col not in keep_list and col not in drop_list] From dc516b5bb87b7f75697f2a1a546422e3e08f5e6d Mon Sep 17 00:00:00 2001 From: Chris Frick Date: Thu, 5 Dec 2024 14:28:07 -0800 Subject: [PATCH 17/34] add code to remove those columns using global_dataset_filtering --- .../lib/preprocessing/global_dataset_filtering.py | 7 +++++++ .../lib/preprocessing/save_datasets_for_quilt.py | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py b/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py index feebf348..7ee80ce8 100644 --- a/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py +++ b/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py @@ -411,6 +411,13 @@ def merge_datasets(df_all, df_full): 'volume_dips_peak_id_at_center', 'volume_dips_peak_id_at_region', 'volume_dips_total_number', + + # created in labeling_neighbors_helper.find_neighbors_of_cells() + # not used + 'number_of_frame_of_breakdown_neighbors', + 'number_of_frame_of_death_neighbors', + 'number_of_frame_of_formation_neighbors', + ] diff --git a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py index 13fd937b..8fa12006 100644 --- 
a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py +++ b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py @@ -353,7 +353,8 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): ] unnacounted_cols = [col for col in new_cols if col not in keep_list and col not in drop_list] - +still_needs_dropping = [col for col in new_cols if col not in keep_list] +print(len(still_needs_dropping), still_needs_dropping) print(len(keep_list)) print(len(unnacounted_cols)) overlap = [x for x in keep_list if x in drop_list] From 6ef56f6a6d6be1716d8cb3e3184ff46cac6b0805 Mon Sep 17 00:00:00 2001 From: Julie Dixon Date: Thu, 5 Dec 2024 14:46:09 -0800 Subject: [PATCH 18/34] remove dropped columns from label_tables --- .../lib/visualization/label_tables.py | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/nuc_morph_analysis/lib/visualization/label_tables.py b/nuc_morph_analysis/lib/visualization/label_tables.py index 66a3896c..a7d167b6 100644 --- a/nuc_morph_analysis/lib/visualization/label_tables.py +++ b/nuc_morph_analysis/lib/visualization/label_tables.py @@ -52,7 +52,6 @@ def get_scale_factor_table(dataset="all_baseline"): "width", "length", "distance_from_centroid", - "max_distance_from_centroid", ): pix_size, ("mesh_sa"): pix_size**2, @@ -78,8 +77,8 @@ def get_scale_factor_table(dataset="all_baseline"): ( "2d_area_nucleus", "2d_area_pseudo_cell", - ): (pix_size/2.5)**2, # resolution_level 1 is 2.5x downsampled - ("2d_area_pseudo_cell"): (pix_size/2.5)**2, # resolution_level 1 is 2.5x downsampled + ): (pix_size/2.5)**2, + ("2d_area_pseudo_cell"): (pix_size/2.5)**2, ("2d_area_nuc_cell_ratio"): 1, } @@ -213,14 +212,10 @@ def get_scale_factor_table(dataset="all_baseline"): "distance": "Distance", "distance_from_centroid": "Distance from centroid", "normalized_distance_from_centroid": "Normalized distance from centroid", - "max_distance_from_centroid": "Max distance from centroid", "colony_depth": "Colony depth", 
"normalized_colony_depth": "Normalized colony depth", - "max_colony_depth": "Max colony depth", "avg_colony_depth": "Average colony depth", # Density - "colony_non_circularity": "Colony non-circularity", - "colony_non_circularity_scaled": "Scaled colony non-circularity", "avg_early_density": "Early density", "avg_late_density": "Late density", "density": "Density", @@ -263,9 +258,6 @@ def get_scale_factor_table(dataset="all_baseline"): 'mean_neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um': "Neighborhood avg. mean density", # mitotic and apoptotic neighbor columns - "number_of_frame_of_breakdown_neighbors": "# of neighboring cells undergoing breakdown", - "number_of_frame_of_formation_neighbors": "# of neighboring cells undergoing formation", - "number_of_frame_of_death_neighbors": "# of neighboring cells undergoing death", "normalized_sum_has_mitotic_neighbor": "Frequency of mitotic adjacent neighbors", "normalized_sum_has_dying_neighbor": "Frequency of dying adjacent neighbors", # 2D area features @@ -424,15 +416,11 @@ def convert_to_hr(bin_interval, dataset="all_baseline"): "width", "length", "distance_from_centroid", - "max_distance_from_centroid", "height", "height_at_B", "height_at_C", "avg_height", "distance", - "2d_intensity_min_edge", - "2d_intensity_mean_edge", - "2d_intensity_max_edge", ): "(μm)", ( "RMSE_linearityfit_SA", @@ -445,7 +433,6 @@ def convert_to_hr(bin_interval, dataset="all_baseline"): "seg_twoD_zMIP_area", "2d_area_nucleus", "2d_area_pseudo_cell", - "2d_area_cyto", ): "(μm²)", ( "volume", @@ -466,7 +453,6 @@ def convert_to_hr(bin_interval, dataset="all_baseline"): "avg_early_density", "avg_late_density", "avg_density", - "inv_cyto_density", ): "(μm⁻²)", # Temporal ( From 9a423850ef58cf3ae444e2740bc9a5c49a4eb4d4 Mon Sep 17 00:00:00 2001 From: Julie Dixon Date: Thu, 5 Dec 2024 21:58:24 -0800 Subject: [PATCH 19/34] remove density --- ...date_watershed_psuedo_cell_seg_workflow.py | 2 - nuc_morph_analysis/analyses/height/plot.py | 28 ----- 
.../lib/preprocessing/add_colony_metrics.py | 14 +-- .../lib/preprocessing/filter_data.py | 12 +- .../preprocessing/generate_main_manifest.py | 2 +- .../generate_perturbation_manifest.py | 2 +- .../preprocessing/global_dataset_filtering.py | 1 + .../preprocessing/save_datasets_for_quilt.py | 105 ---------------- .../export_code/export_helper.py | 2 - .../vis_code/make_single_track_movie.py | 1 - .../vis_code/single_track_contact.py | 2 - .../twoD_zMIP_area/pseudo_cell_helper.py | 2 +- .../lib/visualization/glossary.py | 1 - .../lib/visualization/label_tables.py | 22 +--- .../visualization/write_data_for_colorizer.py | 4 - test/lib/preprocessing/test_voronoi.py | 119 +----------------- 16 files changed, 14 insertions(+), 305 deletions(-) diff --git a/nuc_morph_analysis/analyses/density/extra_checks/visually_validate_watershed_psuedo_cell_seg_workflow.py b/nuc_morph_analysis/analyses/density/extra_checks/visually_validate_watershed_psuedo_cell_seg_workflow.py index 716b17c3..4bd9daf5 100644 --- a/nuc_morph_analysis/analyses/density/extra_checks/visually_validate_watershed_psuedo_cell_seg_workflow.py +++ b/nuc_morph_analysis/analyses/density/extra_checks/visually_validate_watershed_psuedo_cell_seg_workflow.py @@ -232,8 +232,6 @@ def run_validation_and_plot(TIMEPOINT=48,colony='medium',RESOLUTION_LEVEL=1,plot plot_colorized_image_with_contours(img_dict,dft,'2d_area_pseudo_cell','viridis',colony,TIMEPOINT,RESOLUTION_LEVEL,categorical=False,draw_contours=True) plot_colorized_image_with_contours(img_dict,dft,'2d_intensity_min_edge','viridis',colony,TIMEPOINT,RESOLUTION_LEVEL,categorical=False,draw_contours=False) plot_colorized_image_with_contours(img_dict,dft,'2d_intensity_min_edge','viridis',colony,TIMEPOINT,RESOLUTION_LEVEL,categorical=False,draw_contours=True) - plot_colorized_image_with_contours(img_dict,dft,'density','viridis',colony,TIMEPOINT,RESOLUTION_LEVEL,categorical=False,draw_contours=False) - 
plot_colorized_image_with_contours(img_dict,dft,'density','viridis',colony,TIMEPOINT,RESOLUTION_LEVEL,categorical=False,draw_contours=True) plot_colorized_image_with_contours(img_dict,dft,'zeros','viridis',colony,TIMEPOINT,RESOLUTION_LEVEL,categorical=False,draw_contours=True) return dft0 else: diff --git a/nuc_morph_analysis/analyses/height/plot.py b/nuc_morph_analysis/analyses/height/plot.py index cac15359..79da6004 100644 --- a/nuc_morph_analysis/analyses/height/plot.py +++ b/nuc_morph_analysis/analyses/height/plot.py @@ -96,34 +96,6 @@ def height_colony_time_alignment( ) -def calculate_mean_density(df, scale): - """ - Calculate the mean height for a given index_sequence (i.e. timepoint) and the standard deviation of the mean. - - Parameters - ---------- - df : pandas.DataFrame - DataFrame containing the data. - pixel_size : float - Pixel size in microns. - - Returns - ------- - mean_height : list - List of mean heights for each index_sequence. - standard_dev_height : list - List of standard deviations of the mean heights for each index_sequence. - """ - mean = [] - standard_dev = [] - feature_col = "2d_area_nuc_cell_ratio" - for _, df_frame in df.groupby("index_sequence"): - density = df_frame[feature_col].values * scale - mean.append(np.nanmean(density)) - standard_dev.append(np.nanstd(density)) - return mean, standard_dev - - def density_colony_time_alignment( df, pixel_size, diff --git a/nuc_morph_analysis/lib/preprocessing/add_colony_metrics.py b/nuc_morph_analysis/lib/preprocessing/add_colony_metrics.py index bffaae95..78dc1ac0 100644 --- a/nuc_morph_analysis/lib/preprocessing/add_colony_metrics.py +++ b/nuc_morph_analysis/lib/preprocessing/add_colony_metrics.py @@ -34,7 +34,6 @@ def add_colony_metrics(df: pd.DataFrame): 1 are depth 2, etc. neighbors: string. List of neighboring Cell IDs neigh_distance: float. Unit: voxels. Mean distance to neighboring cells - density: float. Unit: voxels. 
1 / neigh_distance^2 """ # This function is only intended to run on data from one colony at a time if "dataset" in df.columns: @@ -61,9 +60,6 @@ def _add_colony_metrics_one_tp(df_timepoint: pd.DataFrame): for _, (lbl, dist) in enumerate(neigh_dists.items()): df_timepoint.loc[df_timepoint["label_img"] == lbl, "neigh_distance"] = dist - for _, (lbl, density) in enumerate(densities.items()): - df_timepoint.loc[df_timepoint["label_img"] == lbl, "density"] = density - for _, (lbl, neighbors) in enumerate(neighborhoods.items()): neighbor_ids = [] for neighbor in neighbors: @@ -88,16 +84,15 @@ def _calc_colony_metrics(df_timepoint): neighbors = _make_neighbor_map(voronoi, labels) centroids_by_label = {label: centroids_list[index] for index, label in enumerate(labels)} - neigh_distance, density = _calculate_distance_density(labels, neighbors, centroids_by_label) + neigh_distance, _ = _calculate_distance(labels, neighbors, centroids_by_label) depth1_labels = _get_depth1_labels(labels, centroids_list, voronoi) depth_map = calculate_depth(neighbors, depth1_labels) - return depth_map, neighbors, neigh_distance, density + return depth_map, neighbors, neigh_distance -def _calculate_distance_density(labels, neighbors, centroids): - density = {} +def _calculate_distance(labels, neighbors, centroids): neigh_distance = {} for lbl in labels: try: @@ -114,9 +109,8 @@ def _calculate_distance_density(labels, neighbors, centroids): if neighbor != lbl: dist = np.sqrt(np.sum((centroid - np.array(centroids[neighbor])) ** 2, axis=0)) dists.append(dist) - density[lbl] = 1 / np.mean(dists) ** 2 neigh_distance[lbl] = np.mean(dists) - return neigh_distance, density + return neigh_distance def _make_neighbor_map(voronoi, labels): diff --git a/nuc_morph_analysis/lib/preprocessing/filter_data.py b/nuc_morph_analysis/lib/preprocessing/filter_data.py index 0ff547d5..80811b01 100644 --- a/nuc_morph_analysis/lib/preprocessing/filter_data.py +++ b/nuc_morph_analysis/lib/preprocessing/filter_data.py @@ 
-1012,12 +1012,10 @@ def remove_expected_pseudo_cell_artifacts(dfm, apply_to_nucleus_too=False, verbo compiled_log = log1 | log2 | log3 | log4 | log5 # define the columns to apply the filter to - extra_cols = ['density'] - # cols = [x for x in cols if '2d_' in dfm.columns] if apply_to_nucleus_too: - cols = [x for x in dfm.columns if ('2d_' in x) & ('label' not in x)] + extra_cols + cols = [x for x in dfm.columns if ('2d_' in x) & ('label' not in x)] else: - cols = [x for x in dfm.columns if ('2d_' in x) & ('nucleus' not in x) & ('label' not in x)] + extra_cols + cols = [x for x in dfm.columns if ('2d_' in x) & ('nucleus' not in x) & ('label' not in x)] # apply the filter dfm.loc[compiled_log, cols] = np.nan @@ -1064,12 +1062,10 @@ def remove_uncaught_pseudo_cell_artifacts(df, apply_to_nucleus_too=False, verbos compiled_log = log1 | log2 | log3 # define the columns to apply the filter to - extra_cols = ['density'] - # cols = [x for x in cols if '2d_' in dfm.columns] if apply_to_nucleus_too: - cols = [x for x in df.columns if ('2d_' in x) & ('label' not in x)] + extra_cols + cols = [x for x in df.columns if ('2d_' in x) & ('label' not in x)] else: - cols = [x for x in df.columns if ('2d_' in x) & ('nucleus' not in x) & ('label' not in x)] + extra_cols + cols = [x for x in df.columns if ('2d_' in x) & ('nucleus' not in x) & ('label' not in x)] # apply the filter df.loc[compiled_log, cols] = np.nan diff --git a/nuc_morph_analysis/lib/preprocessing/generate_main_manifest.py b/nuc_morph_analysis/lib/preprocessing/generate_main_manifest.py index 153c8e6d..f3cb122d 100644 --- a/nuc_morph_analysis/lib/preprocessing/generate_main_manifest.py +++ b/nuc_morph_analysis/lib/preprocessing/generate_main_manifest.py @@ -89,7 +89,7 @@ def generate_manifest_one_colony(morflowgenesis_df, colony, manual_lineage_annot # -------------------------- # STEP 5: Add colony metrics # -------------------------- - # density and other add_colony_metrics features + # add_colony_metrics features 
logging.info("Calculating colony metrics") step5_df = step4_df.copy() step5_df = add_colony_metrics(step5_df) diff --git a/nuc_morph_analysis/lib/preprocessing/generate_perturbation_manifest.py b/nuc_morph_analysis/lib/preprocessing/generate_perturbation_manifest.py index bcca5632..a3c8c680 100644 --- a/nuc_morph_analysis/lib/preprocessing/generate_perturbation_manifest.py +++ b/nuc_morph_analysis/lib/preprocessing/generate_perturbation_manifest.py @@ -84,7 +84,7 @@ def generate_manifest_one_colony(morflowgenesis_df, dataset, experiments=None): # -------------------------- # STEP 5: Add colony metrics # -------------------------- - # density and other add_colony_metrics features + # add_colony_metrics features logging.info("Calculating colony metrics") return add_colony_metrics(step5_df) diff --git a/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py b/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py index 7ee80ce8..bdbd7f79 100644 --- a/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py +++ b/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py @@ -393,6 +393,7 @@ def merge_datasets(df_all, df_full): 'resolution_level', '2d_area_cyto', 'inv_cyto_density', + 'density' # created in add_groth_features.fit_tracks_to_model() 'tscale_exponentialfit_volume', diff --git a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py index 8fa12006..f0e498be 100644 --- a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py +++ b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py @@ -90,7 +90,6 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): "entering_or_exiting_division", "neighbors", "neigh_distance", - "density", "is_tp_outlier", "track_length", "is_outlier_by_short_track", @@ -247,110 +246,6 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): ] -drop_list = [ - # not sure where these come from - # 
dropping in global_dataset_filtering (DONE) - 'level_0', - 'index', - 'source_manifest_x', - 'source_manifest_y', - - # this set is defined in psuedo_cell_helper, not used after. Remove in gloabl_dataset_filtering (DONE) - # could be removed in pseudo_cell_helper.choose_columns() but that would require rerunning generate_main_manifest - '2d_label_true_nucleus', - '2d_area_true_nucleus', - '2d_total_area_true_nucleus', - '2d_label_nucleus', - '2d_bbox-0_nucleus', - '2d_bbox-1_nucleus', - '2d_bbox-2_nucleus', - '2d_bbox-3_nucleus', - '2d_centroid-0_nucleus', - '2d_centroid-1_nucleus', - '2d_convex_area_nucleus', - '2d_eccentricity_nucleus', - '2d_equivalent_diameter_nucleus', - '2d_extent_nucleus', - '2d_filled_area_nucleus', - '2d_major_axis_length_nucleus', - '2d_minor_axis_length_nucleus', - '2d_orientation_nucleus', - '2d_solidity_nucleus', - '2d_img_shape_nucleus', - 'resolution_level_dup1', - '2d_label_true_pseudo_cell', - '2d_area_true_pseudo_cell', - '2d_total_area_true_pseudo_cell', - '2d_label_pseudo_cell', - '2d_bbox-0_pseudo_cell', - '2d_bbox-1_pseudo_cell', - '2d_bbox-2_pseudo_cell', - '2d_bbox-3_pseudo_cell', - '2d_centroid-0_pseudo_cell', - '2d_centroid-1_pseudo_cell', - '2d_convex_area_pseudo_cell', - '2d_eccentricity_pseudo_cell', - '2d_equivalent_diameter_pseudo_cell', - '2d_extent_pseudo_cell', - '2d_filled_area_pseudo_cell', - '2d_major_axis_length_pseudo_cell', - '2d_minor_axis_length_pseudo_cell', - '2d_orientation_pseudo_cell', - '2d_solidity_pseudo_cell', - '2d_img_shape_pseudo_cell', - 'resolution_level_dup2', - '2d_label_true_edge', - '2d_area_true_edge', - '2d_total_area_true_edge', - '2d_label_edge', - '2d_intensity_max_edge', - '2d_intensity_mean_edge', - '2d_intensity_min_edge', # this one is fun, its distance to nearest nucleus edge (different than centroid distance) - '2d_img_shape_edge', - 'resolution_level', - '2d_area_cyto', - 'inv_cyto_density', - - # can be dropped using code, duplicate column (DONE) - 'dxdt_5_volume_end', - - # 
drop in global_dataset_filtering (DONE) - 'tscale_exponentialfit_volume', - 'atB_exponentialfit_volume', - 'rate_exponentialfit_volume', - 'tscale_linearfit_volume', - 'atB_linearfit_volume', - 'rate_linearfit_volume', - - # created in volume/filter_out_dips.find_and_remove_from_pivot() and not used after. - # Remove in global_dataset_filtering (DONE) - 'volume_dips_has_peak', - 'volume_dips_volume_change_at_region', - 'volume_dips_width_at_center', - 'volume_dips_width_at_region', - 'volume_dips_max_volume_change', - 'volume_dips_peak_id_at_center', - 'volume_dips_peak_id_at_region', - 'volume_dips_total_number', - - # created in add_features.sum_mitotic_events_along_full_track() - # can remove in code (DONE) - 'sum_number_of_frame_of_breakdown_neighbors', - 'sum_number_of_frame_of_formation_neighbors', - 'sum_has_mitotic_neighbor_breakdown', - 'sum_has_mitotic_neighbor_formation', - 'sum_has_mitotic_neighbor_breakdown_forward_dilated', - 'sum_has_mitotic_neighbor_formation_backward_dilated', - 'sum_has_mitotic_neighbor_dilated', - 'sum_has_dying_neighbor_forward_dilated', - 'sum_number_of_frame_of_death_neighbors', - - # created in labeling_neighbors_helper.find_neighbors_of_cells() - # remove in global_dataset_filtering (DONE) - 'number_of_frame_of_breakdown_neighbors', # CAN BE DROPPED, not used - 'number_of_frame_of_death_neighbors', # CAN BE DROPPED, not used - 'number_of_frame_of_formation_neighbors', # CAN BE DROPPED, not used - ] unnacounted_cols = [col for col in new_cols if col not in keep_list and col not in drop_list] still_needs_dropping = [col for col in new_cols if col not in keep_list] diff --git a/nuc_morph_analysis/lib/preprocessing/single_track_contact/export_code/export_helper.py b/nuc_morph_analysis/lib/preprocessing/single_track_contact/export_code/export_helper.py index 47c7e1fc..d7d34bd1 100644 --- a/nuc_morph_analysis/lib/preprocessing/single_track_contact/export_code/export_helper.py +++ 
b/nuc_morph_analysis/lib/preprocessing/single_track_contact/export_code/export_helper.py @@ -40,7 +40,6 @@ ("aspect_ax", ["xz_aspect", "xy_aspect", "zy_aspect"]), ("depth_ax", ["colony_depth"]), ("neigh_ax", ["dxdt_12_volume", "dxdt_24_volume", "dxdt_48_volume"]), - ("density_ax", ["density"]), ] @@ -251,7 +250,6 @@ def create_the_figure(): ax_dict["aspect_ax"] = fig.add_subplot(gs[15:19, 1:7]) ax_dict["neigh_ax"] = fig.add_subplot(gs[15:19, 9:15]) - ax_dict["density_ax"] = fig.add_subplot(gs[15:19, 17:23]) ax_dict["depth_ax"] = fig.add_subplot(gs[15:19, 25:31]) return fig, ax_dict diff --git a/nuc_morph_analysis/lib/preprocessing/single_track_contact/vis_code/make_single_track_movie.py b/nuc_morph_analysis/lib/preprocessing/single_track_contact/vis_code/make_single_track_movie.py index d9df1e77..e8b8ab86 100644 --- a/nuc_morph_analysis/lib/preprocessing/single_track_contact/vis_code/make_single_track_movie.py +++ b/nuc_morph_analysis/lib/preprocessing/single_track_contact/vis_code/make_single_track_movie.py @@ -21,7 +21,6 @@ "zy_aspect", "colony_depth", "neigh_distance", - "density", "dxdt_48_volume", "dxdt_24_volume", "dxdt_12_volume", diff --git a/nuc_morph_analysis/lib/preprocessing/single_track_contact/vis_code/single_track_contact.py b/nuc_morph_analysis/lib/preprocessing/single_track_contact/vis_code/single_track_contact.py index 1f75a69e..46d6eed4 100644 --- a/nuc_morph_analysis/lib/preprocessing/single_track_contact/vis_code/single_track_contact.py +++ b/nuc_morph_analysis/lib/preprocessing/single_track_contact/vis_code/single_track_contact.py @@ -154,7 +154,6 @@ def get_yticks(column, ticks_or_lims="ticks"): "xy_aspect": (0, 3, 0.5), "colony_depth": (0, 8, 2), "neigh_distance": (0, 80, 20), - "density": (0, 0.025, 0.005), "dxdt_48_volume": (-50, 150, 50), "dxdt_24_volume": (-50, 150, 50), "dxdt_12_volume": (-50, 150, 50), @@ -205,7 +204,6 @@ def make_plot( "zy_aspect": "tab:olive", "colony_depth": "k", "neigh_distance": "k", - "density": "k", 
"dxdt_48_volume": "tab:blue", "dxdt_24_volume": "tab:orange", "dxdt_12_volume": "tab:purple", diff --git a/nuc_morph_analysis/lib/preprocessing/twoD_zMIP_area/pseudo_cell_helper.py b/nuc_morph_analysis/lib/preprocessing/twoD_zMIP_area/pseudo_cell_helper.py index 378c23b1..8273e117 100644 --- a/nuc_morph_analysis/lib/preprocessing/twoD_zMIP_area/pseudo_cell_helper.py +++ b/nuc_morph_analysis/lib/preprocessing/twoD_zMIP_area/pseudo_cell_helper.py @@ -324,7 +324,7 @@ def choose_columns(df_2d): """ merge_cols = ['label_img','index_sequence','colony'] feature_cols = ['2d_area_pseudo_cell','2d_area_nucleus','2d_area_nuc_cell_ratio', - '2d_area_cyto','inv_cyto_density', + '2d_area_cyto', '2d_resolution_level_nucleus','2d_resolution_level_pseudo_cell'] columns_to_keep = merge_cols + feature_cols df_2d = df_2d[columns_to_keep] diff --git a/nuc_morph_analysis/lib/visualization/glossary.py b/nuc_morph_analysis/lib/visualization/glossary.py index 601d853d..6b734cd1 100644 --- a/nuc_morph_analysis/lib/visualization/glossary.py +++ b/nuc_morph_analysis/lib/visualization/glossary.py @@ -11,7 +11,6 @@ "late_growth_rate_by_endpoints": "The growth rate of the growth phase calculated by endpoints (i.e., “volume at the end of growth” - “volume at start of growth”) / “growth duration”.", "tscale_linearityfit_volume": "Each volume trajectory was fit to a power law scaling with time 𝑉(𝑡) =𝑉𝑠𝑡𝑎𝑟𝑡+𝑟𝑡^𝛼 over all time points during the growth phase. This feature is the fitted time scaling factor 𝛼 for a full-interphase nuclear trajectory.", "dxdt_48_volume": "The change in volume over time for a four hour rolling window for middle interphase time points of full-interphase trajectories.", - "density": "Inverse of the squared average distance to centroids of all neighboring nuclei. Neighbors were determined using a Voronoi tessellation graph.", "normalized_time": "The time within interphase normalized by the total interphase time for full-interphase nuclear trajectories. 
This ranges from 0 to 1, where 0 represents the start of interphase and 1 represents the end of interphase.", "sync_time_Ff": "Time synchronized to start of interphase for each single full-interphase nuclear trajectory (i.e., all trajectories start with a synchronized time of 0 hours).", "time_at_B": "The calculated time of the start of the growth phase in single full-interphase nuclear trajectory. This time is relative to the start of the timelapse imaging.", diff --git a/nuc_morph_analysis/lib/visualization/label_tables.py b/nuc_morph_analysis/lib/visualization/label_tables.py index a7d167b6..5983c4b5 100644 --- a/nuc_morph_analysis/lib/visualization/label_tables.py +++ b/nuc_morph_analysis/lib/visualization/label_tables.py @@ -56,9 +56,7 @@ def get_scale_factor_table(dataset="all_baseline"): ("mesh_sa"): pix_size**2, ("volume", "volume_sub", "volume_change_over_25_minutes"): pix_size**3, - ("fit_volume"): 1, #already scaled in code - ("density", "avg_density", "avg_early_density", "avg_late_density"): 1 / pix_size**2, - + ("fit_volume"): 1, #already scaled in code ( "colony_time", "sync_time_Ff", @@ -215,11 +213,6 @@ def get_scale_factor_table(dataset="all_baseline"): "colony_depth": "Colony depth", "normalized_colony_depth": "Normalized colony depth", "avg_colony_depth": "Average colony depth", - # Density - "avg_early_density": "Early density", - "avg_late_density": "Late density", - "density": "Density", - "avg_density": "Average density", # Lineage "parent_id": "Parent ID", "family_id": "Family ID", @@ -240,7 +233,6 @@ def get_scale_factor_table(dataset="all_baseline"): "seg_twoD_zMIP_area": "total projected nuclear area", # LRM feats "height_at_B": "Starting height", - "density_at_B": "Starting density", "xy_aspect_at_B": "Starting XY aspect ratio", "SA_vol_ratio_at_B": "Starting surface area/volume ratio", "early_neighbor_avg_dxdt_48_volume_90um": "Neighborhood avg. 
~starting transient growth rate", @@ -389,7 +381,6 @@ def convert_to_hr(bin_interval, dataset="all_baseline"): "zy_aspect": "YZ aspect ratio", "distance_from_centroid": "Distance from colony center", "normalized_colony_depth": "Normalized distance from colony center", - "density": "Density", "family_id": "Family ID", "is_growth_outlier": "Growth outlier filter", "termination": "Trajectory termination annotation", @@ -448,12 +439,6 @@ def convert_to_hr(bin_interval, dataset="all_baseline"): "volume_change_over_25_minutes", ): "(μm\u00B3)", "SA_vol_ratio": "(μm⁻¹)", - ( - "density", - "avg_early_density", - "avg_late_density", - "avg_density", - ): "(μm⁻²)", # Temporal ( "colony_time", @@ -576,8 +561,6 @@ def convert_to_hr(bin_interval, dataset="all_baseline"): "xz_aspect_fold_change_BC": (0, 3.25), # Colony Position "avg_colony_depth": (0, 9), - # Density - "avg_density": (6.1e-4, 4.6e-3), } # limits when growth outliers are left in the dataset @@ -633,8 +616,6 @@ def convert_to_hr(bin_interval, dataset="all_baseline"): "xz_aspect_fold_change_BC": (0, 3.25), # Colony Position "avg_colony_depth": (0, 9), - # Density - "avg_density": (4e-6, 6e-5), # colony area "colony_area": (0, 200000), # max area of 20 FOV i 170,000 µm^2 "nucleus_colony_area_ratio": (0.3, 0.55), @@ -648,7 +629,6 @@ def convert_to_hr(bin_interval, dataset="all_baseline"): "volume": (400, 1200), "exiting_mitosis": (0, 0.4), "nucleus_colony_area_ratio": (2, 3), - "density": (0.0010, 0.0045), "dxdt_48_volume": (10, 80), "dxdt_24_volume": (10, 80), "tscale_linearityfit_volume": (0.2, 3), diff --git a/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py b/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py index dc405978..1a75f913 100644 --- a/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py +++ b/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py @@ -97,7 +97,6 @@ class NucMorphFeatureSpec: NucMorphFeatureSpec("height"), 
NucMorphFeatureSpec("xy_aspect"), NucMorphFeatureSpec("dxdt_48_volume"), - NucMorphFeatureSpec("density"), NucMorphFeatureSpec("normalized_colony_depth"), NucMorphFeatureSpec( "termination", @@ -122,7 +121,6 @@ class NucMorphFeatureSpec: NucMorphFeatureSpec("late_growth_rate_by_endpoints"), NucMorphFeatureSpec("tscale_linearityfit_volume"), NucMorphFeatureSpec("dxdt_48_volume"), - NucMorphFeatureSpec("density"), NucMorphFeatureSpec("normalized_time"), NucMorphFeatureSpec("sync_time_Ff"), NucMorphFeatureSpec("time_at_B"), @@ -159,7 +157,6 @@ class NucMorphFeatureSpec: NucMorphFeatureSpec("late_growth_rate_by_endpoints"), NucMorphFeatureSpec("tscale_linearityfit_volume"), NucMorphFeatureSpec("dxdt_48_volume"), - NucMorphFeatureSpec("density"), NucMorphFeatureSpec("normalized_time"), NucMorphFeatureSpec("sync_time_Ff"), NucMorphFeatureSpec("time_at_B"), @@ -196,7 +193,6 @@ class NucMorphFeatureSpec: NucMorphFeatureSpec("late_growth_rate_by_endpoints"), NucMorphFeatureSpec("tscale_linearityfit_volume"), NucMorphFeatureSpec("dxdt_48_volume"), - NucMorphFeatureSpec("density"), NucMorphFeatureSpec("normalized_time"), NucMorphFeatureSpec("sync_time_Ff"), NucMorphFeatureSpec("time_at_B"), diff --git a/test/lib/preprocessing/test_voronoi.py b/test/lib/preprocessing/test_voronoi.py index fe95855b..18d0cc00 100644 --- a/test/lib/preprocessing/test_voronoi.py +++ b/test/lib/preprocessing/test_voronoi.py @@ -3,121 +3,7 @@ import pandas as pd -def test_voronoi_real_densities(): - """ - Load 2 frames of medium colony, - execute voronoi calculation to get neighbors, distances and densities - compare to gt density (from colony_metrics dataset in FMS) - """ - - # These are all neighbors and relevant features for 2 cells in medium colony - # Cell 1 - 41f330ab2e8a5ad827ca53e66b632e3a3fff209c260614819960d9cc (a) - # Cell 2 - 7ea0e4bd3587da25dde34c2479e0c7ac10787c4db34259b441664a8f (b) - - index_seqs = [355, 356, 355, 355, 355, 355, 355, 355, 356, 356, 356, 356, 356, 356] - label_img = [ 
- 95.0, - 178.0, - 190.0, - 201.0, - 189.0, - 194.0, - 98.0, - 188.0, - 170.0, - 188.0, - 171.0, - 179.0, - 182.0, - 85.0, - ] - ids = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n"] - vols = [ - 1069444.0, - 1090362.0, - 348357.0, - 329564.0, - 422921.0, - 591441.0, - 398974.0, - 438732.0, - 354709.0, - 324528.0, - 428091.0, - 611958.0, - 410333.0, - 489027.0, - ] - centroid_y = [ - 1227, - 1226, - 1059, - 1410, - 1103, - 1265, - 1288, - 1050, - 1055, - 1395, - 1096, - 1257, - 1278, - 1040, - ] - centroid_x = [ - 3197, - 3185, - 3192, - 3169, - 2948, - 2904, - 3483, - 3433, - 3195, - 3170, - 2945, - 2904, - 3491, - 3432, - ] - density_gt = [ - 0.01584935, - 0.01582909, - 0.01949619, - 0.01791645, - 0.01520982, - 0.01631341, - 0.01520432, - 0.01723284, - 0.01947099, - 0.01637999, - 0.01529952, - 0.01653538, - 0.01463112, - 0.01745064, - ] - - # Create dataframe with these features - df = pd.DataFrame() - df["index_sequence"] = index_seqs - df["label_img"] = label_img - df["CellId"] = ids - df["volume"] = vols - df["centroid_y"] = centroid_y - df["centroid_x"] = centroid_x - df["density_gt"] = density_gt - - # run colony metrics calculation - df_colony_metrics = add_colony_metrics(df) - # The new density value should be approximately the same as the old density value divided by 4, - # squared. This is because the old density was computed as 1 / mean(neighbor distances), where - # the neighbor distances were downsampled by a factor of 4. The new density does not downsample - # and uses 1 / mean(neighbor distances)^2 so that the units are closer to what is expected - # from a "density" metric - df_colony_metrics["old_density"] = df_colony_metrics["density"].apply(lambda x: 4 * np.sqrt(x)) - assert np.allclose(df_colony_metrics["old_density"], df_colony_metrics.density_gt, rtol=0.2) - -def test_voronoi_synthetic_distance_density(): +def test_voronoi_synthetic_distance(): """ This test uses a set of cells laid out in the following pattern. 
a b c d @@ -172,9 +58,6 @@ def test_voronoi_synthetic_distance_density(): expected_neighbor_distances[12] = np.mean([10, 10, 10.0 * np.sqrt(2)]) # Cell m assert np.allclose(df_colony_metrics.neigh_distance, expected_neighbor_distances) - expected_densities = 1 / expected_neighbor_distances**2 - assert np.allclose(df_colony_metrics.density, expected_densities) - def test_voronoi_neighbors(): """ From 1e0dcc8259276afa2c05a886f2c43ea18b3c238d Mon Sep 17 00:00:00 2001 From: Chris Frick Date: Fri, 6 Dec 2024 10:36:33 -0800 Subject: [PATCH 20/34] make fast load_local run all --- .../lib/preprocessing/global_dataset_filtering.py | 6 ++++-- run_all_manuscript_workflows.py | 9 +++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py b/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py index bdbd7f79..d82b2408 100644 --- a/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py +++ b/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py @@ -27,7 +27,7 @@ def load_dataset_with_features( dataset="all_baseline", remove_growth_outliers=True, - load_local=False, + load_local=True, save_local=False, num_workers=32, ): @@ -393,7 +393,7 @@ def merge_datasets(df_all, df_full): 'resolution_level', '2d_area_cyto', 'inv_cyto_density', - 'density' + 'density', # created in add_groth_features.fit_tracks_to_model() 'tscale_exponentialfit_volume', @@ -438,6 +438,8 @@ def remove_columns(df, column_list=COLUMNS_TO_DROP): df : pandas.DataFrame The dataframe with the columns removed. 
""" + column_list = [col for col in column_list if col in df.columns] + df = df.drop(columns=column_list) return df diff --git a/run_all_manuscript_workflows.py b/run_all_manuscript_workflows.py index dd0d0162..6cf5fe5d 100644 --- a/run_all_manuscript_workflows.py +++ b/run_all_manuscript_workflows.py @@ -3,6 +3,15 @@ from nuc_morph_analysis.analyses.colony_area import colony_area_workflow from nuc_morph_analysis.analyses.segmentation_model_validation import seg_model_validation_figure_workflow +from nuc_morph_analysis.lib.preprocessing.global_dataset_filtering import load_dataset_with_features + +# generate manifests and save local. +load_dataset_with_features("all_baseline", load_local=False, save_local=True) +load_dataset_with_features("all_baseline", load_local=False, save_local=True, remove_growth_outliers=False) +load_dataset_with_features("all_drug_perturbation", load_local=False, save_local=True) +load_dataset_with_features("all_feeding_control", load_local=False, save_local=True) +# all subsequent loading will be done from these local files + class Workflows: def figure_1_dataset(): From d64ce321fb68e5b65005f93f744a02e6c0dce014 Mon Sep 17 00:00:00 2001 From: Chantelle Leveille Date: Fri, 6 Dec 2024 12:19:59 -0800 Subject: [PATCH 21/34] update TFE to reflect new features --- .../preprocessing/save_datasets_for_quilt.py | 126 +----------------- .../lib/visualization/glossary.py | 9 +- .../lib/visualization/label_tables.py | 31 +++-- .../visualization/write_data_for_colorizer.py | 95 +------------ 4 files changed, 37 insertions(+), 224 deletions(-) diff --git a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py index 8fa12006..3dd27d00 100644 --- a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py +++ b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py @@ -179,7 +179,6 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): 
"neighbor_avg_lrm_xy_aspect_90um", "neighbor_avg_lrm_mesh_sa_90um", "neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um", - # "neighbor_avg_lrm_dxdt_48_volume_90um", #used in lrm "sisters_volume_at_B", @@ -207,14 +206,12 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): '2d_area_nucleus', '2d_area_pseudo_cell', '2d_area_nuc_cell_ratio', - '2d_perimeter_nucleus', #can be dropped at end of global dataset filterig, only used in load_dataset_with_features. - '2d_perimeter_pseudo_cell', # can be dropped at end of global dataset filtering, only used in load_dataset_with_features - '2d_perimeter_nuc_cell_ratio', # used within filter data + '2d_perimeter_nucleus', + '2d_perimeter_pseudo_cell', + '2d_perimeter_nuc_cell_ratio', 'bad_pseudo_cells_segmentation', 'uncaught_pseudo_cell_artifact', - - # neighbor_of_X features 'frame_of_breakdown', # used in figure_mitotic_filtering_examples.py 'frame_of_formation',# used in figure_mitotic_filtering_examples.py @@ -225,14 +222,12 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): 'has_mitotic_neighbor_dilated', # used in figure_mitotic_filtering_examples.py 'identified_death', # used in neighbor_of_X/example 'frame_of_death', # used in neighbor_of_X/example - - 'has_dying_neighbor_forward_dilated', #used, must keep # volume_dips features 'volume_change_over_25_minutes', #used 'power_fit_volume', #used and could go to tfe - 'volume_dips_peak_mask_at_region', #u sed for S10 C right and nice for TFE + 'volume_dips_peak_mask_at_region', #used for S10 C right and nice for TFE 'volume_dips_peak_mask_at_center', # used for S10C right 'volume_dips_volume_change_at_center', # used for S10D thresholding 'volume_dips_removed_um_unfilled', # used for S10E abd S10B @@ -240,127 +235,16 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): 'neighbor_avg_dxdt_48_volume_dips_removed_um_unfilled_90um', # used for S10 G 'neighbor_avg_dxdt_48_volume_dips_removed_um_unfilled_whole_colony', # used for S10 F - # new fitting 
paramaters features 'RMSE_exponentialfit_volume', #used 'RMSE_linearfit_volume', #used - - ] - -drop_list = [ - # not sure where these come from - # dropping in global_dataset_filtering (DONE) - 'level_0', - 'index', - 'source_manifest_x', - 'source_manifest_y', - - # this set is defined in psuedo_cell_helper, not used after. Remove in gloabl_dataset_filtering (DONE) - # could be removed in pseudo_cell_helper.choose_columns() but that would require rerunning generate_main_manifest - '2d_label_true_nucleus', - '2d_area_true_nucleus', - '2d_total_area_true_nucleus', - '2d_label_nucleus', - '2d_bbox-0_nucleus', - '2d_bbox-1_nucleus', - '2d_bbox-2_nucleus', - '2d_bbox-3_nucleus', - '2d_centroid-0_nucleus', - '2d_centroid-1_nucleus', - '2d_convex_area_nucleus', - '2d_eccentricity_nucleus', - '2d_equivalent_diameter_nucleus', - '2d_extent_nucleus', - '2d_filled_area_nucleus', - '2d_major_axis_length_nucleus', - '2d_minor_axis_length_nucleus', - '2d_orientation_nucleus', - '2d_solidity_nucleus', - '2d_img_shape_nucleus', - 'resolution_level_dup1', - '2d_label_true_pseudo_cell', - '2d_area_true_pseudo_cell', - '2d_total_area_true_pseudo_cell', - '2d_label_pseudo_cell', - '2d_bbox-0_pseudo_cell', - '2d_bbox-1_pseudo_cell', - '2d_bbox-2_pseudo_cell', - '2d_bbox-3_pseudo_cell', - '2d_centroid-0_pseudo_cell', - '2d_centroid-1_pseudo_cell', - '2d_convex_area_pseudo_cell', - '2d_eccentricity_pseudo_cell', - '2d_equivalent_diameter_pseudo_cell', - '2d_extent_pseudo_cell', - '2d_filled_area_pseudo_cell', - '2d_major_axis_length_pseudo_cell', - '2d_minor_axis_length_pseudo_cell', - '2d_orientation_pseudo_cell', - '2d_solidity_pseudo_cell', - '2d_img_shape_pseudo_cell', - 'resolution_level_dup2', - '2d_label_true_edge', - '2d_area_true_edge', - '2d_total_area_true_edge', - '2d_label_edge', - '2d_intensity_max_edge', - '2d_intensity_mean_edge', - '2d_intensity_min_edge', # this one is fun, its distance to nearest nucleus edge (different than centroid distance) - '2d_img_shape_edge', 
- 'resolution_level', - '2d_area_cyto', - 'inv_cyto_density', - - # can be dropped using code, duplicate column (DONE) - 'dxdt_5_volume_end', - - # drop in global_dataset_filtering (DONE) - 'tscale_exponentialfit_volume', - 'atB_exponentialfit_volume', - 'rate_exponentialfit_volume', - 'tscale_linearfit_volume', - 'atB_linearfit_volume', - 'rate_linearfit_volume', - - # created in volume/filter_out_dips.find_and_remove_from_pivot() and not used after. - # Remove in global_dataset_filtering (DONE) - 'volume_dips_has_peak', - 'volume_dips_volume_change_at_region', - 'volume_dips_width_at_center', - 'volume_dips_width_at_region', - 'volume_dips_max_volume_change', - 'volume_dips_peak_id_at_center', - 'volume_dips_peak_id_at_region', - 'volume_dips_total_number', - - # created in add_features.sum_mitotic_events_along_full_track() - # can remove in code (DONE) - 'sum_number_of_frame_of_breakdown_neighbors', - 'sum_number_of_frame_of_formation_neighbors', - 'sum_has_mitotic_neighbor_breakdown', - 'sum_has_mitotic_neighbor_formation', - 'sum_has_mitotic_neighbor_breakdown_forward_dilated', - 'sum_has_mitotic_neighbor_formation_backward_dilated', - 'sum_has_mitotic_neighbor_dilated', - 'sum_has_dying_neighbor_forward_dilated', - 'sum_number_of_frame_of_death_neighbors', - - # created in labeling_neighbors_helper.find_neighbors_of_cells() - # remove in global_dataset_filtering (DONE) - 'number_of_frame_of_breakdown_neighbors', # CAN BE DROPPED, not used - 'number_of_frame_of_death_neighbors', # CAN BE DROPPED, not used - 'number_of_frame_of_formation_neighbors', # CAN BE DROPPED, not used ] -unnacounted_cols = [col for col in new_cols if col not in keep_list and col not in drop_list] +unnacounted_cols = [col for col in new_cols if col not in keep_list] still_needs_dropping = [col for col in new_cols if col not in keep_list] print(len(still_needs_dropping), still_needs_dropping) print(len(keep_list)) print(len(unnacounted_cols)) -overlap = [x for x in keep_list if x in 
drop_list] -overlap2 = [x for x in drop_list if x in keep_list] - -print(len(overlap),len(overlap2)) #%% for col in unnacounted_cols: print(col) diff --git a/nuc_morph_analysis/lib/visualization/glossary.py b/nuc_morph_analysis/lib/visualization/glossary.py index 601d853d..990ca89a 100644 --- a/nuc_morph_analysis/lib/visualization/glossary.py +++ b/nuc_morph_analysis/lib/visualization/glossary.py @@ -11,7 +11,7 @@ "late_growth_rate_by_endpoints": "The growth rate of the growth phase calculated by endpoints (i.e., “volume at the end of growth” - “volume at start of growth”) / “growth duration”.", "tscale_linearityfit_volume": "Each volume trajectory was fit to a power law scaling with time 𝑉(𝑡) =𝑉𝑠𝑡𝑎𝑟𝑡+𝑟𝑡^𝛼 over all time points during the growth phase. This feature is the fitted time scaling factor 𝛼 for a full-interphase nuclear trajectory.", "dxdt_48_volume": "The change in volume over time for a four hour rolling window for middle interphase time points of full-interphase trajectories.", - "density": "Inverse of the squared average distance to centroids of all neighboring nuclei. Neighbors were determined using a Voronoi tessellation graph.", + "density": "The ratio between the area of the nucleus (from maximum z projected nuclear segmentation) and the area of the pseudo cell (from maximum z projected nuclear segmentation) gives a metric represtative of the local density for each nucleus at every timepoint. The density was not calculated if the nucleus was on the edge of the colony or had a mitotic neighbor.", "normalized_time": "The time within interphase normalized by the total interphase time for full-interphase nuclear trajectories. 
This ranges from 0 to 1, where 0 represents the start of interphase and 1 represents the end of interphase.", "sync_time_Ff": "Time synchronized to start of interphase for each single full-interphase nuclear trajectory (i.e., all trajectories start with a synchronized time of 0 hours).", "time_at_B": "The calculated time of the start of the growth phase in single full-interphase nuclear trajectory. This time is relative to the start of the timelapse imaging.", @@ -41,6 +41,13 @@ "SA_fold_change_fromB": "The surface area fold-change relative to the surface area at the start of growth for a full-interphase nuclear trajectory (i.e. surface area / surface area at start of growth).", "delta_SA_BC": "The amount of surface area added from the start to the end of growth for a full-interphase nuclear trajectory (i.e., “surface area at the end of growth” - “surface area at start of growth”).", "SA_vol_ratio": "The ratio of the surface area to the volume of the nuclear segmentation at every time point.", + "2d_area_nucleus": "The area of the nucleus (from maximum z projected nuclear segmentation).", + "2d_area_pseudo_cell": "The area of the pseudo cell (from maximum z projected nuclear segmentation).", + "volume_change_over_25_minutes": "The nuclear volume change in a 25 minute window at each frame (t) calculated by ∆V(t) = V(t-5) - V(t).", + "power_fit_volume": "Each volume trajectory from transition to breakdown was fit to a power law scaling with time 𝑉(𝑡)=𝑉𝑠𝑡𝑎𝑟𝑡+𝑟𝑡^𝛼. This feature is the power fit volume.", + "volume_dips_peak_mask_at_region": "True at all timepoints along an identified volume dip event.", + "has_mitotic_neighbor_dilated": "True at timepoints when adjacent neighbors are undergoing mitosis.", + "has_dying_neighbor_forward_dilated": "True at timepoints when adjacent neighbors are undergoing cell death.", } # Colored segmentation: The calculated feature is available for that nucleus. 
# Grey segmentation: The calculated feature is not available for that nucleus. This could be because the nuclear segmentation is an outlier at that time point (i.e. touching the edge of the field of view, identified as an erroneous segmentation or tracking) or the feature could not be calculated (i.e. features that require the full-interphase trajectory). diff --git a/nuc_morph_analysis/lib/visualization/label_tables.py b/nuc_morph_analysis/lib/visualization/label_tables.py index a7d167b6..4579b326 100644 --- a/nuc_morph_analysis/lib/visualization/label_tables.py +++ b/nuc_morph_analysis/lib/visualization/label_tables.py @@ -396,18 +396,15 @@ def convert_to_hr(bin_interval, dataset="all_baseline"): "baseline_colonies_dataset": "Baseline colonies dataset filter", "full_interphase_dataset": "Full-interphase dataset filter", "lineage_annotated_dataset": "Lineage-annotated dataset filter", - - # mitotic and apoptotic neighbor columns - "number_of_frame_of_breakdown_neighbors": "# of neighboring cells undergoing breakdown", - "number_of_frame_of_formation_neighbors": "# of neighboring cells undergoing formation", - "number_of_frame_of_death_neighbors": "# of neighboring cells undergoing death", - "2d_area_nuc_cell_ratio": "Nuclear area to (pseudo)cell area ratio", - "2d_area_nucleus": "Nuclear area", - "2d_area_pseudo_cell": "(Pseudo)cell area", - - # dip event features - "volume_change_over_25_minutes": "Change in volume in 25 minute window", -} + "2d_area_nuc_cell_ratio": "Density", + "2d_area_nucleus": "2D nuclear area", + "2d_area_pseudo_cell": "Pseudo cell area", + "volume_change_over_25_minutes": "Change in volume in 25 minute window", + "power_fit_volume": "Power law fitted volume", + "volume_dips_peak_mask_at_region": "Volume dip flag", + "has_mitotic_neighbor_dilated": "Has mitotic neighbor flag", + "has_dying_neighbor_forward_dilated": "Has dying neighbor flag", +} # units for quantities UNIT_TABLE = { @@ -445,7 +442,8 @@ def convert_to_hr(bin_interval, 
dataset="all_baseline"): "difference_volume_at_B", "difference_half_vol_at_C_and_B" "avg_sister_volume_at_B", "volume_sub", - "volume_change_over_25_minutes", + "volume_change_over_25_minutes", + "power_fit_volume", ): "(μm\u00B3)", "SA_vol_ratio": "(μm⁻¹)", ( @@ -478,7 +476,12 @@ def convert_to_hr(bin_interval, dataset="all_baseline"): "late_growth_rate_by_endpoints", ): "(μm\u00B3/hr)", "exp_growth_coeff_BC": "(hr⁻¹)", - "2d_area_nuc_cell_ratio": "", # no unit, since ratio + #unitless + ( + "2d_area_nuc_cell_ratio", + "has_mitotic_neighbor_dilated", + "has_dying_neighbor_forward_dilated", + ): "", } # now add the dxdt columns diff --git a/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py b/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py index dc405978..2ac2db99 100644 --- a/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py +++ b/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py @@ -61,18 +61,6 @@ class NucMorphFeatureSpec: # You can find the most updated version on GitHub here: # https://github.com/allen-cell-animated/colorizer-data/blob/main/documentation/DATA_FORMAT.md -# NUCMORPH DATA REFERENCE: -# colony string In FMS manifest Name of which dataset this row of data belongs to (small, medium, or large) -# track_id int In FMS manifest ID for a single nucleus in all frames for which it exists (single value per nucleus, consistent across multiple frames) -# CellID hash In FMS manifest ID for a single instance/frame of a nucleus (every nucleus has a different value in every frame) -# index_sequence int In FMS manifest frame number associated with the nucleus data in a given row, relative to the start of the movie -# colony_time int Needs calculated and added Frame number staggered by a given amount per dataset, so that the frame numbers in all datasets are temporally algined relative to one another rather than all starting at 0 -# is_outlier boolean In FMS manifest True if this nucleus in this frame is 
flagged as an outlier (a single nucleus may be an outlier in some frames but not others) -# edge_cell boolean In FMS manifest True if this nucleus touches the edge of the FOV -# volume float In FMS manifest Volume of a single nucleus in pixels in a given frame -# height float In FMS manifest Height (in the z-direction) of the a single nucleus in pixels in a given frame -# NUC_PC1 float Needs calculated and added Value for shape mode 1 for a single nucleus in a given frame - OBJECT_ID_COLUMN = "label_img" """Column of object IDs (or unique row number).""" @@ -97,7 +85,7 @@ class NucMorphFeatureSpec: NucMorphFeatureSpec("height"), NucMorphFeatureSpec("xy_aspect"), NucMorphFeatureSpec("dxdt_48_volume"), - NucMorphFeatureSpec("density"), + NucMorphFeatureSpec("2d_area_nuc_cell_ratio"), NucMorphFeatureSpec("normalized_colony_depth"), NucMorphFeatureSpec( "termination", @@ -122,7 +110,7 @@ class NucMorphFeatureSpec: NucMorphFeatureSpec("late_growth_rate_by_endpoints"), NucMorphFeatureSpec("tscale_linearityfit_volume"), NucMorphFeatureSpec("dxdt_48_volume"), - NucMorphFeatureSpec("density"), + NucMorphFeatureSpec("2d_area_nuc_cell_ratio"), NucMorphFeatureSpec("normalized_time"), NucMorphFeatureSpec("sync_time_Ff"), NucMorphFeatureSpec("time_at_B"), @@ -159,7 +147,7 @@ class NucMorphFeatureSpec: NucMorphFeatureSpec("late_growth_rate_by_endpoints"), NucMorphFeatureSpec("tscale_linearityfit_volume"), NucMorphFeatureSpec("dxdt_48_volume"), - NucMorphFeatureSpec("density"), + NucMorphFeatureSpec("2d_area_nuc_cell_ratio"), NucMorphFeatureSpec("normalized_time"), NucMorphFeatureSpec("sync_time_Ff"), NucMorphFeatureSpec("time_at_B"), @@ -196,7 +184,7 @@ class NucMorphFeatureSpec: NucMorphFeatureSpec("late_growth_rate_by_endpoints"), NucMorphFeatureSpec("tscale_linearityfit_volume"), NucMorphFeatureSpec("dxdt_48_volume"), - NucMorphFeatureSpec("density"), + NucMorphFeatureSpec("2d_area_nuc_cell_ratio"), NucMorphFeatureSpec("normalized_time"), NucMorphFeatureSpec("sync_time_Ff"), 
NucMorphFeatureSpec("time_at_B"), @@ -238,90 +226,21 @@ class NucMorphFeatureSpec: NucMorphFeatureSpec("SA_fold_change_fromB"), NucMorphFeatureSpec("delta_SA_BC"), NucMorphFeatureSpec("SA_vol_ratio"), - # mitotic and apoptotic neighbor columns - NucMorphFeatureSpec( - "frame_of_breakdown", - type=FeatureType.CATEGORICAL, - categories=["False", "True"], - ), - NucMorphFeatureSpec( - "frame_of_formation", - type=FeatureType.CATEGORICAL, - categories=["False", "True"], - ), - NucMorphFeatureSpec("number_of_frame_of_breakdown_neighbors"), - NucMorphFeatureSpec("number_of_frame_of_formation_neighbors"), - NucMorphFeatureSpec( - "has_mitotic_neighbor_breakdown", - type=FeatureType.CATEGORICAL, - categories=["False", "True"], - ), - NucMorphFeatureSpec( - "has_mitotic_neighbor_formation", - type=FeatureType.CATEGORICAL, - categories=["False", "True"], - ), - NucMorphFeatureSpec( - "has_mitotic_neighbor_breakdown_forward_dilated", - type=FeatureType.CATEGORICAL, - categories=["False", "True"], - ), - NucMorphFeatureSpec( - "has_mitotic_neighbor_formation_backward_dilated", - type=FeatureType.CATEGORICAL, - categories=["False", "True"], - ), - NucMorphFeatureSpec( - "has_mitotic_neighbor", - type=FeatureType.CATEGORICAL, - categories=["False", "True"], - ), NucMorphFeatureSpec( "has_mitotic_neighbor_dilated", type=FeatureType.CATEGORICAL, categories=["False", "True"], ), - NucMorphFeatureSpec( - "frame_of_death", type=FeatureType.CATEGORICAL, categories=["False", "True"] - ), - NucMorphFeatureSpec( - "has_dying_neighbor", - type=FeatureType.CATEGORICAL, - categories=["False", "True"], - ), NucMorphFeatureSpec( "has_dying_neighbor_forward_dilated", type=FeatureType.CATEGORICAL, categories=["False", "True"], ), - NucMorphFeatureSpec("number_of_frame_of_death_neighbors"), - NucMorphFeatureSpec("sum_has_mitotic_neighbor_breakdown"), # per track feature - NucMorphFeatureSpec("sum_has_mitotic_neighbor_formation"), # per track feature - 
NucMorphFeatureSpec("sum_has_mitotic_neighbor"), # per track feature - NucMorphFeatureSpec("sum_has_dying_neighbor"), # per track feature - NucMorphFeatureSpec( - "sum_number_of_frame_of_breakdown_neighbors" - ), # per track feature - NucMorphFeatureSpec("number_of_frame_of_death_neighbors"), # per track feature - # new columns - # useful and likely to make it to final dataset - NucMorphFeatureSpec("2d_area_nuc_cell_ratio"), NucMorphFeatureSpec("2d_area_nucleus"), NucMorphFeatureSpec("2d_area_pseudo_cell"), - - # potentially useful, but likely not needed in final dataset - NucMorphFeatureSpec("2d_perimeter_nucleus"), - NucMorphFeatureSpec("2d_perimeter_pseudo_cell"), - NucMorphFeatureSpec('2d_perimeter_nuc_cell_ratio'), - - # volume dip columns - NucMorphFeatureSpec('colony_depth', type=FeatureType.DISCRETE), - NucMorphFeatureSpec('volume_dips_removed_um_unfilled'), - NucMorphFeatureSpec('dxdt_48_volume_dips_removed_um_unfilled'), - NucMorphFeatureSpec('volume_dips_has_peak'), - NucMorphFeatureSpec('volume_dips_max_volume_change'), - NucMorphFeatureSpec('volume_dips_volume_change_at_region'), - NucMorphFeatureSpec('volume_change_over_25_minutes') + NucMorphFeatureSpec('volume_dips_peak_mask_at_region'), + NucMorphFeatureSpec('volume_change_over_25_minutes'), + NucMorphFeatureSpec('power_fit_volume'), ], } From b81fdc118ca8fe15d474334070692603975e1936 Mon Sep 17 00:00:00 2001 From: Chris Frick Date: Fri, 6 Dec 2024 13:09:36 -0800 Subject: [PATCH 22/34] restore load_local=False --- .../lib/preprocessing/global_dataset_filtering.py | 2 +- run_all_manuscript_workflows.py | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py b/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py index d82b2408..da14de1c 100644 --- a/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py +++ b/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py @@ -27,7 +27,7 @@ def 
load_dataset_with_features( dataset="all_baseline", remove_growth_outliers=True, - load_local=True, + load_local=False, save_local=False, num_workers=32, ): diff --git a/run_all_manuscript_workflows.py b/run_all_manuscript_workflows.py index 6cf5fe5d..8c84a96e 100644 --- a/run_all_manuscript_workflows.py +++ b/run_all_manuscript_workflows.py @@ -3,16 +3,6 @@ from nuc_morph_analysis.analyses.colony_area import colony_area_workflow from nuc_morph_analysis.analyses.segmentation_model_validation import seg_model_validation_figure_workflow -from nuc_morph_analysis.lib.preprocessing.global_dataset_filtering import load_dataset_with_features - -# generate manifests and save local. -load_dataset_with_features("all_baseline", load_local=False, save_local=True) -load_dataset_with_features("all_baseline", load_local=False, save_local=True, remove_growth_outliers=False) -load_dataset_with_features("all_drug_perturbation", load_local=False, save_local=True) -load_dataset_with_features("all_feeding_control", load_local=False, save_local=True) -# all subsequent loading will be done from these local files - - class Workflows: def figure_1_dataset(): import nuc_morph_analysis.analyses.dataset_images_for_figures.figure_1_workflow From 3d0b3f8ce88413a52fd37701c15da6c91befc0c9 Mon Sep 17 00:00:00 2001 From: Julie Dixon Date: Fri, 6 Dec 2024 14:38:28 -0800 Subject: [PATCH 23/34] use glossary file from tfe update commit d64ce32 --- .../lib/visualization/glossary.py | 39 ++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/nuc_morph_analysis/lib/visualization/glossary.py b/nuc_morph_analysis/lib/visualization/glossary.py index baa739a4..b5f40b43 100644 --- a/nuc_morph_analysis/lib/visualization/glossary.py +++ b/nuc_morph_analysis/lib/visualization/glossary.py @@ -11,6 +11,43 @@ "late_growth_rate_by_endpoints": "The growth rate of the growth phase calculated by endpoints (i.e., “volume at the end of growth” - “volume at start of growth”) / “growth duration”.", 
"tscale_linearityfit_volume": "Each volume trajectory was fit to a power law scaling with time 𝑉(𝑡) =𝑉𝑠𝑡𝑎𝑟𝑡+𝑟𝑡^𝛼 over all time points during the growth phase. This feature is the fitted time scaling factor 𝛼 for a full-interphase nuclear trajectory.", "dxdt_48_volume": "The change in volume over time for a four hour rolling window for middle interphase time points of full-interphase trajectories.", + "density": "The ratio between the area of the nucleus (from maximum z projected nuclear segmentation) and the area of the pseudo cell (from maximum z projected nuclear segmentation) gives a metric represtative of the local density for each nucleus at every timepoint. The density was not calculated if the nucleus was on the edge of the colony or had a mitotic neighbor.", + "normalized_time": "The time within interphase normalized by the total interphase time for full-interphase nuclear trajectories. This ranges from 0 to 1, where 0 represents the start of interphase and 1 represents the end of interphase.", + "sync_time_Ff": "Time synchronized to start of interphase for each single full-interphase nuclear trajectory (i.e., all trajectories start with a synchronized time of 0 hours).", + "time_at_B": "The calculated time of the start of the growth phase in single full-interphase nuclear trajectory. This time is relative to the start of the timelapse imaging.", + "colony_time_at_B": "In general, “aligned colony time” is the universal timeline for all three baseline colonies (Small, Medium and Large), based on aligning their individual timelapses based on their colony development. This feature gives the time for the start of the growth phase for an individual full-interphase nuclear trajectory within aligned colony time.", + "normalized_colony_depth": "The normalized radial position in the colony, where the center of the colony is 0 and the edge is 1. The colony depth of each nucleus is assigned using a Voronoi tessellation graph. 
The normalized distance from the center is calculated from these depths as (maximum depth within the colony - individual nuclear depth) / (maximum depth within the colony - minimum depth within the colony)", + "termination": "Manual annotation of how a trajectory terminated. 0 - trajectory terminates by cell dividing. 1 - trajectory terminates by nucleus going off the edge of the field of view. 2 - trajectory terminates by apoptosis.", + "is_growth_outlier": "Is true if the nuclear trajectory is identified as a biological outlier (e.g. grows for an abnormally long time and the daughters die)", + "baseline_colonies_dataset": "Filter which limits the included dataset to nuclei in the “baseline colonies analysis dataset.” This includes all nuclei tracked for at least one hour and included “growth outliers” (biological outliers) but excludes any technical outliers (outliers automatically filtered or annotated as having errors in segmentation, tracking, etc).", + "full_interphase_dataset": "Filter which limits the included dataset to nuclei analyzed in the “full-interphase analysis dataset.” It is a subset of the “baseline colonies analysis dataset.” Only nuclei tracked successfully throughout interphase are included in this dataset, and growth outliers are excluded from this dataset.", + "lineage_annotated_dataset": "Filter which limits the included dataset to nuclei analyzed in the “lineage-annotated analysis dataset.” It is a subset of the “full-interphase dataset,” including just the nuclei in the Small and Medium colonies from this dataset. 
It includes the “Family ID” feature.", + "volume_at_A": "The volume at the time of lamin shell formation, the start of the expansion phase, for a full-interphase nuclear trajectory.", + "time_at_A": "Time at lamin shell formation, the start of the expansion phase, for a full-interphase nuclear trajectory.", + "time_at_C": "Time at lamin shell breakdown, the end of the growth phase, for a full-interphase nuclear trajectory.", + "duration_AB": "Duration of the expansion phase from lamin shell formation to the start of the growth phase for a full-interphase nuclear trajectory.", + "duration_AC": "Duration of the total time during interphase from lamin shell formation (which is the start of the expansion phase) to lamin shell breakdown (which is the end of the growth phase) for a full-interphase nuclear trajectory. Interphase includes both the “expansion” phase and the “growth” phase.", + "growth_rate_AB": "The growth rate of the expansion phase calculated by endpoints: (volume at end of expansion - volume at start of expansion) / expansion duration for a full-interphase nuclear trajectory.", + "volume_fold_change_fromB": "The volume fold-change relative to the volume at the start of growth for a full-interphase nuclear trajectory (i.e. volume / volume at start of growth).", + "distance_from_centroid": "Distance from the center of the colony.", + "neighbor_avg_dxdt_48_volume_whole_colony": "The transient growth rate over a four hour rolling window centered at a given time point, averaged across all nuclei in the colony.", + "neighbor_avg_dxdt_48_volume_90um": "The transient growth rate over a four hour rolling window centered at a given time point, averaged across all nuclei within a 90 µm radius neighborhood of each nucleus.", + "zy_aspect": "The ratio of width to the height of the nuclear segmentation at every time point. The width is defined as the length of nuclear segmentation in the plane perpendicular to the longest axis in the XY-plane. 
The height is the length of the nuclear segmentation in the Z-plane.", + "xz_aspect": "The ratio of width to the height of the nuclear segmentation at every time point. The length is defined as the longest axis of the nuclear segmentation in the XY-plane. The height is the length of the nuclear segmentation in the Z-plane.", + "mesh_sa": "The number of pixels on the surface of the smoothed mesh of the nuclear segmentation at every time point.", + "SA_at_B": "The surface area at the start of the growth phase for a full-interphase nuclear trajectory.", + "SA_at_C": "The surface area at the time of lamin shell breakdown, the end of the growth phase, for a full-interphase nuclear trajectory.", + "SA_fold_change_BC": "The surface area fold-change from the start to the end of growth for a full-interphase nuclear trajectory (i.e., “surface area at the end of growth” / “surface area at start of growth”).", + "SA_fold_change_fromB": "The surface area fold-change relative to the surface area at the start of growth for a full-interphase nuclear trajectory (i.e. surface area / surface area at start of growth).", + "delta_SA_BC": "The amount of surface area added from the start to the end of growth for a full-interphase nuclear trajectory (i.e., “surface area at the end of growth” - “surface area at start of growth”).", + "SA_vol_ratio": "The ratio of the surface area to the volume of the nuclear segmentation at every time point.", + "2d_area_nucleus": "The area of the nucleus (from maximum z projected nuclear segmentation).", + "2d_area_pseudo_cell": "The area of the pseudo cell (from maximum z projected nuclear segmentation).", + "volume_change_over_25_minutes": "The nuclear volume change in a 25 minute window at each frame (t) calculated by ∆V(t) = V(t-5) - V(t).", + "power_fit_volume": "Each volume trajectory from transition to breakdown was fit to a power law scaling with time 𝑉(𝑡)=𝑉𝑠𝑡𝑎𝑟𝑡+𝑟𝑡^𝛼. 
This feature is the power fit volume.", + "volume_dips_peak_mask_at_region": "True at all timepoints along an identified volume dip event.", + "has_mitotic_neighbor_dilated": "True at timepoints when adjacent neighbors are undergoing mitosis.", + "has_dying_neighbor_forward_dilated": "True at timepoints when adjacent neighbors are undergoing cell death.", } # Colored segmentation: The calculated feature is available for that nucleus. -# Grey segmentation: The calculated feature is not available for that nucleus. This could be because the nuclear segmentation is an outlier at that time point (i.e. touching the edge of the field of view, identified as an erroneous segmentation or tracking) or the feature could not be calculated (i.e. features that require the full-interphase trajectory). +# Grey segmentation: The calculated feature is not available for that nucleus. This could be because the nuclear segmentation is an outlier at that time point (i.e. touching the edge of the field of view, identified as an erroneous segmentation or tracking) or the feature could not be calculated (i.e. features that require the full-interphase trajectory). 
\ No newline at end of file From e5b537b8557ce76de019be1f9a148635e9a56397 Mon Sep 17 00:00:00 2001 From: Chantelle Leveille Date: Fri, 6 Dec 2024 15:02:48 -0800 Subject: [PATCH 24/34] boolean feature correction --- .../lib/visualization/write_data_for_colorizer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py b/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py index 2ac2db99..1037c928 100644 --- a/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py +++ b/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py @@ -238,7 +238,11 @@ class NucMorphFeatureSpec: ), NucMorphFeatureSpec("2d_area_nucleus"), NucMorphFeatureSpec("2d_area_pseudo_cell"), - NucMorphFeatureSpec('volume_dips_peak_mask_at_region'), + NucMorphFeatureSpec( + "volume_dips_peak_mask_at_region", + type=FeatureType.CATEGORICAL, + categories=["False", "True"], + ), NucMorphFeatureSpec('volume_change_over_25_minutes'), NucMorphFeatureSpec('power_fit_volume'), ], From e3b498e4c8e9fff156a59f823330ed230566b285 Mon Sep 17 00:00:00 2001 From: Chantelle Leveille Date: Fri, 6 Dec 2024 15:09:29 -0800 Subject: [PATCH 25/34] update glossary --- nuc_morph_analysis/lib/visualization/glossary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nuc_morph_analysis/lib/visualization/glossary.py b/nuc_morph_analysis/lib/visualization/glossary.py index b5f40b43..72e434c3 100644 --- a/nuc_morph_analysis/lib/visualization/glossary.py +++ b/nuc_morph_analysis/lib/visualization/glossary.py @@ -11,7 +11,7 @@ "late_growth_rate_by_endpoints": "The growth rate of the growth phase calculated by endpoints (i.e., “volume at the end of growth” - “volume at start of growth”) / “growth duration”.", "tscale_linearityfit_volume": "Each volume trajectory was fit to a power law scaling with time 𝑉(𝑡) =𝑉𝑠𝑡𝑎𝑟𝑡+𝑟𝑡^𝛼 over all time points during the growth phase. 
This feature is the fitted time scaling factor 𝛼 for a full-interphase nuclear trajectory.", "dxdt_48_volume": "The change in volume over time for a four hour rolling window for middle interphase time points of full-interphase trajectories.", - "density": "The ratio between the area of the nucleus (from maximum z projected nuclear segmentation) and the area of the pseudo cell (from maximum z projected nuclear segmentation) gives a metric represtative of the local density for each nucleus at every timepoint. The density was not calculated if the nucleus was on the edge of the colony or had a mitotic neighbor.", + "2d_area_nuc_cell_ratio": "The ratio between the area of the nucleus (from maximum z projected nuclear segmentation) and the area of the pseudo cell (from maximum z projected nuclear segmentation) gives a metric represtative of the local density for each nucleus at every timepoint. The density was not calculated if the nucleus was on the edge of the colony or had a mitotic neighbor.", "normalized_time": "The time within interphase normalized by the total interphase time for full-interphase nuclear trajectories. This ranges from 0 to 1, where 0 represents the start of interphase and 1 represents the end of interphase.", "sync_time_Ff": "Time synchronized to start of interphase for each single full-interphase nuclear trajectory (i.e., all trajectories start with a synchronized time of 0 hours).", "time_at_B": "The calculated time of the start of the growth phase in single full-interphase nuclear trajectory. 
This time is relative to the start of the timelapse imaging.", From 7936229c95f0caccaf63ee73d1e2071cdd8e2c62 Mon Sep 17 00:00:00 2001 From: Chantelle Leveille Date: Fri, 6 Dec 2024 16:39:57 -0800 Subject: [PATCH 26/34] update saving csv for quilt --- .../preprocessing/save_datasets_for_quilt.py | 199 +----------------- 1 file changed, 1 insertion(+), 198 deletions(-) diff --git a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py index 7e4ac5aa..c816b717 100644 --- a/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py +++ b/nuc_morph_analysis/lib/preprocessing/save_datasets_for_quilt.py @@ -3,7 +3,6 @@ from datetime import datetime from pathlib import Path - # %% def check_columns(df1, df2): """ @@ -52,203 +51,7 @@ def save_dataset_for_quilt(df, dataset_name, destdir=None): # %% Load baseline colonies df_all_baseline = global_dataset_filtering.load_dataset_with_features() -# print(*[col for col in df_all_baseline.columns if "NUC_sh" not in col], sep="\n") -#%% old and new col lists -previous_cols = [ - "CellId", - "label_img", - "track_id", - "colony", - "index_sequence", - "roi", - "centroid_x", - "centroid_y", - "centroid_z", - "volume", - "height", - "mesh_vol", - "mesh_sa", - "SA_vol_ratio", - "transform_params", - "NUC_", - "length", - "width", - "xz_aspect", - "xy_aspect", - "zy_aspect", - "fov_edge", - "predicted_formation", - "predicted_breakdown", - "Ff", - "Fb", - "after_breakdown_outlier", - "before_formation_outlier", - "is_after_breakdown_before_formation_outlier", - "termination", - "entering_mitosis", - "exiting_mitosis", - "entering_or_exiting_division", - "neighbors", - "neigh_distance", - "is_tp_outlier", - "track_length", - "is_outlier_by_short_track", - "is_outlier_curated_by_id", - "is_growth_outlier", - "is_outlier_track", - "is_outlier", - "parent_id", - "family_id", - "distance_from_centroid", - "colony_depth", - "normalized_colony_depth", - 
"normalized_distance_from_centroid", - "colony_edge_in_fov", - "colony_time", - "non_interphase_volume", - "non_interphase_mesh_sa", - "non_interphase_SA_vol_ratio", - "non_interphase_size_shape", - "dxdt_48_volume", - "neighbor_avg_volume_90um", - "neighbor_avg_dxdt_48_volume_90um", - "neighbor_avg_volume_whole_colony", - "neighbor_avg_dxdt_48_volume_whole_colony", - "normalized_time", - "frame_transition", - "sync_time_Ff", - "volume_at_A", - "location_x_at_A", - "location_y_at_A", - "time_at_A", - "colony_time_at_A", - "volume_at_B", - "location_x_at_B", - "location_y_at_B", - "time_at_B", - "colony_time_at_B", - "volume_at_C", - "location_x_at_C", - "location_y_at_C", - "time_at_C", - "colony_time_at_C", - "duration_AB", - "duration_BC", - "duration_AC", - "delta_volume_BC", - "volume_fold_change_BC", - "SA_at_B", - "SA_at_C", - "delta_SA_BC", - "SA_fold_change_BC", - "volume_fold_change_fromB", - "SA_fold_change_fromB", - "growth_rate_AB", - "late_growth_rate_by_endpoints", - "tscale_linearityfit_volume", - "atB_linearityfit_volume", - "rate_linearityfit_volume", - "RMSE_linearityfit_volume", - "is_full_track", - "exploratory_dataset", - "baseline_colonies_dataset", - "full_interphase_dataset", - "lineage_annotated_dataset" -] -columns_list = [col for col in df_all_baseline.columns if "NUC_sh" not in col] - -#%% Get differences -previous_not_in_current = [col for col in previous_cols if col not in columns_list] -current_not_in_previous = [col for col in columns_list if col not in previous_cols] -print("Columns in previous but not in current:") -print(previous_not_in_current) -print("\nColumns in current but not in previous:") -print(current_not_in_previous) - -#%% 139 NEW COLUMNS!! 
-new_cols = current_not_in_previous -keep_list = [ - #needed to calc linear reg model feats - "has_mitotic_neighbor", - "has_dying_neighbor", - "sum_has_dying_neighbor", - "sum_has_mitotic_neighbor", - "neighbor_avg_lrm_volume_90um", - "neighbor_avg_lrm_height_90um", - "neighbor_avg_lrm_xy_aspect_90um", - "neighbor_avg_lrm_mesh_sa_90um", - "neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um", - - #used in lrm - "sisters_volume_at_B", - "sisters_duration_BC", - "sisters_delta_volume_BC", - "height_at_B", - "xy_aspect_at_B", - "SA_vol_ratio_at_B", - "neighbor_avg_lrm_volume_90um_at_B", - "neighbor_avg_lrm_height_90um_at_B", - "neighbor_avg_lrm_xy_aspect_90um_at_B", - "neighbor_avg_lrm_mesh_sa_90um_at_B", - "neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um_at_B", - "early_neighbor_avg_dxdt_48_volume_90um", - "mean_neighbor_avg_dxdt_48_volume_90um", - "mean_neighbor_avg_lrm_volume_90um", - "mean_neighbor_avg_lrm_height_90um", - "mean_neighbor_avg_lrm_xy_aspect_90um", - "mean_neighbor_avg_lrm_mesh_sa_90um", - "mean_neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um", - "normalized_sum_has_mitotic_neighbor", - "normalized_sum_has_dying_neighbor", - - # density features - '2d_area_nucleus', - '2d_area_pseudo_cell', - '2d_area_nuc_cell_ratio', - '2d_perimeter_nucleus', - '2d_perimeter_pseudo_cell', - '2d_perimeter_nuc_cell_ratio', - 'bad_pseudo_cells_segmentation', - 'uncaught_pseudo_cell_artifact', - - # neighbor_of_X features - 'frame_of_breakdown', # used in figure_mitotic_filtering_examples.py - 'frame_of_formation',# used in figure_mitotic_filtering_examples.py - 'has_mitotic_neighbor_breakdown', # used in validation/illustration code, useful to keep - 'has_mitotic_neighbor_formation', # used in validation/illustration code, useful to keep - 'has_mitotic_neighbor_breakdown_forward_dilated', # used in neighbor_of_X/example - 'has_mitotic_neighbor_formation_backward_dilated', # used in neighbor_of_X/example - 'has_mitotic_neighbor_dilated', # used in 
figure_mitotic_filtering_examples.py - 'identified_death', # used in neighbor_of_X/example - 'frame_of_death', # used in neighbor_of_X/example - 'has_dying_neighbor_forward_dilated', #used, must keep - - # volume_dips features - 'volume_change_over_25_minutes', #used - 'power_fit_volume', #used and could go to tfe - 'volume_dips_peak_mask_at_region', #used for S10 C right and nice for TFE - 'volume_dips_peak_mask_at_center', # used for S10C right - 'volume_dips_volume_change_at_center', # used for S10D thresholding - 'volume_dips_removed_um_unfilled', # used for S10E abd S10B - 'dxdt_48_volume_dips_removed_um_unfilled', # used for S10E abd S10B - 'neighbor_avg_dxdt_48_volume_dips_removed_um_unfilled_90um', # used for S10 G - 'neighbor_avg_dxdt_48_volume_dips_removed_um_unfilled_whole_colony', # used for S10 F - - # new fitting paramaters features - 'RMSE_exponentialfit_volume', #used - 'RMSE_linearfit_volume', #used - ] - -unnacounted_cols = [col for col in new_cols if col not in keep_list] -still_needs_dropping = [col for col in new_cols if col not in keep_list] -print(len(still_needs_dropping), still_needs_dropping) -print(len(keep_list)) -print(len(unnacounted_cols)) -#%% -for col in unnacounted_cols: - print(col) - - +print(*[col for col in df_all_baseline.columns if "NUC_sh" not in col], sep="\n") # %% Filter baseline colonies df_baseline = filter_data.all_timepoints_minimal_filtering(df_all_baseline) From ff3c2d1c7c74c6a997901c2b0c56e6bfb5f15a63 Mon Sep 17 00:00:00 2001 From: Chantelle Leveille Date: Fri, 6 Dec 2024 16:40:20 -0800 Subject: [PATCH 27/34] remove power law fit --- nuc_morph_analysis/lib/visualization/glossary.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nuc_morph_analysis/lib/visualization/glossary.py b/nuc_morph_analysis/lib/visualization/glossary.py index 72e434c3..29d025ac 100644 --- a/nuc_morph_analysis/lib/visualization/glossary.py +++ b/nuc_morph_analysis/lib/visualization/glossary.py @@ -44,7 +44,6 @@ "2d_area_nucleus": "The area 
of the nucleus (from maximum z projected nuclear segmentation).", "2d_area_pseudo_cell": "The area of the pseudo cell (from maximum z projected nuclear segmentation).", "volume_change_over_25_minutes": "The nuclear volume change in a 25 minute window at each frame (t) calculated by ∆V(t) = V(t-5) - V(t).", - "power_fit_volume": "Each volume trajectory from transition to breakdown was fit to a power law scaling with time 𝑉(𝑡)=𝑉𝑠𝑡𝑎𝑟𝑡+𝑟𝑡^𝛼. This feature is the power fit volume.", "volume_dips_peak_mask_at_region": "True at all timepoints along an identified volume dip event.", "has_mitotic_neighbor_dilated": "True at timepoints when adjacent neighbors are undergoing mitosis.", "has_dying_neighbor_forward_dilated": "True at timepoints when adjacent neighbors are undergoing cell death.", From d2cbd5a347fdffcb47c51ee37f3415def7ee30e4 Mon Sep 17 00:00:00 2001 From: Chantelle Leveille Date: Fri, 6 Dec 2024 16:40:37 -0800 Subject: [PATCH 28/34] remove power law fit --- .../lib/visualization/write_data_for_colorizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py b/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py index 1037c928..49b808a4 100644 --- a/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py +++ b/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py @@ -244,7 +244,6 @@ class NucMorphFeatureSpec: categories=["False", "True"], ), NucMorphFeatureSpec('volume_change_over_25_minutes'), - NucMorphFeatureSpec('power_fit_volume'), ], } @@ -419,6 +418,7 @@ def make_dataset( # load the dataset once df_all = load_dataset_with_features("all_baseline", remove_growth_outliers=False) + df_all['volume_dips_peak_mask_at_region'] = df_all['volume_dips_peak_mask_at_region'].astype(str) for filter in filters: output_dir_subset = Path(output_dir) / filter From a8fceafd104b4473f1212eebc5dba216df055587 Mon Sep 17 00:00:00 2001 From: Chantelle Leveille Date: Fri, 
6 Dec 2024 16:58:58 -0800 Subject: [PATCH 29/34] update feature documentation --- docs/feature_documentation.md | 262 +++++++++++++++++++++------------- 1 file changed, 160 insertions(+), 102 deletions(-) diff --git a/docs/feature_documentation.md b/docs/feature_documentation.md index 9087681d..915fd290 100644 --- a/docs/feature_documentation.md +++ b/docs/feature_documentation.md @@ -1,102 +1,160 @@ -| Columns in manifest | Definition | Referenced as in manuscript and timelapse feature explorer | Units | Computed by | -|---------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------|-----------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------| -| CellId | Unique identifier for one nucleus at one timepoint, corresponding to each row in the manifest. | | | [cyto-dl](https://github.com/AllenCellModeling/cyto-dl/) | -| label_img | Object ID unique for each nucleus in a given FOV. | | | cyto-dl | -| track_id | Unique identifier for one nucleus tracked over time. | Track ID | | [aics-timelapse-tracking](https://github.com/AllenCell/aics-timelapse-tracking/) and [nuc-morph-analysis](https://github.com/AllenCell/nuc-morph-analysis) | -| colony | Unique identifier for each timelapse. | Small, Medium, Large | | [nuc-morph-analysis](https://github.com/AllenCell/nuc-morph-analysis) | -| index_sequence | Frame number of the timelapse acquisition. The first frame of the movie is 0. The interval of the movie is used to convert this metric to time. 
| Time | frames | morflowgenesis | -| roi | The 3D bounding box region around the nucleus segmentation [bottom z, top z, bottom y, top y, bottom x, top x]. | | pixels | morflowgenesis | -| centroid_x | Horizontal x position of the nucleus segmentation's centroid relative to the field of view. | | pixels | morflowgenesis | -| centroid_y | Horizontal y position of the nucleus segmentation's centroid relative to the field of view. | | pixels | morflowgenesis | -| centroid_z | Vertical position of the nucleus segmentation's centroid relative to the field of view. The bottom of the FOV is 0. | | pixels | morflowgenesis | -| volume | Number of voxels in the nucleus segmentation. This column is used to calculate volume in the paper when scaled by the pixel size cubed. | Volume | pixels cubed | morflowgenesis | -| height | Distance from lowest pixel in the nucleus segmentation to the highest pixel. This column is used to calculate height in the paper when scaled by the pixel size squared. | Height | pixels | morflowgenesis | -| mesh_vol | Number of voxels in the smoothed mesh of the nuclear segmentation. | | pixels | morflowgenesis | -| mesh_sa | Number of pixels on the surface of the smoothed mesh of the nuclear segmentation. This column is used to calculate surface area in the paper when scaled by the pixel size squared. | Surface area | pixels squared | morflowgenesis | -| SA_vol_ratio | The ratio of mesh_sa / volume. Can be used when scaled by the pixel size^2 / pixel size^3. | Surface area to volume ratio | 1 / pixels | nuc-morph-analysis | -| transform_params | The aisshparams transform parameters (x, y and z coordinates of the nuclear mesh centroid and the xy angle) used to do 2D alignment to the longest axis prior to spherical harmonic calculation. | | | morflowgenesis | -| NUC_ | Columns that begin with NUC_ are single nucleus shape features and spherical harmonic coefficients. 
See [cvapipe_analysis](https://github.com/AllenCell/cvapipe_analysis/) for more details. | | | morflowgenesis | -| length | The length of the longest XY axis of the nuclear segmentation. | | pixels | morflowgenesis | -| width | The width of the nuclear segmentation perpendicular to the XY axis. | | pixels | morflowgenesis | -| xz_aspect | The ratio of length / height. | XZ aspect ratio | | nuc-morph-analysis | -| xy_aspect | The ratio of length / width. | XY aspect ratio | | nuc-morph-analysis | -| zy_aspect | The ratio of height / width. | ZY aspect ratio | | nuc-morph-analysis | -| fov_edge | Is true if nuclear segmentation touches the edge of the field of view. Nuclear segmentations that touch the FOV edge are not analyzed in the paper. | | | cyto-dl | -| predicted_formation | Index sequence of lamin shell formation for a given nuclear trajectory predicted by the interphase detector model. | | frames | cyto-dl | -| predicted_breakdown | Index sequence of lamin shell breakdown for a given nuclear trajectory predicted by the interphase detector model. | | frames | cyto-dl | -| Ff | Index sequence of lamin shell formation used in the paper. Occurs at a frame where a segmentation is present, not an outlier, and within two frames of the predicted index sequence. | | frames | nuc-morph-analysis | -| Fb | Index sequence of lamin shell breakdown used in the paper. Occurs at a frame where a segmentation is present, not an outlier, and within two frames of the predicted index sequence. | | frames | nuc-morph-analysis | -| after_breakdown_outlier | Time point occurs after Ff. | | | nuc-morph-analysis | -| before_formation_outlier | Timepoint occurs before Fb. | | | nuc-morph-analysis | -| is_after_breakdown_before_formation_outlier | Is true if the timepoint occurs before frame formation or after frame breakdown. If true, the timepoint is not analyzed in the full interphase trajectories dataset. 
| | | nuc-morph-analysis | -| termination | Manual annotation of 0 - track terminates by dividing. 1 - track terminates by going off the edge of the FOV. 2 - track terminates by apoptosis. | Trajectory termination annotation | NaN, 0, 1, or 2 | nuc-morph-analysis | -| entering_mitosis | Is true if timepoint occurs in 2.5 hour window after lamin shell formation. | | | nuc-morph-analysis | -| exiting_mitosis | Is true if timepoint occurs in 30 minute window before lamin shell breakdown. | | | nuc-morph-analysis | -| entering_or_exiting_division | Is true if timepoint is flagged as entering mitosis or exiting mitosis. This filter is used to remove these timepoints when calculating the transient growth rate in the paper. | | | nuc-morph-analysis | -| neighbors | List of CellIds that are the adjacent nuclei, based on only nuclei centroids. | | | nuc-morph-analysis | -| neigh_distance | Mean distance from this nucleus's centroid to the centroids of neighboring nuclei. | | pixels | nuc-morph-analysis | -| density | Inverse of the neigh_distance squared. This column is used to calculate density in the paper when scaled by 1/pixel size^2. | Density | 1/ (pixels squared) | nuc-morph-analysis | -| is_tp_outlier | Is true if the timepoint is flagged as a single timepoint volume outlier. If true, the timepoint is not analyzed in the paper. | | | nuc-morph-analysis | -| track_length | The length of the nuclear trajectory in frames. | | frames | nuc-morph-analysis | -| is_outlier_by_short_track | Is true if the track length has less than 5 timepoints. If true, the nuclear trajectory is not analyzed in the paper. | | | nuc-morph-analysis | -| is_outlier_curated_by_id | Is true if the nuclear trajectory is identified as a measurement outlier (i.e. segmentation issue, tracking issue, etc.). | | | nuc-morph-analysis | -| is_growth_outlier | Is true if the nuclear trajectory is identified as a biological outlier (i.e. grows for an abnormally long time and the daughters die). 
| | | nuc-morph-analysis | -| is_outlier_track | Is true for the combines flags: is_outlier_by_short_track, is_outlier_curated_by_id, and optionally is_growth_outlier. | | | nuc-morph-analysis | -| is_outlier | Is true for the combined flags: is_tp_outlier, is_after_breakdown_before_formation_outlier, is_outlier_track. | Growth Outlier Filter | | nuc-morph-analysis | -| parent_id | The manually curated track_id of the parent cell. If the cell was manually identified to have no parent, this is -1. Otherwise NaN. This column is used to find mother-daughter and sister pairs in the paper. | Parent ID | | nuc-morph-analysis | -| family_id | Unique identifier for all the nuclei in the same family tree. | Family ID | | nuc-morph-analysis | -| distance_from_centroid | Distance from the centroid of the nuclear segmentation to the colony center. | | microns | nuc-morph-analysis | -| colony_depth | Nuclei on the colony boundary have a depth of one. Nuclei are then assigned an increasing colony depth value using the Voronoi tessellation graph adjacency to all nuclei in the field of view (FOV). | Colony depth | | nuc-morph-analysis | -| normalized_colony_depth | The normalized radial position in the colony, where the center of the colony is 0 and the edge is 1 calculated by (maximum depth of the colony - the nuclear depth) / (maximum depth of the colony - minimum depth of the colony) using the Voronoi-graph-based colony_depth feature. | Normalized distance from colony center | | nuc-morph-analysis | -| normalized_distance_from_centroid | Distance of nucleus from centroid of colony / the maximum radius of the colony at each timepoint. | | | nuc-morph-analysis | -| colony_edge_in_fov | How much of the colony boundary is outside the FOV. | | "full", "partial" or "none" | nuc-morph-analysis | -| colony_time | For the baseline colony dataset, the index sequence that aligns the Small, Medium, and Large colonies in their development. 
| Aligned colony time | frames | nuc-morph-analysis | -| non_interphase_volume | True if the volume is outside the distribution of volumes for full-interphase nuclear trajectories. | | pixels^3 | nuc-morph-analysis | -| non_interphase_mesh_sa | True if the surface area is outside the distribution of volumes for full-interphase nuclear trajectories. | | pixes^2 | nuc-morph-analysis | -| non_interphase_SA_vol_ratio | True if surface area to volume ratio are outside the distribution of volumes for full-interphase nuclear trajectories. | | 1/pixels | nuc-morph-analysis | -| non_interphase_size_shape | True if volumes are outside the distribution of volumes for full-interphase nuclear trajectories based on the volume, surface area and surface area to volume ratio. Used to detect interphase timepoints for transient growth rate measurements of nuclei that are not full-interphase trajectories. | | | nuc-morph-analysis | -| dxdt_48_volume | The transient growth rate. The change in volume over index sequence for a 4 hour rolling window for middle interphase timepoints of full interphase trajectories. | Transient growth rate | | nuc-morph-analysis | -| neighbor_avg_volume_90um | The average volume for middle interphase nuclei in a 90 µm radius at each timepoint. | | | nuc-morph-analysis | -| neighbor_avg_dxdt_48_volume_90um | The transient growth rate in a 90 µm neighborhood. The average change in volume over index sequence for a 4 hour rolling window for middle interphase timepoints of nuclei in a 90 µm radius. | Average transient growth rate in 90¬µm neighborhood | | nuc-morph-analysis | -| neighbor_avg_volume_whole_colony | The average volume for middle interphase nuclei in the whole colony at every timepoint. | | | nuc-morph-analysis | -| neighbor_avg_dxdt_48_volume_whole_colony | The average transient growth rate in the whole colony. 
The average change in volume over index sequence for a 4 hour rolling window for middle interphase timepoints of nuclei in the whole colony. | Colony average transient growth rate | | nuc-morph-analysis | -| normalized_time | The normalized time is a measure of time for a full-interphase nuclear trajectory that ranges from 0 to 1, where 0 represents the time of nuclear lamin shell formation and 1 represents the time of nuclear lamin shell breakdown. | Normalized interphase time | | nuc-morph-analysis | -| frame_transition | The calculated time of transition between expansion and growth phases in single full-interphase nuclear trajectory. | Start of growth (time) | frames | nuc-morph-analysis | -| sync_time_Ff | Time, synchronized to start at the time of lamin shell formation for a single full-interphase nuclear trajectory. | Synchronized time | frames | nuc-morph-analysis | -| volume_at_A | Volume at the time of lamin shell formation for a full-interphase nuclear trajectory. | Volume at formation | microns cubed | nuc-morph-analysis | -| location_x_at_A | X location at the time of lamin shell formation for a full-interphase nuclear trajectory. | | pixels | nuc-morph-analysis | -| location_y_at_A | Y location at the time of lamin shell formation for a full-interphase nuclear trajectory. | | pixels | nuc-morph-analysis | -| time_at_A | Time at lamin shell formation for a full-interphase nuclear trajectory. | Formation time | hours | nuc-morph-analysis | -| colony_time_at_A | Aligned colony time at lamin shell formation for a full-interphase nuclear trajectory. | | hours | nuc-morph-analysis | -| volume_at_B | Volume at the time of transition, the start of the growth phase for a full-interphase nuclear trajectory. | Volume at start of growth | microns cubed | nuc-morph-analysis | -| location_x_at_B | X location at the time of transition, the start of the growth phase for a full-interphase nuclear trajectory. 
| | pixels | nuc-morph-analysis | -| location_y_at_B | Y location at the time of transition, the start of the growth phase for a full-interphase nuclear trajectory. | | pixels | nuc-morph-analysis | -| time_at_B | Time at transition, the start of the growth phase for a full-interphase nuclear trajectory. | | hours | nuc-morph-analysis | -| colony_time_at_B | Aligned colony time at transition, the start of the growth phase for a full-interphase nuclear trajectory. | Aligned colony time at start of growth | hours | nuc-morph-analysis | -| volume_at_C | Volume at the time of lamin shell breakdown for a full-interphase nuclear trajectory. | Volume at end of growth | microns cubed | nuc-morph-analysis | -| location_x_at_C | X location at the time of lamin shell breakdown for a full-interphase nuclear trajectory. | | pixels | nuc-morph-analysis | -| location_y_at_C | Y location at the time of lamin shell breakdown for a full-interphase nuclear trajectory. | | pixels | nuc-morph-analysis | -| time_at_C | Time at transition, the start of lamin shell breakdown for a full-interphase nuclear trajectory. | End of growth (time) | hours | nuc-morph-analysis | -| colony_time_at_C | Aligned colony time at lamin shell breakdown for a full-interphase nuclear trajectory. | | hours | nuc-morph-analysis | -| duration_AB | Duration of the expansion phase from lamin shell formation to transition for a full-interphase nuclear trajectory. | Expansion duration | frames | nuc-morph-analysis | -| duration_BC | Duration of the growth phase from transition to lamin shell breakdown for a full-interphase nuclear trajectory. | Growth duration | frames | nuc-morph-analysis | -| duration_AC | The amount of volume added from transition to breakdown for a full-interphase nuclear trajectory. | Total interphase Duration | frames | nuc-morph-analysis | -| delta_volume_BC | The amount of volume added from transition to breakdown for a full-interphase nuclear trajectory. 
| Added volume during growth | microns cubed | nuc-morph-analysis | -| volume_fold_change_BC | The volume fold-change from transition to breakdown for a full-interphase nuclear trajectory. | Growth volume fold change | | nuc-morph-analysis | -| SA_at_B | Surface area at the time of transition, the start of the growth phase for a full-interphase nuclear trajectory. | Surface area at start of growth | microns squared | nuc-morph-analysis | -| SA_at_C | Surface area at the time of lamin shell breakdown for a full-interphase nuclear trajectory. | Surface area at end of growth | microns squared | nuc-morph-analysis | -| delta_SA_BC | The amount of surface area added from transition to breakdown for a full-interphase nuclear trajectory. | Added surface area during growth | microns squared | nuc-morph-analysis | -| SA_fold_change_BC | The surface area fold change from transition to breakdown for a full-interphase nuclear trajectory. | Surface area fold change during growth | | nuc-morph-analysis | -| volume_fold_change_fromB | The volume fold-change relative to the volume at transition, the start of the growth phase for a full-interphase nuclear trajectory. | Volume fold-change relative to starting volume | | nuc-morph-analysis | -| SA_fold_change_fromB | The surface area fold-change relative to the volume at transition, the start of the growth phase for a full-interphase nuclear trajectory. | Surface area fold-change relative to starting surface area | | nuc-morph-analysis | -| growth_rate_AB | The growth rate of the expansion phase calculated by endpoints: (volume_at_B - volume_at_A) / duration_AB for a full-interphase nuclear trajectory. | Expansion rate | pixels cubed / frames | nuc-morph-analysis | -| late_growth_rate_by_endpoints | The growth rate of the growth phase calculated by endpoints: (volume_at_C - volume_at_B) / duration_BC for a full-interphase nuclear trajectory. 
| Growth rate | pixels cubed / frames | nuc-morph-analysis | -| tscale_linearityfit_volume | Each volume trajectory from transition to breakdown was fit to a power law scaling with time 𝑉(𝑡)=𝑉𝑠𝑡𝑎𝑟𝑡+𝑟𝑡^𝛼. This column is the fitted time scaling factor 𝛼 for a full-interphase nuclear trajectory. | Fitted time scaling factor | | nuc-morph-analysis | -| atB_linearityfit_volume | Each volume trajectory from transition to breakdown was fit to a power law scaling with time 𝑉(𝑡)=𝑉𝑠𝑡𝑎𝑟𝑡+𝑟𝑡^𝛼. This column is the starting volume fit parameter 𝑉𝑠𝑡𝑎𝑟𝑡 for a full-interphase nuclear trajectory. | | microns cubed | nuc-morph-analysis | -| rate_linearityfit_volume | Each volume trajectory from transition to breakdown was fit to a power law scaling with time 𝑉(𝑡)=𝑉𝑠𝑡𝑎𝑟𝑡+𝑟𝑡^𝛼. This column is the rate fit parameter r for a full-interphase nuclear trajectory. | | | nuc-morph-analysis | -| RMSE_linearityfit_volume | Each volume trajectory from transition to breakdown was fit to a power law scaling with time 𝑉(𝑡)=𝑉𝑠𝑡𝑎𝑟𝑡+𝑟𝑡^𝛼. This column is the root mean squared error of the difference between the fitted volume and the actual volume trajectory for a full-interphase nuclear trajectory. | | | nuc-morph-analysis | -| is_full_track | Is true if the timepoint is part of a full interphase trajectory for a full-interphase nuclear trajectory . | | | nuc-morph-analysis | -| exploratory_dataset | Baseline colonies column to filter datasets for visualization in timelapse feature explorer. | Exploratory dataset filter | | nuc-morph-analysis | -| baseline_colonies_dataset | Baseline colonies column to filter datasets for visualization in timelapse feature explorer. | Baseline colonies dataset filter | | nuc-morph-analysis | -| full_interphase_dataset | Baseline colonies column to filter datasets for visualization in timelapse feature explorer. 
| Full interphase analysis dataset filter | | nuc-morph-analysis | -| lineage_annotated_dataset | Baseline colonies column to filter datasets for visualization in timelapse feature explorer. | Lineage annotated analysis dataset filter | | nuc-morph-analysis | + +|Columns in manifest|Definition|Referenced as in manuscript and timelapse feature explorer|Units|Computed by| +|---|---|---|---|---| +|CellId|Unique identifier for one nucleus at one timepoint, corresponding to each row in the manifest.||unitless|[cyto-dl](https://github.com/AllenCellModeling/cyto-dl/)| +|label_img|Object ID unique for each nucleus in a given FOV.||unitless|cyto-dl| +|track_id|Unique identifier for one nucleus tracked over time.|Track ID|unitless|[aics-timelapse-tracking](https://github.com/AllenCell/aics-timelapse-tracking/) and [nuc-morph-analysis](https://github.com/AllenCell/nuc-morph-analysis)| +|colony|Unique identifier for each timelapse.|Small, Medium, Large|unitless|[nuc-morph-analysis](https://github.com/AllenCell/nuc-morph-analysis)| +|index_sequence|Frame number of the timelapse acquisition. The first frame of the movie is 0. The interval of the movie is used to convert this metric to time.|Time|frames|morflowgenesis| +|roi|The 3D bounding box region around the nucleus segmentation [bottom z, top z, bottom y, top y, bottom x, top x].||pixels|morflowgenesis| +|centroid_x|Horizontal x position of the nucleus segmentation's centroid relative to the field of view.||pixels|morflowgenesis| +|centroid_y|Horizontal y position of the nucleus segmentation's centroid relative to the field of view.||pixels|morflowgenesis| +|centroid_z|Vertical position of the nucleus segmentation's centroid relative to the field of view. The bottom of the FOV is 0.||pixels|morflowgenesis| +|volume|Number of voxels in the nucleus segmentation. 
This column is used to calculate volume in the paper when scaled by the pixel size cubed.|Volume|pixels cubed|morflowgenesis| +|height|Distance between the pixels with the first and 99th percentile Z-position values within the nucleus segmentation.  This column is used to calculate height in the paper when scaled by the pixel size squared.|Height|pixels|morflowgenesis| +|mesh_vol|Number of voxels in the smoothed mesh of the nuclear segmentation.||pixels cubed|morflowgenesis| +|mesh_sa|Number of pixels on the surface of the smoothed mesh of the nuclear segmentation. This column is used to calculate surface area in the paper when scaled by the pixel size squared.|Surface area|pixels squared|morflowgenesis| +|SA_vol_ratio|The ratio of mesh_sa / volume. Can be used when scaled by the pixel size^2 / pixel size^3.|Surface area to volume ratio|1 / pixels|nuc-morph-analysis| +|transform_params|The aisshparams transform parameters (x, y and z coordinates of the nuclear mesh centroid and the xy angle) used to do 2D alignment to the longest axis prior to spherical harmonic calculation.||feature dependent|morflowgenesis| +|NUC_|Columns that begin with NUC_ are single nucleus shape features and spherical harmonic coefficients. See [cvapipe_analysis](https://github.com/AllenCell/cvapipe_analysis/) for more details.||feature dependent|morflowgenesis| +|length|The length of the longest XY axis of the nuclear segmentation.||pixels|morflowgenesis| +|width|The width of the nuclear segmentation perpendicular to the XY axis.||pixels|morflowgenesis| +|xz_aspect|The ratio of length / height.|XZ aspect ratio|unitless|nuc-morph-analysis| +|xy_aspect|The ratio of length / width.|XY aspect ratio|unitless|nuc-morph-analysis| +|zy_aspect|The ratio of height / width.|ZY aspect ratio|unitless|nuc-morph-analysis| +|fov_edge|Is true if nuclear segmentation touches the edge of the field of view. 
Nuclear segmentations that touch the FOV edge are not analyzed in the paper.||boolean|cyto-dl| +|predicted_formation|Index sequence of lamin shell formation for a given nuclear trajectory predicted by the interphase detector model.||frames|cyto-dl| +|predicted_breakdown|Index sequence of lamin shell breakdown for a given nuclear trajectory predicted by the interphase detector model.||frames|cyto-dl| +|Ff|Index sequence of lamin shell formation used in the paper. Occurs at a frame where a segmentation is present, not an outlier, and within two frames of the predicted index sequence.||frames|nuc-morph-analysis| +|Fb|Index sequence of lamin shell breakdown used in the paper. Occurs at a frame where a segmentation is present, not an outlier, and within two frames of the predicted index sequence.||frames|nuc-morph-analysis| +|after_breakdown_outlier|Timepoint occurs after Fb.||boolean|nuc-morph-analysis| +|before_formation_outlier|Timepoint occurs before Ff.||boolean|nuc-morph-analysis| +|is_after_breakdown_before_formation_outlier|Is true if the timepoint occurs before the frame of formation (Ff) or after the frame of breakdown (Fb). If true, the timepoint is not analyzed in the baseline colonies dataset.||boolean|nuc-morph-analysis| +|termination|Manual annotation of 0 - track terminates by dividing. 1 - track terminates by going off the edge of the FOV. 2 - track terminates by apoptosis.|Trajectory termination annotation|NaN, 0, 1, or 2|nuc-morph-analysis| +|entering_mitosis|Is true if timepoint occurs in 2.5 hour window after lamin shell formation.||boolean|nuc-morph-analysis| +|exiting_mitosis|Is true if timepoint occurs in 30 minute window before lamin shell breakdown.||boolean|nuc-morph-analysis| +|entering_or_exiting_division|Is true if timepoint is flagged as entering mitosis or exiting mitosis. 
This filter is used to remove these timepoints when calculating the transient growth rate in the paper.||boolean|nuc-morph-analysis| +|neighbors|List of CellIds that are the adjacent nuclei, based on only nuclei centroids.||unitless|nuc-morph-analysis| +|neigh_distance|Mean distance from this nucleus's centroid to the centroids of neighboring nuclei.|Mean distance to neighbors|pixels|nuc-morph-analysis| +|is_tp_outlier|Is true if the timepoint is flagged as a single timepoint volume outlier. If true, the timepoint is not analyzed in the paper.||boolean|nuc-morph-analysis| +|track_length|The length of the nuclear trajectory in frames.||frames|nuc-morph-analysis| +|is_outlier_by_short_track|Is true if the track has fewer than 5 timepoints. If true, the nuclear trajectory is not analyzed in the paper.||boolean|nuc-morph-analysis| +|is_outlier_curated_by_id|Is true if the nuclear trajectory is identified as a measurement outlier (e.g. segmentation issue, tracking issue, etc.).||boolean|nuc-morph-analysis| +|is_growth_outlier|Is true if the nuclear trajectory is identified as a biological outlier (i.e. grows for an abnormally long time and the daughters die).||boolean|nuc-morph-analysis| +|is_outlier_track|Is true for the combined flags: is_outlier_by_short_track, is_outlier_curated_by_id, and optionally is_growth_outlier.||boolean|nuc-morph-analysis| +|is_outlier|Is true for the combined flags: is_tp_outlier, is_after_breakdown_before_formation_outlier, is_outlier_track.|Growth Outlier Filter|boolean|nuc-morph-analysis| +|parent_id|The manually curated track_id of the parent cell. If the cell was manually identified to have no parent, this is -1. Otherwise NaN. 
This column is used to find mother-daughter and sister pairs in the paper.|Parent ID|unitless|nuc-morph-analysis| +|family_id|Unique identifier for all the nuclei in the same family tree.|Family ID|unitless|nuc-morph-analysis| +|distance_from_centroid|Distance from the centroid of the nuclear segmentation to the colony center.||microns|nuc-morph-analysis| +|colony_depth|Nuclei on the colony boundary have a depth of one. Nuclei are then assigned an increasing colony depth value using the Voronoi tessellation graph adjacency to all nuclei in the field of view (FOV).|Colony depth|unitless|nuc-morph-analysis| +|normalized_colony_depth|The normalized radial position in the colony, where the center of the colony is 0 and the edge is 1 calculated by (maximum depth of the colony - the nuclear depth) / (maximum depth of the colony - minimum depth of the colony) using the Voronoi-graph-based colony_depth feature.|Normalized distance from colony center|unitless|nuc-morph-analysis| +|normalized_distance_from_centroid|Distance of nucleus from centroid of colony / the maximum radius of the colony at each timepoint.||unitless|nuc-morph-analysis| +|colony_edge_in_fov|How much of the colony boundary is outside the FOV.||"full", "partial" or "none"|nuc-morph-analysis| +|colony_time|For the baseline colony dataset, the index sequence that aligns the Small, Medium, and Large colonies in their development.|Aligned colony time|frames|nuc-morph-analysis| +|non_interphase_volume|True if the volume is outside the distribution of volumes for full-interphase nuclear trajectories.||boolean|nuc-morph-analysis| +|non_interphase_mesh_sa|True if the surface area is outside the distribution of volumes for full-interphase nuclear trajectories.||boolean|nuc-morph-analysis| +|non_interphase_SA_vol_ratio|True if surface area to volume ratio are outside the distribution of volumes for full-interphase nuclear trajectories.||boolean|nuc-morph-analysis| +|non_interphase_size_shape|True if volumes are 
outside the distribution of volumes for full-interphase nuclear trajectories based on the volume, surface area and surface area to volume ratio. Used to detect interphase timepoints for transient growth rate measurements of nuclei that are not full-interphase trajectories.||boolean|nuc-morph-analysis| +|dxdt_48_volume|The transient growth rate. The change in volume over index sequence for a 4 hour rolling window for middle interphase timepoints of full interphase trajectories.|Transient growth rate|pixels cubed / frames|nuc-morph-analysis| +|neighbor_avg_volume_90um|The average volume of neighboring middle interphase nuclei in a 90 µm radius at each timepoint.||pixels cubed|nuc-morph-analysis| +|neighbor_avg_dxdt_48_volume_90um|The transient growth rate of neighboring nuclei in a 90 µm neighborhood. The average change in volume over index sequence for a 4 hour rolling window for middle interphase timepoints of nuclei in a 90 µm radius.|Average transient growth rate in 90 µm neighborhood|pixels cubed / frames|nuc-morph-analysis| +|neighbor_avg_volume_whole_colony|The average volume for middle interphase nuclei in the whole colony at every timepoint.||pixels cubed|nuc-morph-analysis| +|neighbor_avg_dxdt_48_volume_whole_colony|The average transient growth rate in the whole colony.  
The average change in volume over index sequence for a 4 hour rolling window for middle interphase timepoints of nuclei in the whole colony.|Colony average transient growth rate|pixels cubed / frames|nuc-morph-analysis| +|normalized_time|The normalized time is a measure of time for a full-interphase nuclear trajectory that ranges from 0 to 1, where 0 represents the time of nuclear lamin shell formation and 1 represents the time of nuclear lamin shell breakdown.|Normalized interphase time|unitless|nuc-morph-analysis| +|frame_transition|The calculated time of transition between expansion and growth phases in single full-interphase nuclear trajectory.|Start of growth (time)|frames|nuc-morph-analysis| +|sync_time_Ff|Time, synchronized to start at the time of lamin shell formation for a single full-interphase nuclear trajectory.|Synchronized time|frames|nuc-morph-analysis| +|volume_at_A|Volume at the time of lamin shell formation for a full-interphase nuclear trajectory.|Volume at formation|microns cubed|nuc-morph-analysis| +|location_x_at_A|X location at the time of lamin shell formation for a full-interphase nuclear trajectory.||pixels|nuc-morph-analysis| +|location_y_at_A|Y location at the time of lamin shell formation for a full-interphase nuclear trajectory.||pixels|nuc-morph-analysis| +|time_at_A|Time at lamin shell formation for a full-interphase nuclear trajectory.|Formation time|hours|nuc-morph-analysis| +|colony_time_at_A|Aligned colony time at lamin shell formation for a full-interphase nuclear trajectory.||hours|nuc-morph-analysis| +|volume_at_B|Volume at the time of transition, the start of the growth phase for a full-interphase nuclear trajectory.|Volume at start of growth|microns cubed|nuc-morph-analysis| +|location_x_at_B|X location at the time of transition, the start of the growth phase for a full-interphase nuclear trajectory.||pixels|nuc-morph-analysis| +|location_y_at_B|Y location at the time of  transition, the start of the growth phase for a 
full-interphase nuclear trajectory.||pixels|nuc-morph-analysis| +|time_at_B|Time at transition, the start of the growth phase for a full-interphase nuclear trajectory.||hours|nuc-morph-analysis| +|colony_time_at_B|Aligned colony time at transition, the start of the growth phase for a full-interphase nuclear trajectory.|Aligned colony time at start of growth|hours|nuc-morph-analysis| +|volume_at_C|Volume at the time of lamin shell breakdown for a full-interphase nuclear trajectory.|Volume at end of growth|microns cubed|nuc-morph-analysis| +|location_x_at_C|X location at the time of lamin shell breakdown for a full-interphase nuclear trajectory.||pixels|nuc-morph-analysis| +|location_y_at_C|Y location at the time of lamin shell breakdown for a full-interphase nuclear trajectory.||pixels|nuc-morph-analysis| +|time_at_C|Time at lamin shell breakdown, the end of the growth phase for a full-interphase nuclear trajectory.|End of growth (time)|hours|nuc-morph-analysis| +|colony_time_at_C|Aligned colony time at lamin shell breakdown for a full-interphase nuclear trajectory.||hours|nuc-morph-analysis| +|duration_AB|Duration of the expansion phase from lamin shell formation to transition for a full-interphase nuclear trajectory.|Expansion duration|frames|nuc-morph-analysis| +|duration_BC|Duration of the growth phase from transition to lamin shell breakdown for a full-interphase nuclear trajectory.|Growth duration|frames|nuc-morph-analysis| +|duration_AC|Duration of interphase from lamin shell formation to lamin shell breakdown for a full-interphase nuclear trajectory.|Total interphase Duration|frames|nuc-morph-analysis| +|delta_volume_BC|The amount of volume added from transition to breakdown for a full-interphase nuclear trajectory.|Added volume during growth|microns cubed|nuc-morph-analysis| +|volume_fold_change_BC|The volume fold-change from transition to breakdown for a full-interphase nuclear trajectory.|Growth volume fold change|unitless|nuc-morph-analysis| +|SA_at_B|Surface area at the time 
of transition, the start of the growth phase for a full-interphase nuclear trajectory.|Surface area at start of growth|microns squared|nuc-morph-analysis| +|SA_at_C|Surface area at the time of lamin shell breakdown for a full-interphase nuclear trajectory.|Surface area at end of growth|microns squared|nuc-morph-analysis| +|delta_SA_BC|The amount of surface area added from transition to breakdown for a full-interphase nuclear trajectory.|Added surface area during growth|microns squared|nuc-morph-analysis| +|SA_fold_change_BC|The surface area fold change from transition to breakdown for a full-interphase nuclear trajectory.|Surface area fold change during growth|unitless|nuc-morph-analysis| +|volume_fold_change_fromB|The volume fold-change relative to the volume at transition, the start of the growth phase for a full-interphase nuclear trajectory.|Volume fold-change relative to starting volume|unitless|nuc-morph-analysis| +|SA_fold_change_fromB|The surface area fold-change relative to the surface area at transition, the start of the growth phase for a full-interphase nuclear trajectory.|Surface area fold-change relative to starting surface area|unitless|nuc-morph-analysis| +|growth_rate_AB|The growth rate of the expansion phase calculated by endpoints: (volume_at_B - volume_at_A) / duration_AB for a full-interphase nuclear trajectory.|Expansion rate|microns cubed / hr|nuc-morph-analysis| +|late_growth_rate_by_endpoints|The growth rate of the growth phase calculated by endpoints: (volume_at_C - volume_at_B) / duration_BC for a full-interphase nuclear trajectory.|Growth rate|microns cubed / frames|nuc-morph-analysis| +|tscale_linearityfit_volume|Each volume trajectory from transition to breakdown was fit to a power law scaling with time 𝑉(𝑡)=𝑉𝑠𝑡𝑎𝑟𝑡+𝑟𝑡^𝛼. 
This column is the fitted time scaling factor 𝛼 for a full-interphase nuclear trajectory.|Fitted time scaling factor|unitless|nuc-morph-analysis| +|atB_linearityfit_volume|Each volume trajectory from transition to breakdown was fit to a power law scaling with time 𝑉(𝑡)=𝑉𝑠𝑡𝑎𝑟𝑡+𝑟𝑡^𝛼. This column is the starting volume fit parameter 𝑉𝑠𝑡𝑎𝑟𝑡 for a full-interphase nuclear trajectory.||microns cubed|nuc-morph-analysis| +|rate_linearityfit_volume|Each volume trajectory from transition to breakdown was fit to a power law scaling with time 𝑉(𝑡)=𝑉𝑠𝑡𝑎𝑟𝑡+𝑟𝑡^𝛼. This column is the rate fit parameter r for a full-interphase nuclear trajectory.||1/hr|nuc-morph-analysis| +|RMSE_linearityfit_volume|Each volume trajectory from transition to breakdown was fit to a power law scaling with time 𝑉(𝑡)=𝑉𝑠𝑡𝑎𝑟𝑡+𝑟𝑡^𝛼. This column is the root mean squared error of the fitted volume compared to the actual volume trajectory for a full-interphase nuclear trajectory.|Fitted volume root mean squared error (Power law)|microns cubed|nuc-morph-analysis| +|is_full_track|Is true if the timepoint is part of a full interphase trajectory for a full-interphase nuclear trajectory .||boolean|nuc-morph-analysis| +|exploratory_dataset|Baseline colonies column to filter datasets for visualization in timelapse feature explorer.|Exploratory dataset filter|boolean|nuc-morph-analysis| +|baseline_colonies_dataset|Baseline colonies column to filter datasets for visualization in timelapse feature explorer.|Baseline colonies dataset filter|boolean|nuc-morph-analysis| +|full_interphase_dataset|Baseline colonies column to filter datasets for visualization in timelapse feature explorer.|Full interphase analysis dataset filter|boolean|nuc-morph-analysis| +|lineage_annotated_dataset|Baseline colonies column to filter datasets for visualization in timelapse feature explorer.|Lineage annotated analysis dataset filter|boolean|nuc-morph-analysis| +|frame_of_formation|True if the the frame is the predicted lamin shell formation 
timepoint.||boolean|nuc-morph-analysis| +|frame_of_breakdown|True if the frame is the predicted lamin shell breakdown timepoint.||boolean|nuc-morph-analysis| +|has_mitotic_neighbor_formation|True if the nucleus at a given timepoint has a directly adjacent neighbor undergoing lamin shell formation.||boolean|nuc-morph-analysis| +|has_mitotic_neighbor_breakdown|True if the nucleus at a given timepoint has a directly adjacent neighbor undergoing lamin shell breakdown.||boolean|nuc-morph-analysis| +|has_mitotic_neighbor|True if the nucleus at a given timepoint has a directly adjacent neighbor undergoing mitosis identified by a single lamin shell breakdown or lamin shell formation event.|Mitotic neighbor identified|boolean|nuc-morph-analysis| +|has_dying_neighbor|True if the nucleus at a given timepoint has a directly adjacent neighbor undergoing cell death.||boolean|nuc-morph-analysis| +|sum_has_dying_neighbor|Sum of dying adjacent neighbors over lifetime for a full-interphase nuclear trajectory.||events|nuc-morph-analysis| +|sum_has_mitotic_neighbor|Sum of mitotic adjacent neighbors over lifetime for a full-interphase nuclear trajectory.||events|nuc-morph-analysis| +|identified_death|Index sequence of cell death. 
Identified as the final frame when a nucleus has a termination value of 2.||frames|nuc-morph-analysis| +|frame_of_death|True if the frame is the timepoint of cell death.||boolean|nuc-morph-analysis| +|has_dying_neighbor_forward_dilated|True if has_dying_neighbor is true, along with the 6 frames following the cell death timepoint.|Has dying neighbor flag|boolean|nuc-morph-analysis| +|has_mitotic_neighbor_breakdown_forward_dilated|If the nucleus has a directly adjacent neighbor that undergoes lamin shell breakdown, this feature is true at the timepoint of that lamin shell breakdown and at the six following timepoints.||boolean|nuc-morph-analysis| +|has_mitotic_neighbor_formation_backward_dilated|If the nucleus has a directly adjacent neighbor that undergoes lamin shell formation, this feature is true at the timepoint of that lamin shell formation and at the six prior timepoints.||boolean|nuc-morph-analysis| +|has_mitotic_neighbor_dilated|True if has_mitotic_neighbor_breakdown_forward_dilated or has_mitotic_neighbor_formation_backward_dilated is true, capturing all timepoints when neighbors are undergoing mitosis.|Has mitotic neighbor flag|boolean|nuc-morph-analysis| +|normalized_sum_has_mitotic_neighbor|Frequency of mitotic adjacent neighbors for a full-interphase nuclear trajectory. Calculated by the sum of mitotic adjacent nuclei normalized by the duration of the growth phase.|Frequency of mitotic adjacent neighbors|events/frames|nuc-morph-analysis| +|normalized_sum_has_dying_neighbor|Frequency of dying adjacent neighbors for a full-interphase nuclear trajectory. 
Calculated by the sum of dying adjacent nuclei normalized by the duration of the growth phase.|Frequency of dying adjacent neighbors|events/frames|nuc-morph-analysis| +|neighbor_avg_lrm_volume_90um|The average volume of neighboring nuclei in a 90 µm radius at each timepoint.||pixels cubed / frames|nuc-morph-analysis| +|neighbor_avg_lrm_height_90um|The average height of neighboring nuclei in a 90 µm radius at each timepoint.||pixels|nuc-morph-analysis| +|neighbor_avg_lrm_xy_aspect_90um|The average XY aspect ratio of neighboring nuclei in a 90 µm radius at each timepoint.||unitless|nuc-morph-analysis| +|neighbor_avg_lrm_mesh_sa_90um|The average surface area of neighboring nuclei in a 90 µm radius at each timepoint.||pixels squared|nuc-morph-analysis| +|neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um|The average density of neighboring nuclei in a 90 µm radius at each timepoint.||unitless|nuc-morph-analysis| +|mean_neighbor_avg_dxdt_48_volume_90um|The temporal mean of 'neighbor_avg_dxdt_48_volume_90um' over the lifetime of a full-interphase nuclear trajectory.|Neighborhood avg. mean transient growth rate|pixels cubed / frames|nuc-morph-analysis| +|mean_neighbor_avg_lrm_volume_90um|The temporal mean of 'neighbor_avg_lrm_volume_90um' over the lifetime of a full-interphase nuclear trajectory.|Neighborhood avg. mean volume|pixels cubed|nuc-morph-analysis| +|mean_neighbor_avg_lrm_height_90um|The temporal mean of 'neighbor_avg_lrm_height_90um' over the lifetime of a full-interphase nuclear trajectory.|Neighborhood avg. mean height|pixels|nuc-morph-analysis| +|mean_neighbor_avg_lrm_xy_aspect_90um|The temporal mean of 'neighbor_avg_lrm_xy_aspect_90um' over the lifetime of a full-interphase nuclear trajectory.|Neighborhood avg. mean XY aspect ratio|unitless|nuc-morph-analysis| +|mean_neighbor_avg_lrm_mesh_sa_90um|The temporal mean of 'neighbor_avg_lrm_mesh_sa_90um' over the lifetime of a full-interphase nuclear trajectory.|Neighborhood avg. 
mean surface area|pixels squared|nuc-morph-analysis| +|mean_neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um|The temporal mean of 'neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um' over the lifetime of a full-interphase nuclear trajectory.|Neighborhood avg. mean density|unitless|nuc-morph-analysis| +|early_neighbor_avg_dxdt_48_volume_90um|The average transient growth rate of neighboring nuclei within a 90 um radius around the start of the growth phase (2 hours after lamin shell formation) for a full-interphase nuclear trajectory.|Neighborhood avg. transient growth rate at ~start of growth|microns cubed / frames|nuc-morph-analysis| +|neighbor_avg_lrm_volume_90um_at_B|The average volume of neighboring nuclei within a 90 um radius at the start of the growth phase for a full-interphase nuclear trajectory.|Neighborhood avg. volume at start of growth|pixels cubed|nuc-morph-analysis| +|neighbor_avg_lrm_height_90um_at_B|The average height of neighboring nuclei within a 90 um radius at the start of the growth phase for a full-interphase nuclear trajectory.|Neighborhood avg. height at start of growth|pixels|nuc-morph-analysis| +|neighbor_avg_lrm_xy_aspect_90um_at_B|The average XY aspect ratio of neighboring nuclei within a 90 um radius at the start of the growth phase for a full-interphase nuclear trajectory.|Neighborhood avg. XY aspect ratio at start of growth|unitless|nuc-morph-analysis| +|neighbor_avg_lrm_mesh_sa_90um_at_B|The average surface area of neighboring nuclei within a 90 um radius at the start of the growth phase for a full-interphase nuclear trajectory.|Neighborhood avg. surface area at start of growth|pixels squared|nuc-morph-analysis| +|neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um_at_B|The average density of neighboring nuclei within a 90 um radius at the start of the growth phase for a full-interphase nuclear trajectory.|Neighborhood avg. 
density at start of growth|unitless|nuc-morph-analysis| +|sisters_volume_at_B|A nucleus's sister's volume at start of growth for lineage annotated full-interphase nuclear trajectory.|Sisters starting volume|pixels cubed|nuc-morph-analysis| +|sisters_duration_BC|A nucleus's sister's duration of the growth phase for lineage annotated full-interphase nuclear trajectory.|Sisters growth duration|frames|nuc-morph-analysis| +|sisters_delta_volume_BC|A nucleus's sister's added volume for lineage annotated full-interphase nuclear trajectory.|Sisters added volume|pixels cubed|nuc-morph-analysis| +|height_at_B|The height at the start of the growth phase for a full-interphase nuclear trajectory.|Height at start of growth|pixels|nuc-morph-analysis| +|xy_aspect_at_B|The XY aspect ratio at the start of the growth phase for a full-interphase nuclear trajectory.|XY aspect ratio at start of growth|unitless|nuc-morph-analysis| +|SA_vol_ratio_at_B|The surface area to volume ratio at the start of the growth phase for a full-interphase nuclear trajectory.|Surface area/volume ratio at start of growth|1 / pixels|nuc-morph-analysis| +|2d_area_nucleus|The area of the nucleus (from maximum z projected nuclear segmentation).|2D nuclear area|pixels squared|nuc-morph-analysis| +|2d_area_pseudo_cell|The area of the pseudo cell (from maximum z projected nuclear segmentation).|Pseudo cell area|pixels squared|nuc-morph-analysis| +|2d_area_nuc_cell_ratio|The ratio between the '2d_area_nucleus' and the '2d_area_pseudo_cell' gives a metric representative of the local density for each nucleus at every timepoint. If the density could not be properly calculated at this timepoint (e.g. 
edge of colony, has a mitotic neighbor, etc.), the column value is NaN.|Density|unitless|nuc-morph-analysis| +|2d_perimeter_nucleus|The perimeter of the nucleus (from maximum z projected nuclear segmentation).||pixels|nuc-morph-analysis| +|2d_perimeter_pseudo_cell|The perimeter of the pseudo cell (from maximum z projected nuclear segmentation).||pixels|nuc-morph-analysis| +|2d_perimeter_nuc_cell_ratio|The ratio between the '2d_perimeter_nucleus' and the '2d_perimeter_pseudo_cell'.||unitless|nuc-morph-analysis| +|bad_pseudo_cells_segmentation|True if a pseudo cell segmentation corresponding to the nucleus is known, a priori, to be erroneously large due to being at edge of colony or having neighbors that lack a segmentation, such as mitotic cells. (See supp Fig. S4D)||boolean|nuc-morph-analysis| +|uncaught_pseudo_cell_artifact|True if pseudo cell segmentation corresponding to the nucleus is abnormally large and not caught by the bad_pseudo_cells_segmentation filter. The criteria are: '2d_perimeter_nuc_cell_ratio' < 0.4 OR '2d_perimeter_pseudo_cell' > 500 pixels OR '2d_area_nuc_cell_ratio' < 0.2||boolean|nuc-morph-analysis| +|volume_change_over_25_minutes|The nuclear volume change in a 25 minute window at each frame (t) calculated by ∆V(t) = V(t-5) - V(t).|Change in volume in 25 minute window|pixels cubed|nuc-morph-analysis| +|power_fit_volume|Each volume trajectory from transition to breakdown was fit to a power law scaling with time 𝑉(𝑡)=𝑉𝑠𝑡𝑎𝑟𝑡+𝑟𝑡^𝛼. 
This column is the power fit volume with timepoints from identified volume dips removed.|Power law fitted volume|pixels cubed|nuc-morph-analysis| +|volume_dips_peak_mask_at_region|True at all timepoints along an identified volume dip event.|Volume dip flag|boolean|nuc-morph-analysis| +|volume_dips_peak_mask_at_center|True at the timepoint that is the "peak" (or minimum) of an identified volume dip event.||boolean|nuc-morph-analysis| +|volume_dips_volume_change_at_center|The change in volume from the start of a volume dip to the "peak" (or minimum) of a volume dip. Value only reported at the timepoint of the volume dip "peak" (or minimum).||microns cubed|nuc-morph-analysis| +|volume_dips_removed_um_unfilled|The volume trajectory of a nucleus with the timepoints within each volume dip event region replaced with NaN.||microns cubed|nuc-morph-analysis| +|dxdt_48_volume_dips_removed_um_unfilled|The transient growth rate computed with volume dips removed (generated by computing transient growth rate using the 'volume_dips_removed_um_unfilled' column).||pixels cubed / frames|nuc-morph-analysis| +|neighbor_avg_dxdt_48_volume_dips_removed_um_unfilled_90um|The transient growth rate of neighboring nuclei in a 90 µm neighborhood with volume dips removed. The average change in volume over index sequence for a 4 hour rolling window for middle interphase timepoints of nuclei in a 90 µm radius. Volume measurements identified as volume dips were excluded.||pixels cubed / frames|nuc-morph-analysis| +|neighbor_avg_dxdt_48_volume_dips_removed_um_unfilled_whole_colony|The transient growth rate of all nuclei in the colony with volume dips removed. The average change in volume over index sequence for a 4 hour rolling window for middle interphase timepoints of nuclei in the whole colony. 
Volume measurements identified as volume dips were excluded.||pixels cubed / frames|nuc-morph-analysis| +|RMSE_exponentialfit_volume|Each volume trajectory from transition to breakdown was fit to an exponential scaling with time 𝑉(𝑡)=𝑉𝑠𝑡𝑎𝑟𝑡+𝑟𝑡^𝛼. This column is the root mean squared error of the fitted volume compared to the actual volume trajectory for a full-interphase nuclear trajectory.||microns cubed|nuc-morph-analysis| +|RMSE_linearfit_volume|Each volume trajectory from transition to breakdown was fit to a linear scaling with time  𝑉(𝑡)=𝑉𝑠𝑡𝑎𝑟𝑡+𝑟𝑡. This column is the root mean squared error of the fitted volume compared to the actual volume trajectory for a full-interphase nuclear trajectory.||microns cubed|nuc-morph-analysis| \ No newline at end of file From ab83497d107400fb2d4b145a3132c515e6bca086 Mon Sep 17 00:00:00 2001 From: Chantelle Leveille Date: Fri, 6 Dec 2024 17:11:21 -0800 Subject: [PATCH 30/34] update import paths --- .../neighbor_of_X/run_all_example_worfklows.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/nuc_morph_analysis/analyses/neighbor_of_X/run_all_example_worfklows.py b/nuc_morph_analysis/analyses/neighbor_of_X/run_all_example_worfklows.py index 9f0a5c5c..ce07f7a1 100644 --- a/nuc_morph_analysis/analyses/neighbor_of_X/run_all_example_worfklows.py +++ b/nuc_morph_analysis/analyses/neighbor_of_X/run_all_example_worfklows.py @@ -1,11 +1,10 @@ #%% from nuc_morph_analysis.lib.preprocessing import global_dataset_filtering -from nuc_morph_analysis.analyses.neighbor_of_X import example_dying_track_neighbors -from nuc_morph_analysis.analyses.neighbor_of_X import example_mitotic_track_neighbors -from nuc_morph_analysis.analyses.neighbor_of_X import example_timepoint_neighbors_of_dying -from nuc_morph_analysis.analyses.neighbor_of_X import example_timepoint_of_neighbors_of_mitotic -from nuc_morph_analysis.analyses.neighbor_of_X import example_timepoint_numbers_of_mitotic -from 
nuc_morph_analysis.analyses.neighbor_of_X import example_of_tracks_with_multiple_mitotic_neighbors +from nuc_morph_analysis.analyses.neighbor_of_X.examples import example_dying_track_neighbors +from nuc_morph_analysis.analyses.neighbor_of_X.examples import example_mitotic_track_neighbors +from nuc_morph_analysis.analyses.neighbor_of_X.examples import example_timepoint_neighbors_of_dying +from nuc_morph_analysis.analyses.neighbor_of_X.examples import example_timepoint_of_neighbors_of_mitotic +from nuc_morph_analysis.analyses.neighbor_of_X.examples import example_of_tracks_with_multiple_mitotic_neighbors #%% df = global_dataset_filtering.load_dataset_with_features(dataset='all_baseline') #%% @@ -13,5 +12,4 @@ example_mitotic_track_neighbors.run_example(df) example_timepoint_neighbors_of_dying.run_example(df) example_timepoint_of_neighbors_of_mitotic.run_example(df) -example_timepoint_numbers_of_mitotic.run_example(df) example_of_tracks_with_multiple_mitotic_neighbors.run_example(df) From bdbb8bccae5894babede6a68001ab3709f5344c3 Mon Sep 17 00:00:00 2001 From: Chantelle Leveille Date: Fri, 6 Dec 2024 17:12:53 -0800 Subject: [PATCH 31/34] update documentation --- .../lib/visualization/write_data_for_colorizer_README.md | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/nuc_morph_analysis/lib/visualization/write_data_for_colorizer_README.md b/nuc_morph_analysis/lib/visualization/write_data_for_colorizer_README.md index 100982b1..d630a25b 100644 --- a/nuc_morph_analysis/lib/visualization/write_data_for_colorizer_README.md +++ b/nuc_morph_analysis/lib/visualization/write_data_for_colorizer_README.md @@ -26,15 +26,8 @@ To prepare the colony datasets to be grouped in a dropdown menu, create the file while the "lineage-annotated" subset of the data only exists for the Small and Medium colonies, so the `collection.json` file for that dataset excludes the line pertaining to the Large colony. 
## Overwriting the data for an existing dataset (unchanged segmentations/rows in manifest) -To overwrite an existing segmented version of the dataset (for example to add/remove/change a feature), simply run the script with the existing output directory and add the `--noframes` option to skip the frame generation step. +To overwrite an existing segmented version of the dataset (for example to add/remove/change a feature), simply run the script with the existing output directory and add the `--noframes` option to skip the frame generation step. If backdrop images have already been generated, optionally use `--make_backdrops False` to skip that step as well. ``` pdm run nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py --output_dir {existing_output_dir_name} --noframes -pdm run nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py --output_dir //allen/aics/assay-dev/users/Frick/PythonProjects/repos/local_storage/timelapse_feature_explorer_datasets/TFE_new/ --noframes - - - -pdm run nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py --output_dir //allen/aics/assay-dev/users/Frick/PythonProjects/repos/local_storage/timelapse_feature_explorer_datasets/TFE_full/ --parallel - - ``` \ No newline at end of file From a8aaa290b51c797e9a561664c1feb3ebaed5b2b7 Mon Sep 17 00:00:00 2001 From: Julie Dixon Date: Sat, 7 Dec 2024 13:14:17 -0800 Subject: [PATCH 32/34] remove extraneous densities output from calc colony metrics --- nuc_morph_analysis/lib/preprocessing/add_colony_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nuc_morph_analysis/lib/preprocessing/add_colony_metrics.py b/nuc_morph_analysis/lib/preprocessing/add_colony_metrics.py index 78dc1ac0..38fde28e 100644 --- a/nuc_morph_analysis/lib/preprocessing/add_colony_metrics.py +++ b/nuc_morph_analysis/lib/preprocessing/add_colony_metrics.py @@ -53,7 +53,7 @@ def add_colony_metrics(df: pd.DataFrame): def _add_colony_metrics_one_tp(df_timepoint: 
pd.DataFrame): - depth_map, neighborhoods, neigh_dists, densities = _calc_colony_metrics(df_timepoint) + depth_map, neighborhoods, neigh_dists = _calc_colony_metrics(df_timepoint) for _, (lbl, depth) in enumerate(depth_map.items()): df_timepoint.loc[df_timepoint["label_img"] == lbl, "colony_depth"] = depth From 38a6c61a5488932cb0134c7095ed3a7bc10e27f0 Mon Sep 17 00:00:00 2001 From: Julie Dixon Date: Sat, 7 Dec 2024 13:24:16 -0800 Subject: [PATCH 33/34] remove extra _ output no longer expected from _calculate_distance --- nuc_morph_analysis/lib/preprocessing/add_colony_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nuc_morph_analysis/lib/preprocessing/add_colony_metrics.py b/nuc_morph_analysis/lib/preprocessing/add_colony_metrics.py index 38fde28e..e2eed392 100644 --- a/nuc_morph_analysis/lib/preprocessing/add_colony_metrics.py +++ b/nuc_morph_analysis/lib/preprocessing/add_colony_metrics.py @@ -84,7 +84,7 @@ def _calc_colony_metrics(df_timepoint): neighbors = _make_neighbor_map(voronoi, labels) centroids_by_label = {label: centroids_list[index] for index, label in enumerate(labels)} - neigh_distance, _ = _calculate_distance(labels, neighbors, centroids_by_label) + neigh_distance = _calculate_distance(labels, neighbors, centroids_by_label) depth1_labels = _get_depth1_labels(labels, centroids_list, voronoi) depth_map = calculate_depth(neighbors, depth1_labels) From 280d05079f64fcd99437b965fdd59aeac8970f74 Mon Sep 17 00:00:00 2001 From: Chantelle Leveille Date: Mon, 9 Dec 2024 09:35:49 -0800 Subject: [PATCH 34/34] Fix label table duplicate --- nuc_morph_analysis/lib/visualization/label_tables.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nuc_morph_analysis/lib/visualization/label_tables.py b/nuc_morph_analysis/lib/visualization/label_tables.py index b188221d..893c039d 100644 --- a/nuc_morph_analysis/lib/visualization/label_tables.py +++ b/nuc_morph_analysis/lib/visualization/label_tables.py @@ -76,7 +76,6 @@ def 
get_scale_factor_table(dataset="all_baseline"): "2d_area_nucleus", "2d_area_pseudo_cell", ): (pix_size/2.5)**2, - ("2d_area_pseudo_cell"): (pix_size/2.5)**2, ("2d_area_nuc_cell_ratio"): 1, }