"""Check script: plot ``2d_area_nuc_cell_ratio`` over time for every colony.

For each of the drug-perturbation, feeding-control and baseline datasets this
script loads the dataset, applies the minimal all-timepoints filtering and
draws one figure with the per-timepoint mean of ``2d_area_nuc_cell_ratio`` for
each colony, surrounded by a shaded error band.
"""
# %%
import matplotlib
import matplotlib.pyplot as plt

from nuc_morph_analysis.lib.preprocessing.global_dataset_filtering import load_dataset_with_features
from nuc_morph_analysis.lib.preprocessing import filter_data
from nuc_morph_analysis.lib.visualization.notebook_tools import save_and_show_plot
from nuc_morph_analysis.lib.visualization.plotting_tools import get_plot_labels_for_metric
from nuc_morph_analysis.lib.visualization.reference_points import COLONY_COLORS, COLONY_LABELS
from nuc_morph_analysis.analyses.inhibitors.dataset_info import get_drug_perturbation_details_from_colony_name

matplotlib.rcParams["pdf.fonttype"] = 42
plt.rcParams["font.family"] = "Arial"

# Maps each supported ``time_axis`` value to (grouping column, x-axis label).
_TIME_AXES = {
    "real_time": ("index_sequence", "Real Time (hr)"),
    "colony_time": ("colony_time", "Aligned Colony Time (hr)"),
}
_ERROR_KINDS = ("std", "percentile")


# %%
def try_to_get_drug_name(colony_name):
    """Return the ``drugs_string`` for a colony, or the colony name as a fallback.

    Parameters
    ----------
    colony_name : str
        Name passed to ``get_drug_perturbation_details_from_colony_name``.

    Returns
    -------
    str
        The ``"drugs_string"`` entry of the lookup result, or ``colony_name``
        unchanged when the lookup fails for any reason.
    """
    try:
        return get_drug_perturbation_details_from_colony_name(colony_name)["drugs_string"]
    except Exception:
        # Best-effort label lookup: any failure falls back to the raw colony
        # name, but KeyboardInterrupt / SystemExit are no longer swallowed.
        return colony_name


def plot_perturbation_densities(
    df,
    figdir,
    time_axis="real_time",
    error="percentile",
    show_legend=True,
    interval=5,
    titlestr="",
    min_cells=15,
):
    """Plot the per-timepoint mean of ``2d_area_nuc_cell_ratio`` for each colony.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``"colony"`` column, the time column selected by
        ``time_axis`` and a ``"2d_area_nuc_cell_ratio"`` column.
    figdir : str
        Output directory for the figure. Currently unused because the save
        call at the end of the function is commented out.
    time_axis : {"real_time", "colony_time"}
        ``"real_time"`` groups by ``index_sequence``; ``"colony_time"`` groups
        by ``colony_time``.
    error : {"percentile", "std"}
        ``"percentile"`` shades the 5th-95th percentile range; ``"std"`` shades
        mean +/- one standard deviation.
    show_legend : bool
        Whether to draw the legend to the right of the axes.
    interval : float
        Minutes per unit of the time column; used to convert the x axis to hours.
    titlestr : str
        Figure title.
    min_cells : int
        Only timepoints with strictly more than this many values are plotted.

    Raises
    ------
    ValueError
        If ``time_axis`` or ``error`` is not one of the supported values.
    """
    # Validate up front so a bad argument fails with a clear message instead of
    # an UnboundLocalError inside the plotting loop (and before a figure exists).
    if time_axis not in _TIME_AXES:
        raise ValueError(
            f"time_axis must be one of {sorted(_TIME_AXES)}, got {time_axis!r}"
        )
    if error not in _ERROR_KINDS:
        raise ValueError(f"error must be one of {_ERROR_KINDS}, got {error!r}")
    time_col, x_label = _TIME_AXES[time_axis]

    feature_col = "2d_area_nuc_cell_ratio"
    scale, _, units, _ = get_plot_labels_for_metric(feature_col)

    fig, ax = plt.subplots(1, 1, figsize=(10, 4))

    fallback_colors = plt.cm.tab20(range(20))
    for ci, (colony, df_colony) in enumerate(df.groupby("colony")):
        # Wrap around the 20-colour palette so datasets with more than 20
        # colonies do not raise IndexError.
        fallback_color = fallback_colors[ci % len(fallback_colors)]
        color = COLONY_COLORS.get(colony, fallback_color)

        # Only look up the drug name when there is no predefined label.
        if colony in COLONY_LABELS:
            legend_label = COLONY_LABELS[colony]
        else:
            legend_label = try_to_get_drug_name(colony)

        # groupby sorts the group keys, so the resulting series are time-ordered.
        grouper = df_colony.groupby(time_col)[feature_col]

        # keep only timepoints with more than `min_cells` values
        counts = grouper.count()
        valid_timepoints = counts[counts > min_cells].index

        # Select by index *label* with .loc; the x values are then derived from
        # the already-filtered index so they always line up with the y values,
        # even when the time column does not start at 0 or has gaps.
        mean_density = grouper.mean().loc[valid_timepoints] * scale
        if error == "std":
            std_density = grouper.std().loc[valid_timepoints] * scale
            lower = mean_density - std_density
            upper = mean_density + std_density
        else:  # error == "percentile"
            lower = grouper.quantile(0.05).loc[valid_timepoints] * scale
            upper = grouper.quantile(0.95).loc[valid_timepoints] * scale

        # convert from time-column units to hours (`interval` is in minutes)
        time = mean_density.index.values * interval / 60

        ax.fill_between(
            time,
            lower,
            upper,
            alpha=0.12,
            color=color,
            zorder=0,
            edgecolor="none",
            label=legend_label,
        )
        # The line itself gets an empty label so only the band appears in the legend.
        ax.plot(time, mean_density, linewidth=1.2, color=color, label="", zorder=20)

    ax.set_ylabel(f"Average Density \n Across Colony {units}")
    ax.set_xlabel(x_label)
    if show_legend:
        # put legend outside to the right
        ax.legend(loc="center left", bbox_to_anchor=(1.1, 0.5), frameon=False)
    ax.set_title(titlestr)
    fig.tight_layout()
    # NOTE: saving is currently disabled, which is why `figdir` is unused.
    # save_and_show_plot(
    #     f"{figdir}/avg_density_colony_{time_axis}_alignment-{feature_col}",
    #     file_extension=".pdf",
    #     dpi=300,
    #     transparent=True,
    # )


for dataset in ["all_drug_perturbation", "all_feeding_control", "all_baseline"]:
    df0 = load_dataset_with_features(dataset, load_local=True)
    df = filter_data.all_timepoints_minimal_filtering(df0)
    figdir = f"figures/{dataset}/density_plots"
    plot_perturbation_densities(
        df,
        figdir,
        time_axis="real_time",
        error="percentile",
        show_legend=True,
        interval=5,
        titlestr=dataset,
    )


# %%
a/nuc_morph_analysis/lib/preprocessing/all_datasets.py b/nuc_morph_analysis/lib/preprocessing/all_datasets.py index 152e85b8..d222c4c1 100644 --- a/nuc_morph_analysis/lib/preprocessing/all_datasets.py +++ b/nuc_morph_analysis/lib/preprocessing/all_datasets.py @@ -22,7 +22,7 @@ "all_baseline": { # this is the common info for all baseline datasets # FMS ID for 2024-07-08_main_manifest.parquet generated from morflowgenesis v0.3.0 # with generate_main_manifest.py at commit 6e9eb0962343113ab3999ce6b59d8331ddab9a45 - "fmsid": "443ac819f633494f936ff410c14c21ed", # morflowgenesis v0.3.0 updated with 2d_area from watershed PR (9/19/24) + "fmsid": "443ac819f633494f936ff410c14c21ed", # morflowgenesis v0.3.0 updated with new density from watershed PR (9/19/24) "s3_path": INTERMEDIATE_MANIFEST_DIR / "2024-06-25_baseline_intermediate_manifest.parquet", "pixel_size": PIXEL_SIZE_YX_100x, "time_interval": 5, # min @@ -35,7 +35,7 @@ "all_feeding_control": { # this is the common info for all "feeding_control" datasets # FMS ID for 2024-06-16_feeding_control_main_manifest.parquet generated from morflowgenesis v0.3.0 # with generate_perturbation_manifest.py at commit ebe76b5e84c9ca24617e4d04aed8acc1c2c3bb62 - "fmsid": "8ecd9b04329b490baec500859e276fbe", # morflowgenesis v 0.3.0 + "fmsid": "f95429aa9d084a699d9e591afd2f7792", # morflowgenesis v 0.3.0 updated with new density from density_calc PR (12/5/24) "s3_path": INTERMEDIATE_MANIFEST_DIR / "2024-06-14_feeding_control_intermediate_manifest.parquet", "pixel_size": PIXEL_SIZE_YX_100x, @@ -49,7 +49,7 @@ "all_drug_perturbation": { # this is the common info for all "drug_perturbation" datasets # FMS ID for 2024-06-24_drug_perturbation_main_manifest.parquet generated from morflowgenesis v0.3.0 # with generate_perturbation_manifest.py at commit 725ed45a6413391b9927610649e6209c04bcae9f - "fmsid": "19e1125fd9c4413e8babe2e9de8d9b87", # morflowgenesis v 0.3.0 + "fmsid": "5e8170e7881a4ad09c236e3e0c056d75", # morflowgenesis v 0.3.0 updated with 
new density from density_calc PR (12/5/24) "s3_path": INTERMEDIATE_MANIFEST_DIR / "2024-06-24_inhibitor_perturbation_intermediate_manifest.parquet", "pixel_size": PIXEL_SIZE_YX_100x, diff --git a/nuc_morph_analysis/lib/preprocessing/generate_perturbation_manifest.py b/nuc_morph_analysis/lib/preprocessing/generate_perturbation_manifest.py index a3c8c680..36bd345a 100644 --- a/nuc_morph_analysis/lib/preprocessing/generate_perturbation_manifest.py +++ b/nuc_morph_analysis/lib/preprocessing/generate_perturbation_manifest.py @@ -18,6 +18,7 @@ match_and_update_dataframe, FRAMES_TO_SHIFT, ) +from nuc_morph_analysis.lib.preprocessing.twoD_zMIP_area import watershed_workflow # %% @@ -86,8 +87,23 @@ def generate_manifest_one_colony(morflowgenesis_df, dataset, experiments=None): # -------------------------- # add_colony_metrics features logging.info("Calculating colony metrics") - return add_colony_metrics(step5_df) + step5_df = add_colony_metrics(step5_df) + # -------------------------- + # STEP 6: calculate 2D object-based density + # -------------------------- + logging.info("Calculating image-based density metrics") + step6_df = step5_df.copy() + density_df = watershed_workflow.get_pseudo_cell_boundaries_for_movie(dataset, parallel=True) + # now merge the density_df with the main dataframe + step6_df = pd.merge(step6_df, + density_df, + on=['colony','index_sequence','label_img'], + suffixes=('', '__dup_col'), + how='left') + # now remove columns with __dup_col suffix + step6_df = step6_df[step6_df.columns.drop(list(step6_df.filter(regex='__dup_col')))] + return step6_df def get_combined_manifest(experiments): """ @@ -107,7 +123,11 @@ def get_combined_manifest(experiments): # %% -for experiments in ["feeding_control", "drug_perturbation"]: - df = get_combined_manifest(experiments) - write_result(df, f"{experiments}_main_manifest", format="parquet") +def run_workflow(): + for experiments in ["feeding_control", "drug_perturbation"]: + df = get_combined_manifest(experiments) 
+ write_result(df, f"{experiments}_main_manifest", format="parquet") # %% + +if __name__ == "__main__": + run_workflow() diff --git a/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py b/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py index bdbd7f79..da14de1c 100644 --- a/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py +++ b/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py @@ -393,7 +393,7 @@ def merge_datasets(df_all, df_full): 'resolution_level', '2d_area_cyto', 'inv_cyto_density', - 'density' + 'density', # created in add_groth_features.fit_tracks_to_model() 'tscale_exponentialfit_volume', @@ -438,6 +438,8 @@ def remove_columns(df, column_list=COLUMNS_TO_DROP): df : pandas.DataFrame The dataframe with the columns removed. """ + column_list = [col for col in column_list if col in df.columns] + df = df.drop(columns=column_list) return df diff --git a/run_all_manuscript_workflows.py b/run_all_manuscript_workflows.py index dd0d0162..8c84a96e 100644 --- a/run_all_manuscript_workflows.py +++ b/run_all_manuscript_workflows.py @@ -3,7 +3,6 @@ from nuc_morph_analysis.analyses.colony_area import colony_area_workflow from nuc_morph_analysis.analyses.segmentation_model_validation import seg_model_validation_figure_workflow - class Workflows: def figure_1_dataset(): import nuc_morph_analysis.analyses.dataset_images_for_figures.figure_1_workflow