diff --git a/nuc_morph_analysis/analyses/evaluate_filters_and_outliers/evaluate_time_binning.py b/nuc_morph_analysis/analyses/evaluate_filters_and_outliers/evaluate_time_binning.py
new file mode 100644
index 00000000..a39bf138
--- /dev/null
+++ b/nuc_morph_analysis/analyses/evaluate_filters_and_outliers/evaluate_time_binning.py
@@ -0,0 +1,4 @@
+#%%
+from nuc_morph_analysis.lib.preprocessing import add_times
+fig,ax = add_times.validate_dig_time_with_plot()
+fig.show()
diff --git a/nuc_morph_analysis/analyses/neighbor_of_X/misc_neighbor_helper_functions.py b/nuc_morph_analysis/analyses/neighbor_of_X/misc_neighbor_helper_functions.py
new file mode 100644
index 00000000..8610d362
--- /dev/null
+++ b/nuc_morph_analysis/analyses/neighbor_of_X/misc_neighbor_helper_functions.py
@@ -0,0 +1,66 @@
+import numpy as np
+from scipy.spatial import distance_matrix
+
+def find_immediate_neighbors(dfsearch,main_track_id,timepoint):
+    colony = dfsearch[dfsearch["track_id"] == main_track_id]["colony"].values[0]
+    dfcolony = dfsearch[dfsearch['colony']==colony]
+
+    # get tracks by looking up the stored neighbor list for the main track
+    dftrack = dfcolony[(dfcolony["track_id"] == main_track_id) & (dfcolony["index_sequence"] == timepoint)]
+    cell_ids = dftrack["neighbors"].apply(lambda x: eval(x)).values[0]  # 'neighbors' holds a stringified list of CellIds
+
+    dftime = dfcolony[dfcolony["index_sequence"] == timepoint]
+    immediate_neighbor_track_ids = dftime.loc[dftime.index.isin(cell_ids), "track_id"].values
+    return immediate_neighbor_track_ids
+
+
+def compute_distances_between_nuclei(dftime,main_track_id):
+    # alternative workflow to get tracks by measuring distance to neighbors
+    dftrack = dftime[(dftime["track_id"] == main_track_id)]
+    centroids = dftrack[["centroid_x", "centroid_y"]].values
+    centroids_time = dftime[["centroid_x", "centroid_y"]].values
+    dist = distance_matrix(centroids, centroids_time)
+    return dist
+
+
+def get_ordered_list_of_nuclei_by_distance_to_track(dist,dftime):
+    # now determine the ordered list of neighbors
+    sorted_index = np.argsort(dist, axis=1).reshape(-1,)
+    sorted_dist = np.sort(dist, axis=1).reshape(-1,)
+    sorted_cell_ids = dftime.index.values[sorted_index]
+    sorted_track_ids = dftime['track_id'].values[sorted_index]
+    return sorted_dist,sorted_cell_ids,sorted_track_ids,sorted_index
+
+def get_a_cells_neighbors_as_track_id_list(df0,main_track_id,TIMEPOINT,return_self=True):
+    """
+    identify the neighboring tracks of a cell at a given timepoint
+
+    Parameters
+    ----------
+    df0 : pd.DataFrame
+        dataframe with columns ['colony','track_id','index_sequence','label_img','frame_transition','neighbors']
+    main_track_id : int
+        track_id of the cell of interest
+    TIMEPOINT : int
+        timepoint at which to find neighbors
+    return_self : bool
+        whether to include the cell of interest in the list of neighbors
+
+    Returns
+    -------
+    track_id_list : list
+        list of track_ids that are neighbors of the cell of interest at the given timepoint
+    """
+    dftrack = df0[df0["track_id"] == main_track_id]
+    colony = dftrack["colony"].values[0]
+    dfcolony = df0[df0["colony"] == colony]
+    df_after_transition_only = dfcolony[dfcolony['index_sequence'] > dfcolony['frame_transition']]  # only include timepoints after the growth transition
+
+    # find all neighbors that are immediate neighbors (and have passed the transition point)
+    immediate_neighbor_track_ids = find_immediate_neighbors(df_after_transition_only,main_track_id,TIMEPOINT)
+
+    # combine the main track with the immediate neighbors into a list
+    track_id_list = list(immediate_neighbor_track_ids)
+    if return_self:
+        track_id_list.append(main_track_id)
+    return track_id_list
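A minimal usage sketch for the helper above (the track_id, timepoint, and `df_manifest` variable are hypothetical placeholders; in the workflows below the manifest comes from load_dataset_with_features):

```python
# sketch: look up the immediate neighbors of one track at one timepoint
from nuc_morph_analysis.analyses.neighbor_of_X.misc_neighbor_helper_functions import (
    get_a_cells_neighbors_as_track_id_list,
)

MAIN_TRACK_ID = 12345   # hypothetical track_id
TIMEPOINT = 239         # frame index at which to look up neighbors

# df_manifest must carry the columns listed in the docstring above
track_id_list = get_a_cells_neighbors_as_track_id_list(df_manifest, MAIN_TRACK_ID, TIMEPOINT)
print(f"{len(track_id_list)} tracks (neighbors + self) at frame {TIMEPOINT}")
```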
diff --git a/nuc_morph_analysis/analyses/volume/FigS10_readme.md b/nuc_morph_analysis/analyses/volume/FigS10_readme.md
new file mode 100644
index 00000000..6fd84410
--- /dev/null
+++ b/nuc_morph_analysis/analyses/volume/FigS10_readme.md
@@ -0,0 +1,26 @@
+Columns added during global_dataset_filtering:
+
+add_volume_change_over_25_minute_window -->
+    'volume_change_over_25_minutes'
+
+filter_out_dips.run_script(df_full) -->
+
+    'volume_dips_peak_mask_at_region',     # boolean array, true at all points within peak region(s)
+    'volume_dips_peak_mask_at_center',     # boolean array, true at all peak centers
+    'volume_dips_has_peak',                # boolean array, true at all points if there is a peak
+    'volume_dips_volume_change_at_center', # magnitude value at each peak center (left_base - peak)
+    'volume_dips_volume_change_at_region', # magnitude values at all points within peak region(s) (left_base - peak)
+    'volume_dips_width_at_center',         # width of the peak at the peak center index
+    'volume_dips_width_at_region',         # width of the peak at all indices in the peak region
+    'volume_dips_max_volume_change',       # maximum magnitude value (at all points in array)
+    'volume_dips_peak_id_at_center',       # peak id at the peak center index
+    'volume_dips_peak_id_at_region',       # peak id at all indices in the peak region
+    'volume_dips_total_number',            # total number of peaks
+
+    'volume_dips_removed_um_unfilled'      # volume trajectories with peak (volume dip) regions removed
+
+then compute growth rate to get -->
+    'dxdt_48_volume_dips_removed_um_unfilled'
+
+and compute neighbors to get -->
+    'neighbor_avg_dxdt_48_volume_dips_removed_um_unfilled_90um'
+    'neighbor_avg_dxdt_48_volume_dips_removed_um_unfilled_whole_colony'
diff --git a/nuc_morph_analysis/analyses/volume/FigS10_workflow.py b/nuc_morph_analysis/analyses/volume/FigS10_workflow.py
new file mode 100644
index 00000000..2a94309f
--- /dev/null
+++ b/nuc_morph_analysis/analyses/volume/FigS10_workflow.py
@@ -0,0 +1,251 @@
+# %%
+# import necessary libraries
+import numpy as np
+import matplotlib.pyplot as plt
+from pathlib import Path
+from nuc_morph_analysis.lib.preprocessing.global_dataset_filtering import load_dataset_with_features
+
+from nuc_morph_analysis.analyses.volume.plot_help import (
+    plot_neighbors_volume_over_time, plot_tracks_aligned_at_volume_drop_onset, update_plotting_params,
+    adjust_axis_positions, plot_track_with_fit_line, plot_track_with_volume_dip,
+    plot_dip_detection_validation, plot_dxdt_over_time_by_cell_cycle
+)
+from nuc_morph_analysis.lib.preprocessing import filter_data, add_times
+from nuc_morph_analysis.lib.visualization.matplotlib_to_axlist import type_axlist
+from nuc_morph_analysis.lib.visualization.notebook_tools import save_and_show_plot
+from nuc_morph_analysis.analyses.volume import filter_out_dips
+from nuc_morph_analysis.analyses.neighbor_of_X.misc_neighbor_helper_functions import get_a_cells_neighbors_as_track_id_list
+
+from nuc_morph_analysis.lib.visualization.plotting_tools import get_plot_labels_for_metric
+from nuc_morph_analysis.lib.visualization import plotting_tools
+from nuc_morph_analysis.analyses.volume_variation import plot_features
+
+from nuc_morph_analysis.lib.visualization.example_tracks import EXAMPLE_TRACKS
+
+#%%
+# load the dataset with growth outliers retained
+df_outliers = load_dataset_with_features('all_baseline', remove_growth_outliers=False)
+# and with growth outliers removed
+df = load_dataset_with_features('all_baseline', remove_growth_outliers=True)
+#%%
+# apply minimal filtering
+df_outliers =
filter_data.all_timepoints_full_tracks(df_outliers) +df = filter_data.all_timepoints_full_tracks(df) +#%% +# update plotting parameters +fs,fw,fh = update_plotting_params() +save_dir = Path(__file__).parent / 'figures' / 'volume_dip_figures' +# %% +# S10 panel A, left +# choose one track and its neighbors (at a given time) to plot over time +MAIN_TRACK_ID = EXAMPLE_TRACKS['volume_dip_example'] +TIMEPOINT = 239 +track_id_list = get_a_cells_neighbors_as_track_id_list(df_outliers,MAIN_TRACK_ID,TIMEPOINT) +fig,_ = plot_neighbors_volume_over_time(df_outliers,track_id_list) + +save_name: str = f"S10_A_left-immediate_neighbors_of_main_track_{MAIN_TRACK_ID}" +save_path: Path = save_dir / save_name +for ext in ['.png','.pdf']: + save_and_show_plot(str(save_path),ext,fig,transparent=False,keep_open=True) +plt.show() + +#%% +# S10 panel A, middle +fig,_ = plot_tracks_aligned_at_volume_drop_onset(df_outliers,track_id_list,MAIN_TRACK_ID,TIMEPOINT) + +save_name = f"S10_A_middle-dip_shape_{MAIN_TRACK_ID}" +save_path = Path( save_dir / save_name ) +for ext in ['.png','.pdf']: + save_and_show_plot(str(save_path),ext,fig,transparent=False,keep_open=True) +plt.show() + +# %% +# S10 panel B, illustrate the effect of volume dip on transient growth rate + +df_full = filter_data.all_timepoints_full_tracks(df) # filter to only full tracks + +fig,axlist_untyped = plt.subplots(nrows=2,ncols=1,figsize=(6.5,8)) +axlist = type_axlist(axlist_untyped) + +# add_time_point_lines=False,timepoint=None +volume_dip_example_track = 86570 + +main_track_list = [(volume_dip_example_track, 263)] +for main_track_id, timepoint in main_track_list: + ax = axlist[0] + ax = plot_track_with_volume_dip(ax,df_full,main_track_id,add_time_point_lines=True,timepoint=timepoint) + ax = axlist[1] + ax = plot_track_with_volume_dip(ax,df_full,main_track_id,xcol='index_sequence',ycol='dxdt_48_volume') + fig,axlist = adjust_axis_positions(fig,axlist,curr_pos=None,width=0.6,height=0.6,space=0.2,horizontal=False) + for ext in ['.png','.pdf']: + savepath = save_dir / f"S10B_track_{main_track_id}_volume_dip{ext}" + save_and_show_plot(str(savepath),ext,fig,transparent=False,keep_open=True) + plt.show() + +#%% S10panelC steps 1 and 2 +dftracks = df_full[df_full['track_id'].isin([volume_dip_example_track])] +columns_to_remove = dftracks.columns[dftracks.columns.str.contains('dip|jump')] +dftracks = dftracks.drop(columns=columns_to_remove) + +# reprocess so that intermediate arrays are returned +dftracks_out = filter_out_dips.run_script(dftracks, + return_intermediates=True, + use_detrended=True,) + +fig,ax = plot_dip_detection_validation(dftracks_out[dftracks_out['track_id'] == volume_dip_example_track],peak_str = 'dips') +fig,ax = adjust_axis_positions(fig,ax,curr_pos=None,width=1.2,height=1.2,space=0.6,keep_labels=True) +for ext in ['.png','.pdf']: + savepath = save_dir / f"S10C_left_middle-track_{volume_dip_example_track}_dip_detection_validation{ext}" + save_and_show_plot(str(savepath),ext,fig,transparent=False,keep_open=True) +#%% +# S10 panel C step3 +df_track = df_full[df_full.track_id == volume_dip_example_track] +fig,axlist_untyped = plt.subplots(2,1,figsize=(fw,fh),sharey=False) +axlist = type_axlist(axlist_untyped) + +_ = plot_track_with_fit_line(df_track,axlist[0], + ycol1='volume', + ycol2='volume_dips_removed_um_unfilled', + ) + +_ = plot_track_with_fit_line(df_track, ax = axlist[1], + ycol1='dxdt_48_volume', + ycol2 = 'dxdt_48_volume_dips_removed_um_unfilled', + ) + +xlimmax = np.max([ax.get_xlim()[1] for ax in axlist]) +for ax in axlist: + 
+    ax.set_xticks(np.arange(0,20,4))
+    ax.set_xlim(-2,xlimmax)
+
+fig,axlist = adjust_axis_positions(fig,axlist,curr_pos=None,width=0.6,height=0.6,space=0.2,horizontal=False)
+axlist[0].text(0.05,0.99,f"track {volume_dip_example_track}",transform=axlist[0].transAxes,
+        ha = 'left',va='top',fontsize=fs)
+axlist[0].legend(loc='lower left',bbox_to_anchor=(1.05,0.0),
+            fontsize=fs,frameon=False,
+            markerscale=1,handlelength=1,
+            labelspacing=0,
+            )
+
+# now save
+savename: str = f"S10C_right-volume_fit_volume_fit_track{volume_dip_example_track}"
+for ext in ['.png','.pdf']:
+    save_path = save_dir / savename
+    save_and_show_plot(str(save_path),ext,fig,transparent=False,keep_open=True)
+plt.show()

+# %%
+# S10 panel D
+magnitude_col = 'volume_dips_volume_change_at_center'
+ycol = 'volume_dips_peak_mask_at_center'
+colony_list = ['small','medium','large']
+
+for threshold in [-50,0]:
+    fig,axlist_untyped = plt.subplots(1,1,figsize=(fw,fh))
+    axlist = type_axlist(axlist_untyped)
+    ax = axlist[0]
+    for ci,colony in enumerate(colony_list):
+        dfcolony = df[df['colony'] == colony]
+        dfthresh = dfcolony[dfcolony[ycol]==True]
+        dfthresh = dfthresh[dfthresh[magnitude_col] < threshold]
+        dfcount = dfthresh.groupby('index_sequence').count()
+        df_all = dfcolony[['track_id','index_sequence']].groupby('index_sequence').count()
+        df_dips = dfcount['track_id']
+        df_all['number_of_nuclei'] = df_all['track_id']
+        df_all['number_of_dips'] = 0
+        df_all.loc[df_dips.index,'number_of_dips'] = df_dips.values
+
+        # now plot the percentage of nuclei with dips over time
+        xscale,xlabel,xunit,_ = get_plot_labels_for_metric('index_sequence')
+        x = df_all.index * xscale
+        yd = df_all['number_of_dips']
+        yn = df_all['number_of_nuclei']
+        y = yd / yn * 100
+
+        zorderval = 1 if threshold != 0 else -1 # ensure the large colony is in front when it has fewer peaks
+        ax.plot(x,y,label=colony,color=plotting_tools.COLONY_COLORS[colony],zorder=ci*1000*zorderval)
+        ax.set_xlabel(f"{xlabel} {xunit}")
+        ax.set_ylabel('% of nuclei')
+        if threshold != 0:
+            text_str = f"dips < {threshold} μm\u00B3"
+        else:
+            text_str = "all dips"
+        ax.text(0.05,0.99,text_str,transform=ax.transAxes,
+            ha = 'left',va='top',fontsize=fs)
+
+    fig,axlist = adjust_axis_positions(fig,axlist,curr_pos=None,width=0.9,height=0.5,space=0.075)
+    ax.legend(loc='center left', bbox_to_anchor=(1.05, 0.5),
+            fontsize=fs,frameon=False,
+            markerscale=1,handlelength=1,
+            labelspacing=0,
+            )
+    if threshold != -100:
+        curr_ylim = ax.get_ylim()
+
+        ax.set_yticks(np.arange(0,30,5))
+        ax.set_ylim(0,curr_ylim[1]*1.2)
+    else:
+        ax.set_yticks(np.arange(0,30,10))
+        ax.set_ylim(0,28)
+    ax.set_xticks(np.arange(0,60,12))
+    ax.set_xlim(0,48)
+    ax.set_xlabel('Movie time (hr)')
+
+    savename = f"S10_D-volume_dips_over_time_all_colonies-{ycol}-{threshold}"
+    for ext in ['.png','.pdf']:
+        savepath = save_dir / savename
+        save_and_show_plot(str(savepath),ext,fig,transparent=False,keep_open=True)
+
+#%%
+# S10 panel E
+# now make plots with cell cycle bins overlaid, one per colony
+df_full = add_times.digitize_time_column(df_full,0,1,step_size=0.02,time_col='normalized_time',new_col='dig_time')
+
+ycol = 'dxdt_48_volume_dips_removed_um_unfilled'
+for colony in colony_list:
+    dfc = df_full[df_full['colony']==colony]
+    fig,ax = plot_dxdt_over_time_by_cell_cycle(dfc,ycol)
+
+    fig,ax = adjust_axis_positions(fig,ax,curr_pos=None,width=0.9,height=0.6,space=0.075)
+
+    plt.suptitle(f"{ycol}")
+    for ext in ['.png','.pdf']:
+        savepath = save_dir /
f"S10_E-cell_cycle_bins_for_only_{colony}_{ycol}_{ext}" + save_and_show_plot(str(savepath),ext,fig,transparent=False,keep_open=True) + plt.show() + +#%% +# S10 panel F and G +colony='all_baseline' + +dfc = df_full[df_full["colony"] == colony] if colony != "all_baseline" else df_full +for local_radius_str in ["90um", "whole_colony"]: + for pngflag in [True, False]: + plot_features.scatter_plot( + dfc, + "all_baseline", + f"neighbor_avg_dxdt_48_volume_dips_removed_um_unfilled_{local_radius_str}", + "dxdt_48_volume_dips_removed_um_unfilled", + color_map="#808080", + figdir=save_dir, + fitting=False, + n_resamples=2, + require_square=False, + opacity=0.1, + markersize=10, + titleheader="full_tracks for all timepoints", + dpi=150, + file_extension=".pdf", + transparent=True, + add_unity_line=True, + remove_all_points_in_pdf=pngflag, + ) + + + +#%% + diff --git a/nuc_morph_analysis/analyses/volume/add_growth_features.py b/nuc_morph_analysis/analyses/volume/add_growth_features.py index b7812ff9..e29d597e 100644 --- a/nuc_morph_analysis/analyses/volume/add_growth_features.py +++ b/nuc_morph_analysis/analyses/volume/add_growth_features.py @@ -57,6 +57,7 @@ def fit_tracks_to_model( interval, model="power", plot=False, + add_fit_volume=False, ): """ This function fits the volume of each track to either a power law, exponential or @@ -72,6 +73,8 @@ def fit_tracks_to_model( The model to fit to. The default is "power". Other options are "exponential" and "linear". plot : bool, optional If True, a plot of the volume vs time for each track and its exponential fit is displayed. The default is False. + add_fit_volume : bool, optional + If True, the fit volume is added to the dataframe. The default is False. Returns ------- @@ -120,7 +123,7 @@ def fit_tracks_to_model( df_track = df_track.sort_values("index_sequence") df_track_trim = df_track[ (df_track.index_sequence > transition) & (df_track.index_sequence <= fb) - ] + ].copy() # get trimmed track times and volumes x = df_track_trim["index_sequence"].values * interval / 60 @@ -167,6 +170,12 @@ def fit_tracks_to_model( df.loc[df_track.index, f"rate_{model_name}fit_volume"] = rate df.loc[df_track.index, f"RMSE_{model_name}fit_volume"] = rmse + # add fit volumes to manifest (using index_sequence and track_id to match and add Z) + # will be named "power_fit_volume" + if add_fit_volume: + df_track_trim[f"{model}_fit_volume"] = z + df.loc[df_track_trim.index, f"{model}_fit_volume"] = df_track_trim.loc[df_track_trim.index, f"{model}_fit_volume"] + except Exception: fail_count += 1 diff --git a/nuc_morph_analysis/analyses/volume/extra_dxdt_analysis/correlating_transient_growth_rates.py b/nuc_morph_analysis/analyses/volume/extra_dxdt_analysis/correlating_transient_growth_rates.py new file mode 100644 index 00000000..b3bd7e5d --- /dev/null +++ b/nuc_morph_analysis/analyses/volume/extra_dxdt_analysis/correlating_transient_growth_rates.py @@ -0,0 +1,71 @@ +#%% +from pathlib import Path +from nuc_morph_analysis.lib.preprocessing import global_dataset_filtering, load_data, filter_data +from nuc_morph_analysis.lib.preprocessing import add_times +import matplotlib +import matplotlib.pyplot as plt +from nuc_morph_analysis.analyses.volume.plot_help import group_and_extract +import pandas as pd +from itertools import combinations +from sklearn.linear_model import LinearRegression + +# %% +# set up plot parameters and figure saving directory +matplotlib.rcParams["pdf.fonttype"] = 42 +plt.rcParams["font.family"] = "Arial" +plt.rcParams.update({"font.size": 20}) 
+plt.rcParams["figure.figsize"] = [5, 4]
+figdir = "volume/figures/local_growth/"
+
+# %%
+# load data
+df = global_dataset_filtering.load_dataset_with_features()
+df_full = filter_data.all_timepoints_full_tracks(df)
+df_full = add_times.digitize_time_column(df_full,0,1,step_size=0.02,time_col='normalized_time',new_col='dig_time')
+
+#%%
+# now collect the average transient growth rates for different cell cycle windows
+cell_cycle_width = 0.2
+cell_cycle_centers = [0.3,0.5,0.7]
+cell_cycle_bins = [(cc-cell_cycle_width/2,cc+cell_cycle_width/2) for cc in cell_cycle_centers]
+
+plot_type = 'mean'
+colony_list = ['small','medium','large']
+# average the value (ycol) binned by xcol within the given cell cycle windows
+ycol = 'dxdt_48_volume_dips_removed_um'
+xcol1 = 'index_sequence'
+dflist = []
+for colony in colony_list:
+    dfc = df_full[df_full['colony']==colony]
+    for ri,cell_cycle_bin in enumerate(cell_cycle_bins):
+        dfcc = dfc[(dfc['dig_time'] >= cell_cycle_bin[0]) & (dfc['dig_time'] <= cell_cycle_bin[1])]
+
+        dfnew = group_and_extract(dfcc,xcol1,ycol)
+
+        dfnew['cell_cycle'] = ri
+        dfnew['colony'] = colony
+        dfnew['bin'] = f"{cell_cycle_bin[0]:.1f}-{cell_cycle_bin[1]:.1f}"
+        dflist.append(dfnew)
+
+dfall = pd.concat(dflist)
+#%%
+# now ask how correlated the avg transient growth rates are between different cell cycle windows
+for colony, dfallc in dfall.groupby('colony'):
+    cell_cycle_combos = list(combinations(dfall['cell_cycle'].unique(),2))
+
+    for c1,c2 in cell_cycle_combos:
+        df1 = dfallc[dfallc['cell_cycle']==c1]
+        df2 = dfallc[dfallc['cell_cycle']==c2]
+        dfcat = pd.merge(df1,df2,on='index_sequence',suffixes=('_1','_2'),how='inner')
+        dfcat.dropna(subset=['nanmean_1','nanmean_2'],inplace=True)
+
+        x = dfcat['nanmean_1']
+        y = dfcat['nanmean_2']
+
+        reg = LinearRegression().fit(x.values.reshape(-1,1),y)
+        r2 = reg.score(x.values.reshape(-1,1),y)
+        print(f"{colony} {c1}-{c2} r2={r2:.2f}")
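For a single predictor, the r² reported above is just the squared Pearson correlation, so the result can be cross-checked without fitting a model (a sketch; `x` and `y` are the two aligned nanmean series from the merge in the loop above, NaNs already dropped):

```python
# sketch: cross-check the LinearRegression r2 against the Pearson correlation
from scipy.stats import pearsonr

r, pval = pearsonr(x.values, y.values)
print(f"pearson r={r:.3f}, r2={r**2:.3f}, p={pval:.3g}")
```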
diff --git a/nuc_morph_analysis/analyses/volume/extra_dxdt_analysis/growth_over_normalized_interphase_time.py b/nuc_morph_analysis/analyses/volume/extra_dxdt_analysis/growth_over_normalized_interphase_time.py
new file mode 100644
index 00000000..950a5c95
--- /dev/null
+++ b/nuc_morph_analysis/analyses/volume/extra_dxdt_analysis/growth_over_normalized_interphase_time.py
@@ -0,0 +1,39 @@
+#%%
+from nuc_morph_analysis.lib.preprocessing import global_dataset_filtering, filter_data
+from nuc_morph_analysis.lib.preprocessing import add_times
+import matplotlib
+import matplotlib.pyplot as plt
+from nuc_morph_analysis.analyses.volume.plot_help import group_and_extract, plot_dfg
+
+# %%
+# set up plot parameters and figure saving directory
+matplotlib.rcParams["pdf.fonttype"] = 42
+plt.rcParams["font.family"] = "Arial"
+plt.rcParams.update({"font.size": 20})
+plt.rcParams["figure.figsize"] = [5, 4]
+figdir = "volume/figures/local_growth/"
+
+# %%
+# load data
+df = global_dataset_filtering.load_dataset_with_features()
+df_full = filter_data.all_timepoints_full_tracks(df)
+df_full = add_times.digitize_time_column(df_full,0,1,step_size=0.02,time_col='normalized_time',new_col='dig_time')
+
+#%%
+# plot the average transient growth rate over normalized interphase time
+plot_type = 'mean'
+colony_list = ['small','medium','large']
+# plot the mean of the value (ycol) binned by xcol
+ycol = 'dxdt_48_volume'
+xcol1 = 'dig_time'
+for colony in colony_list:
+
+    fig,curr_ax = plt.subplots(1,1,figsize=(5,4))
+    dfc = df_full[df_full['colony']==colony]
+
+    curr_ax = plot_dfg(dfc,xcol1,ycol,f"{colony.capitalize()}",curr_ax,plot_type=plot_type,colorby='colony')
+
+    plt.show()
diff --git a/nuc_morph_analysis/analyses/volume/figure_5_s9_workflow.py b/nuc_morph_analysis/analyses/volume/figure_5_s9_workflow.py
index e9704ba3..accc8ea8 100644
--- a/nuc_morph_analysis/analyses/volume/figure_5_s9_workflow.py
+++ b/nuc_morph_analysis/analyses/volume/figure_5_s9_workflow.py
@@ -1,6 +1,6 @@
 # %%
 from pathlib import Path
-from nuc_morph_analysis.lib.preprocessing import global_dataset_filtering, load_data, filter_data
+from nuc_morph_analysis.lib.preprocessing import global_dataset_filtering, load_data, filter_data, add_times
 from nuc_morph_analysis.analyses.volume_variation import plot_features
 from nuc_morph_analysis.lib.visualization.notebook_tools import save_and_show_plot
 from nuc_morph_analysis.analyses.volume.add_growth_features import plot_fit_parameter_distribution
@@ -11,6 +11,7 @@
 from nuc_morph_analysis.lib.visualization.plotting_tools import get_plot_labels_for_metric
 from nuc_morph_analysis.lib.visualization.example_tracks import EXAMPLE_TRACKS
+from nuc_morph_analysis.analyses.volume.plot_help import plot_dxdt_over_time_by_cell_cycle, adjust_axis_positions
 import matplotlib
 import matplotlib.pyplot as plt
@@ -236,3 +237,18 @@
 )
 # %%
+# S5 panel F and S9 panel E
+df_full = add_times.digitize_time_column(df_full,0,1,step_size=0.02,time_col='normalized_time',new_col='dig_time')
+ycol = 'dxdt_48_volume'
+colony_list = ['small','medium','large']
+for colony in colony_list:
+    dfc = df_full[df_full['colony']==colony]
+    fig,ax = plot_dxdt_over_time_by_cell_cycle(dfc,ycol)
+
+    fig,ax = adjust_axis_positions(fig,ax,curr_pos=None,width=0.9,height=0.6,space=0.075)
+
+    plt.suptitle(f"{ycol}")
+    for ext in ['.png','.pdf']:
+        savepath = Path(figdir) / f"S5_E-cell_cycle_bins_for_only_{colony}_{ycol}_{ext}"
+        save_and_show_plot(str(savepath),ext,fig,transparent=False,keep_open=True)
+    plt.show()
diff --git a/nuc_morph_analysis/analyses/volume/filter_out_dips.py b/nuc_morph_analysis/analyses/volume/filter_out_dips.py
new file mode 100644
index 00000000..ad125a05
--- /dev/null
+++ b/nuc_morph_analysis/analyses/volume/filter_out_dips.py
@@ -0,0 +1,418 @@
+import numpy as np
+import pandas as pd
+from nuc_morph_analysis.lib.visualization.plotting_tools import get_plot_labels_for_metric
+from scipy.signal import savgol_filter
+from scipy.signal import find_peaks
+
+def find_dips_relative_to_fit(y,
+                              prominence=(20,None),
+                              width=(2,24),
+                              height=(None,None),
+                              threshold=(None,None),
+                              rel_height=1,
+                              wlen=25):
+    """
+    find the dips in a trajectory
+    the default parameters assume nuclear volume data smoothed AND relative to the power law fit volume (or smoothed only)
+
+    Parameters
+    ----------
+    y : np.array
+        volume data
+    the remaining parameters are passed through to scipy.signal.find_peaks
+
+    Returns
+    -------
+    tuple
+        peaks, props
+    """
+    peaks, props = find_peaks(y, prominence=prominence, width=width,height=height,threshold=threshold,rel_height=rel_height,wlen=wlen)
+    return peaks, props
+
+def find_and_remove_from_pivot(y,y2,min_index=0,peak_str='dips',volume_change_threshold=0):
+    """
+    find peaks in y (volume data, smoothed and detrended OR smoothed only),
+    collect features (from y and/or y2) and return a dictionary with the features
+
+    Parameters
+    ----------
+    y : np.array
+        volume data (smoothed volume, detrended or not)
+    y2 : np.array
+        raw volume data used to compute the peak magnitude
+    min_index : int
+ minimum index value for the volume data + this is needed to ensure the index values are correct in the final dataframe + peak_str : str + string to use for the peak type ('dips' or 'jumps') + + Returns + ------- + dict + dictionary with keys [ + f'volume_{peak_str}_peak_mask_at_region', # boolean array, true at all points within peak region(s) + f'volume_{peak_str}_peak_mask_at_center', # boolean array, true at all peak centers + f'volume_{peak_str}_has_peak', # boolean array, true at all points if there is a peak + f'volume_{peak_str}_volume_change_at_center', # magnitude value at each peak center (left_base - peak) + f'volume_{peak_str}_volume_change_at_region', # magnitude values at all points within peak region(s) (left_base - peak) + f'volume_{peak_str}_width_at_center', # width of the peak at the peak center index + f'volume_{peak_str}_width_at_region', # width of the peak at all indices in the peak region + f'volume_{peak_str}_max_volume_change', # maximum magnitude value (at all points in array) + f'volume_{peak_str}_peak_id_at_center', # peak id at the peak center index + f'volume_{peak_str}_peak_id_at_region', # peak id at all indices in the peak region + f'volume_{peak_str}_total_number', # total number of peaks + ] + + where {} is the peak_str + """ + + peaks, props = find_dips_relative_to_fit(y) # run scipy peak finder + + # if peaks present, set entire array to True + has_peak_bool_array = np.zeros(y.shape,dtype='bool') if len(peaks)==0 else np.ones(y.shape,dtype='bool') + + # initialize new feature arrays with False or NaN + peak_mask_at_center = np.zeros(y.shape,dtype='bool') # boolean array, true at all peak centers + peak_mask_at_region = np.zeros(y.shape,dtype='bool') # boolean array, true at all points within peak region(s) + volume_change_at_center = np.zeros(y.shape,dtype='float32') * np.nan # the change in volume (V(left base) - V(peak center)) value placed in an array at the peak center index + volume_change_at_region = np.zeros(y.shape,dtype='float32') * np.nan # the change in volume (V(left base) - V(peak center)) value placed in an array at all indices in the peak region + peak_widths_at_center = np.zeros(y.shape,dtype='float32') * np.nan # the width of the peak at the peak center index + peak_widths_at_region = np.zeros(y.shape,dtype='float32') * np.nan # the width of the peak at all indices in the peak region + peak_id_at_center = np.zeros(y.shape,dtype='float32') * np.nan # the peak id at the peak center index + peak_id_at_region = np.zeros(y.shape,dtype='float32') * np.nan # the peak id at all indices in the peak region + if len(peaks)>0: + for pi, peak in enumerate(peaks): + + left_base = props['left_bases'][pi].copy() + right_base = props['right_bases'][pi].copy() + left_volume_change = y2[peak] - y2[left_base] + peak_indices = np.arange(left_base,right_base+1,1,dtype='uint16') + peak_width = right_base - left_base + + peak_above_threshold = left_volume_change < volume_change_threshold if peak_str=='dips' else left_volume_change > volume_change_threshold + if not peak_above_threshold: + continue + peak_mask_at_center[peak] = True + peak_mask_at_region[peak_indices] = True + + volume_change_at_center[peak] = left_volume_change + volume_change_at_region[peak_indices] = left_volume_change + + peak_widths_at_center[peak] = peak_width + peak_widths_at_region[peak_indices] = peak_width + + peak_id_at_center[peak] = pi + peak_id_at_region[peak_indices] = pi + + # take minimum of peak_magnitude if peak_str is dips () + func = np.nanmin if peak_str=='dips' else 
np.nanmax
+    max_val = func(volume_change_at_center) if len(peaks)>0 else np.nan
+    max_volume_change = np.ones(y.shape,dtype='float32') * max_val
+
+    number_of_peaks = np.ones(y.shape,dtype='float32') * len(peaks)
+
+    return {
+        f'volume_{peak_str}_peak_mask_at_region':peak_mask_at_region, # boolean array, true at all points within peak region(s)
+        f'volume_{peak_str}_peak_mask_at_center':peak_mask_at_center, # boolean array, true at all peak centers
+        f'volume_{peak_str}_has_peak':has_peak_bool_array, # boolean array, true at all points if there is a peak
+        f'volume_{peak_str}_volume_change_at_center':volume_change_at_center, # magnitude value at each peak center (left_base - peak)
+        f'volume_{peak_str}_volume_change_at_region':volume_change_at_region, # magnitude values at all points within peak region(s) (left_base - peak)
+        f'volume_{peak_str}_width_at_center':peak_widths_at_center, # width of the peak at the peak center index
+        f'volume_{peak_str}_width_at_region':peak_widths_at_region, # width of the peak at all indices in the peak region
+        f'volume_{peak_str}_max_volume_change':max_volume_change, # maximum magnitude value (at all points in array)
+        f'volume_{peak_str}_peak_id_at_center':peak_id_at_center, # peak id at the peak center index
+        f'volume_{peak_str}_peak_id_at_region':peak_id_at_region, # peak id at all indices in the peak region
+        f'volume_{peak_str}_total_number':number_of_peaks, # total number of peaks
+    }
+
+def find_peaks_and_collect_features(vol_det_array, vol_array, index_sequence_vec, track_id_vec, peak_str='dips'):
+    """
+    run the peak finding algorithm for each track_id (column) in the detrended volume data (vol_det_array), extract features from the peaks (using both vol_det_array and vol_array),
+    then collect the outputs and return a dataframe
+
+    Parameters
+    ----------
+    vol_det_array : np.array
+        detrended volume data (smoothed volume - fit volume), but can be any data that has peaks
+    vol_array : np.array
+        volume data (smoothed volume), used to compute the peak magnitude
+    index_sequence_vec : np.array
+        index sequence values for the volume data
+    track_id_vec : np.array
+        track_id values for the volume data (columns)
+    peak_str : str
+        string to use for the peak type ('dips' or 'jumps')
+
+    Returns
+    -------
+    pd.DataFrame
+        dataframe with columns ['track_id','index_sequence'] + columns defined in find_and_remove_from_pivot
+    """
+
+    # iterate through each track_id (will be slow if not using full tracks) and find peaks
+    out = [find_and_remove_from_pivot(vol_det_array[:,track], vol_array[:,track], min_index=index_sequence_vec.min(), peak_str=peak_str) for track in range(vol_det_array.shape[1])]
+
+    # convert the output to a dataframe
+    dfout_list = [pd.DataFrame(x.values(),columns = index_sequence_vec, index=x.keys()).T for x in out]
+    dfout_list = [x.reset_index().rename(columns={'index':'index_sequence'}).set_index('index_sequence') for x in dfout_list]
+    dfout = pd.concat(dfout_list,axis=0,keys=track_id_vec, names=['track_id']).reset_index()
+
+    return dfout
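A self-contained sketch of what the default detector above fires on: a synthetic detrended trace (flat baseline, values in μm³) with one ~60 μm³, ~1 hr dip. The trace is inverted before peak finding, exactly as the workflow does; the dip shape and numbers are made up for illustration:

```python
# sketch: exercise the dip detector on a synthetic detrended trace
import numpy as np
from nuc_morph_analysis.analyses.volume.filter_out_dips import find_dips_relative_to_fit

trace = np.zeros(100, dtype=float)
trace[40:52] -= 60 * np.hanning(12)      # one smooth ~60 um^3 dip, ~12 frames wide
peaks, props = find_dips_relative_to_fit(-1 * trace)  # invert so the dip becomes a peak
print(peaks, props["widths"])            # expect one peak near frame 46
```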
+def filter_out_volume_dips(dfd, volume_cols, find_dips=True, use_detrended=True, return_intermediates=False):
+    """
+    Remove the volume dips from the volume data
+
+    STEPS:
+    - inputs are the volume trajectory and its power law fit (used for detrending)
+    - both inputs are interpolated so there is a value at each point in time (necessary for smoothing)
+    - the volume trajectory is smoothed using a savgol filter
+    - the smoothed volume trajectory is detrended by subtracting the power law fit
+    - search for volume dips by passing the inverse of the detrended, smoothed volume trajectory into scipy.signal.find_peaks
+    - peak regions are removed from the volume trajectories by filling with nans
+
+    Parameters
+    ----------
+    dfd : pd.DataFrame
+        dataframe with columns ['track_id','index_sequence'] + volume_cols
+    volume_cols : list
+        list of columns needed to find and filter out volume dips
+        default is ['volume','power_fit_volume']; the fit volume is used to detrend the volume data
+    find_dips : bool
+        if True, find and remove the volume dips (input to peak finder is the inverse of the detrended+smoothed volume)
+    use_detrended : bool
+        if True, use the detrended volume data (smoothed volume minus the power law fit volume)
+    return_intermediates : bool
+        if True, return the intermediate dataframes for validation/visualization
+
+    Returns
+    -------
+    pd.DataFrame
+        dataframe with columns ['track_id','index_sequence','CellId'] +
+        [
+        # intermediate columns (only kept if return_intermediates is True)
+        f"input_{peak_str}", # input to peak finder
+        f"volume_smooth", # smoothed volume data
+        f"volume_interpolated", # interpolated volume data
+        f"fit_volume_interpolated", # interpolated fit_volume data
+        f"volume_smooth_detrended", # detrended volume data
+
+        # values from find_peaks_and_collect_features
+        f'volume_{peak_str}_peak_mask_at_region', # boolean array, true at all points within peak region(s)
+        f'volume_{peak_str}_peak_mask_at_center', # boolean array, true at all peak centers
+        f'volume_{peak_str}_has_peak', # boolean array, true at all points if there is a peak
+        f'volume_{peak_str}_volume_change_at_center', # magnitude value at each peak center (left_base - peak)
+        f'volume_{peak_str}_volume_change_at_region', # magnitude values at all points within peak region(s) (left_base - peak)
+        f'volume_{peak_str}_width_at_center', # width of the peak at the peak center index
+        f'volume_{peak_str}_width_at_region', # width of the peak at all indices in the peak region
+        f'volume_{peak_str}_max_volume_change', # maximum magnitude value (at all points in array)
+        f'volume_{peak_str}_peak_id_at_center', # peak id at the peak center index
+        f'volume_{peak_str}_peak_id_at_region', # peak id at all indices in the peak region
+        f'volume_{peak_str}_total_number', # total number of peaks
+
+        # values after peak regions are removed
+        f"volume_{peak_str}_removed_um_unfilled", # dips or jumps removed (refilled with nans)
+        ]
+
+        where {} is the peak_str ('dips' or 'jumps')
+    """
+
+    # initialize dictionary to store dataframes for each step
+    dfdict = {}
+    volume_col = volume_cols[0]
+    fit_volume_col = volume_cols[1]
+
+    # create a pivot dataframe where the index is index_sequence, the columns are track_id, and the values are one of the columns from volume_cols
+    dfp = dfd.pivot(index="index_sequence", columns="track_id", values=volume_cols)
+
+    # ensure that all timepoints are present in the dfp index_sequence
+    if dfp.index.values.tolist() != list(range(dfp.index.values.min(), dfp.index.values.max() + 1)):
+        # if not, fill in missing timepoints with np.nan
+        dfp = dfp.reindex(index=range(dfp.index.values.min(), dfp.index.values.max() + 1))
+
+    # interpolate the missing internal nan values while keeping the beginning and ending stretch of nans
+    # (this is important so the savgol filter will not have gaps)
+    dfp = dfp.interpolate(method='linear', axis=0, limit_area='inside')
+
+    # add the interpolated volume data to the dictionary
+    dfdict.update({'volume_interpolated':dfp[volume_col]}) # add interpolated volume data to dictionary
+
dfdict.update({'fit_volume_interpolated':dfp[fit_volume_col]}) # add interpolated fit_volume data to dictionary (used to detrend the volume data) + + # now apply savitzky golay filter to each column + # rescale the y values to um^3 (from pixels^3) + yscale, _, _, _ = get_plot_labels_for_metric(volume_col) + + #step 1: apply savgol filter to each column + dfp_vol_smooth = dfp[volume_col].apply(lambda x: savgol_filter(x*yscale, window_length=12, polyorder=2, mode='constant',cval=np.nan), axis=0) + dfdict.update({'volume_smooth':dfp_vol_smooth}) # add smoothed volume data to dictionary + + # step 2: detrend the data by subtracting the power law fit volume + dfp_vol_fit = dfp[fit_volume_col] + dfp_vol_smooth_detrended = dfp_vol_smooth - dfp_vol_fit + dfdict.update({'volume_smooth_detrended':dfp_vol_smooth_detrended}) # add detrended volume data to dictionary + + # step 3: invert the input data (smoothed volume or detrended data) to find the peaks (if find_dips is True) + drop_scale = -1 + if find_dips: + drop_scale = -1 + peak_str = "dips" + else: + peak_str = "jumps" + drop_scale = 1 + + # use smoothed+detrended data or just smoothed data + input = dfp_vol_smooth_detrended * drop_scale if use_detrended else dfp_vol_smooth * drop_scale + dfdict.update({f'input_{peak_str}':input}) + + # step 4: perform peak finding and get peak features + out = find_peaks_and_collect_features(input.values, dfp[volume_col].values * yscale, input.index.values, input.columns.values, peak_str=peak_str) + + # combine the output into a pivot table + cols = [x for x in out.columns if x not in ['index_sequence','track_id']] + dfpeaks = out.pivot(index="index_sequence", columns="track_id", values=cols) + + # unpack the dataframes containing the peak features into the dictionary + for col in cols: + dfdict.update({f'{col}':dfpeaks[col]}) + + ############################################################ + # step 5: remove the peaks from the volume data + dfp_mask = dfdict[f'volume_{peak_str}_peak_mask_at_region'].astype('bool') + nanmask = np.ones(dfp_mask.shape, dtype='float') + mask_values = dfp_mask.values + nanmask[mask_values] = np.nan + + dfp_vol2_unfilled = dfp['volume'] * nanmask + dfp_vol2_unfilled = dfp_vol2_unfilled * yscale # convert to um^3 + dfdict.update({f'volume_{peak_str}_removed_um_unfilled':dfp_vol2_unfilled}) + + # step 6: collect the results (and specific peak features) + key_list = list(dfdict.keys()) + if not return_intermediates: + intermediate_list = [ + 'volume_interpolated', + 'fit_volume_interpolated', + 'volume_smooth', + 'volume_smooth_detrended', + f'input_{peak_str}', + ] + key_list = [x for x in key_list if x not in intermediate_list] + + for i,key in enumerate(key_list): + df = dfdict[key] + if i == 0: + dfm = df.stack().reset_index() + dfm.rename(columns={0:key},inplace=True) + else: + dfm1 = df.stack().reset_index() + dfm1.rename(columns={0:key},inplace=True) + dfm = dfm.merge(dfm1,on=['index_sequence','track_id'],how='outer') + + + # set the data types (because they are lost in the pivot operation) + for col in dfm.columns: + if col in [f'volume_{peak_str}_peak_mask_at_region',f'volume_{peak_str}_peak_mask_at_center',f'volume_{peak_str}_has_peak']: + dfm[col] = dfm[col].astype('bool') + elif col in ['track_id','index_sequence']: + dfm[col] = dfm[col].astype('int') + else: + dfm[col] = dfm[col].astype('float') + + # now recover the CellId values + dfmi = dfm.set_index(["index_sequence", "track_id"]) + dfdi = dfd.set_index(["index_sequence", "track_id"], drop=False) + + # find all dfmi 
index values NOT in dfdi index values
+    # this is the set of index values that are not in the original dataframe
+    # they are added during the pivot operation
+    # we will drop these rows
+    not_in_dfdi = dfmi.index.difference(dfdi.index)
+    dfmi.drop(not_in_dfdi, inplace=True)
+
+    dfmi.loc[dfmi.index.values, "CellId"] = dfdi.loc[dfmi.index.values, "CellId"]
+
+    # the same frame is returned either way; return_intermediates only
+    # controls which columns survived step 6 above
+    return dfmi.reset_index().set_index("CellId")
+
+
+# %%
+def run_script(df=None,volume_cols=['volume','power_fit_volume'],use_detrended=True,return_intermediates=False):
+    """
+    run the workflow to find, characterize, and remove volume dips from the volume data
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        dataframe on which to find and remove volume dips,
+        with columns ['colony','track_id','index_sequence','label_img'] + volume_cols
+    volume_cols : list
+        list of columns needed to find and filter out volume dips
+    use_detrended : bool
+        if True, use the detrended volume data (smoothed volume minus the power law fit volume)
+    return_intermediates : bool
+        if True, return the intermediate dataframes for validation/visualization
+
+    Returns
+    -------
+    pd.DataFrame
+        dataframe with volume-dip-filtered values for each track at each time point
+        new columns are
+
+        [
+        # intermediate columns (only kept if return_intermediates is True)
+        f"input_{peak_str}", # input to peak finder
+        f"volume_smooth", # smoothed volume data
+        f"volume_interpolated", # interpolated volume data
+        f"fit_volume_interpolated", # interpolated fit_volume data
+        f"volume_smooth_detrended", # detrended volume data
+
+        # values from find_peaks_and_collect_features
+        f'volume_{peak_str}_peak_mask_at_region', # boolean array, true at all points within peak region(s)
+        f'volume_{peak_str}_peak_mask_at_center', # boolean array, true at all peak centers
+        f'volume_{peak_str}_has_peak', # boolean array, true at all points if there is a peak
+        f'volume_{peak_str}_volume_change_at_center', # magnitude value at each peak center (left_base - peak)
+        f'volume_{peak_str}_volume_change_at_region', # magnitude values at all points within peak region(s) (left_base - peak)
+        f'volume_{peak_str}_width_at_center', # width of the peak at the peak center index
+        f'volume_{peak_str}_width_at_region', # width of the peak at all indices in the peak region
+        f'volume_{peak_str}_max_volume_change', # maximum magnitude value (at all points in array)
+        f'volume_{peak_str}_peak_id_at_center', # peak id at the peak center index
+        f'volume_{peak_str}_peak_id_at_region', # peak id at all indices in the peak region
+        f'volume_{peak_str}_total_number', # total number of peaks
+
+        # values after peak regions are removed
+        f"volume_{peak_str}_removed_um_unfilled", # dips or jumps removed (refilled with nans)
+        ]
+    """
+
+    assert df.index.name == "CellId"
+    dforig = df.copy()
+
+    # only keep the necessary columns; drop the shcoeffs columns so the dataframe is not so large
+    # CellId becomes a column too after reset_index
+    dfd = df[
+        ["colony", "track_id", "index_sequence", "label_img"] + volume_cols
+    ].reset_index()
+
+    # convert all volume_cols to float32
+    dfd[volume_cols] = dfd[volume_cols].astype(np.float32)
+
+    # returns dfo with index=CellId
+    dfo = filter_out_volume_dips(dfd, volume_cols,True,use_detrended,return_intermediates)
+    new_columns = [x for x in dfo.columns.tolist() if x not in dforig.columns.tolist()]
+
+    # add new columns to original dataframe
+    dforig.loc[dfo.index.values, new_columns] = dfo.loc[dfo.index.values, new_columns]
+    assert dforig.index.name == "CellId"
+    return dforig
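A minimal end-to-end sketch of the dip-removal pipeline described above (it assumes a CellId-indexed manifest of full tracks that already carries a 'power_fit_volume' column, e.g. from fit_tracks_to_model with add_fit_volume=True; the NaN-counting check is illustrative):

```python
from nuc_morph_analysis.analyses.volume import filter_out_dips

# df_full: CellId-indexed manifest with 'volume' and 'power_fit_volume' columns
df_out = filter_out_dips.run_script(df_full)  # defaults: volume_cols=['volume','power_fit_volume']

# the cleaned trajectory column is NaN wherever a dip region was removed
n_removed = (df_out['volume_dips_removed_um_unfilled'].isna()
             & df_out['volume'].notna()).sum()
print(f"timepoints removed as dips: {n_removed}")
```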
diff --git a/nuc_morph_analysis/analyses/volume/plot_help.py b/nuc_morph_analysis/analyses/volume/plot_help.py
new file mode 100644
index 00000000..3b88bc99
--- /dev/null
+++ b/nuc_morph_analysis/analyses/volume/plot_help.py
@@ -0,0 +1,532 @@
+import matplotlib.pyplot as plt
+import numpy as np
+from nuc_morph_analysis.lib.visualization import plotting_tools
+from nuc_morph_analysis.lib.visualization.plotting_tools import get_plot_labels_for_metric
+
+# define cell cycle colors
+cmap = plt.get_cmap(name='plasma')
+CYCLE_COLOR_DICT = {0:cmap(5),1:cmap(90),2:cmap(220), 3:(0,0,1), 4:(1,0,0)}
+
+def adjust_axis_positions(fig,ax,curr_pos=None,width=1,height=0.7,space=0.075,keep_labels=False, horizontal=True):
+    """
+    manually adjust the position and size of the axes in a figure
+
+    Parameters
+    ----------
+    fig : plt.Figure
+    ax : list of plt.Axes
+    curr_pos : list, optional
+        [x,y,width,height] in figure coordinates. The default is None.
+    width : float, optional
+        width of the axis in inches. The default is 1.
+    height : float, optional
+        height of the axis in inches. The default is 0.7.
+    space : float, optional
+        space between axes in inches. The default is 0.075.
+    keep_labels : bool, optional
+        if False, drop tick labels and titles on all but the first axis. The default is False.
+    horizontal : bool, optional
+        if True, lay axes out left-to-right; otherwise top-to-bottom. The default is True.
+
+    Returns
+    -------
+    fig : plt.Figure
+    ax : list of plt.Axes
+    """
+    # now adjust axis positions
+    fw,fh = fig.get_size_inches()
+    for ci,cax in enumerate(ax):
+        # make the axis `width` wide x `height` tall (in inches)
+        if curr_pos is None:
+            curr_pos = [1,1 + height,width,height]
+        else:
+            if horizontal:
+                curr_pos = [curr_pos[0] + width + space ,curr_pos[1],width,height]
+            else:
+                curr_pos = [curr_pos[0],curr_pos[1] - height - space, width, height]
+        # now convert curr_pos to figure coordinates
+        curr_pos_fig = [curr_pos[0]/fw,curr_pos[1]/fh,curr_pos[2]/fw,curr_pos[3]/fh]
+        cax.set_position(curr_pos_fig)
+
+        if horizontal:
+            if (ci>0) & (not keep_labels):
+                # remove ytick labels
+                cax.set_yticklabels([])
+                cax.set_ylabel('')
+        else:
+            if (ci < len(ax)-1) & (not keep_labels):
+                # remove xtick labels
+                cax.set_xticklabels([])
+                cax.set_xlabel('')
+            if (ci>0) & (not keep_labels):
+                cax.set_title('')
+    return fig,ax
+
+def group_and_extract(dfcc,xcol,ycol):
+    """
+    function for grouping and extracting the mean and 90% interpercentile range
+    of the value (ycol) binned by xcol from dfcc
+
+    Parameters
+    ----------
+    dfcc : pd.DataFrame
+        dataframe to group
+    xcol : str
+        column to bin by (e.g. 'index_sequence')
+    ycol : str
+        column to extract (e.g. 'dxdt_48_volume')
+
+    Returns
+    -------
+    dfg : pd.DataFrame
+        dataframe after grouping.
+        contains columns 'nanmean','nanstd','count','5th','95th'
+    """
+    grouper = dfcc.groupby(xcol)
+    dfg = grouper[ycol].agg([np.nanmean,np.nanstd,'count',lambda x: np.nanpercentile(x,5),lambda x: np.nanpercentile(x,95)])
+    # pandas names the two percentile lambdas '<lambda_0>' and '<lambda_1>'
+    dfg.rename(columns={'<lambda_0>':'5th','<lambda_1>':'95th'},inplace=True)
+    return dfg
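A quick sanity-check sketch of group_and_extract on toy data (the toy frame and its values are hypothetical; column names follow the helper above):

```python
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
toy = pd.DataFrame({
    "index_sequence": np.repeat(np.arange(5), 100),  # 5 timepoints x 100 cells
    "dxdt_48_volume": rng.normal(50, 10, 500),
})
dfg = group_and_extract(toy, "index_sequence", "dxdt_48_volume")
print(dfg[["nanmean", "count", "5th", "95th"]].round(1))  # one row per timepoint
```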
+def plot_dfg(dfcc,xcol,ycol,labelstr,curr_ax,plot_type='mean',colorby=None,required_N=10):
+    """
+    plot the mean of the value (ycol) binned by xcol from dfcc
+    along with the 90% interpercentile range
+
+    Parameters
+    ----------
+    dfcc : pd.DataFrame
+        dataframe to plot
+    xcol : str
+        column to bin by
+    ycol : str
+        column to plot
+    labelstr : str
+        label for legend
+    curr_ax : plt.Axes
+        axis to plot on
+    plot_type : str, optional
+        'mean' or 'count'. The default is 'mean'.
+    colorby : str, optional
+        how to color the line. The default is None.
+        can be 'colony','cellcycle', or a color
+    required_N : int, optional
+        required number of valid datapoints at a given timepoint for that timepoint to be included
+
+    Returns
+    -------
+    curr_ax : plt.Axes
+        axis with plot
+    """
+    # remove timepoints with fewer than required_N datapoints
+    dfg = group_and_extract(dfcc,xcol,ycol)
+    dfgindex = dfg['count'] >= required_N
+    dfg = dfg[dfgindex]
+
+    xscale,xlabel,xunit,_ = get_plot_labels_for_metric(xcol)
+    yscale,ylabel,yunit,_ = get_plot_labels_for_metric(ycol)
+
+    x = dfg.index * xscale
+    y = dfg['nanmean'].values * yscale
+    ylo = dfg['5th'].values * yscale
+    yhi = dfg['95th'].values * yscale
+
+    color = None
+    if colorby is not None:
+        if colorby == 'colony':
+            color = plotting_tools.COLONY_COLORS[dfcc['colony'].values[0]]
+        elif colorby == 'cellcycle':
+            color = CYCLE_COLOR_DICT[dfcc['cell_cycle'].values[0]]
+        else:
+            color = colorby
+
+    assert type(curr_ax) == plt.Axes
+
+    if plot_type=='mean':
+        curr_ax.plot(x,y,label=labelstr, color = color, linewidth=0.5)
+        curr_ax.fill_between(x,ylo,yhi,alpha=0.2, color = color, edgecolor='none')
+
+    elif plot_type=='count':
+        curr_ax.plot(x,dfg['count'].values,label=labelstr, linewidth=0.5, color = color)
+
+    # adjust x axis details
+    curr_ax.set_xlabel(f"Movie time {xunit}")
+    if xcol == 'index_sequence':
+        curr_ax.set_xticks(np.arange(0,60,10))
+        curr_ax.set_xlim(0,48)
+    elif xcol == 'dig_time':
+        curr_ax.set_xticks(np.arange(0,1.2,0.2))
+        curr_ax.set_xlim(0,1)
+
+    # adjust y axis details
+    if plot_type == 'mean':
+        curr_ax.set_ylabel(f"(90% interpercentile range)\nAvg. nuclear transient\ngrowth rate {yunit}")
+    elif plot_type == 'count':
+        curr_ax.set_ylabel("Counts")
+
+    if (ycol in ['dxdt_48_volume','dxdt_48_volume_dips_removed_um_unfilled']) & (plot_type == 'mean'):
+        curr_ax.set_yticks(np.arange(-20,80,20))
+        curr_ax.set_ylim(-5,70)
+    if ('per_V' in ycol) & (plot_type == 'mean'):
+        curr_ax.set_yticks(np.arange(-0,0.1,0.02))
+        curr_ax.set_ylim(0.012,0.07)
+    elif (ycol == 'volume') & (plot_type == 'mean'):
+        curr_ax.set_ylim(400,1200)
+
+    return curr_ax
+
+def update_plotting_params(fs=7,fw=6.5,fh=8):
+    plt.rcParams.update({'font.size': fs})
+    plt.rcParams.update({'axes.titlesize': fs})
+    plt.rcParams.update({'axes.labelsize': fs})
+    plt.rcParams.update({'xtick.labelsize': fs})
+    plt.rcParams.update({'ytick.labelsize': fs})
+    plt.rcParams.update({'legend.fontsize': fs})
+    # set axis linewidth
+    plt.rcParams.update({'axes.linewidth': 0.5})
+    plt.rcParams.update({'lines.linewidth': 0.7})
+
+    # remove top and right axis lines
+    plt.rcParams.update({'axes.spines.top': False})
+    plt.rcParams.update({'axes.spines.right': False})
+
+    # reduce the tick length
+    plt.rcParams.update({'xtick.major.size': 1.5})
+    plt.rcParams.update({'ytick.major.size': 1.5})
+    return fs,fw,fh
+
+
+#%%
+# plot 1: neighbors volume over time
+def plot_neighbors_volume_over_time(dfcolony,track_id_list,xcol='index_sequence',ycol='volume',fw=6.5,fh=8):
+    fig,ax = plt.subplots(1,1,figsize=(fw,fh))
+    axlist = np.asarray([ax]) if type(ax) != np.ndarray else ax
+    assert type(axlist) == np.ndarray # for mypy
+
+    # plot volume over time for all tracks
+    xscale, _, _, _ = get_plot_labels_for_metric(xcol)
+    yscale, ylabel, yunit, _ = get_plot_labels_for_metric(ycol)
+    for ti, track_id in enumerate(track_id_list):
+        dftrack = dfcolony[dfcolony["track_id"] == track_id]
+        x = dftrack[xcol].values * xscale
+        y = dftrack[ycol].values * yscale
+        ax.plot(x, y, label=f"{track_id}",zorder=-100,linewidth=0.5)
+    ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
+    fig,axlist = adjust_axis_positions(fig,axlist,curr_pos=None,width=1,height=1,space=0.075)
+
+    # set x axis limits and ticks based on all neighbor tracks
+    dfall = dfcolony[dfcolony['track_id'].isin(track_id_list)]
+    dfall = dfall.dropna(subset=[xcol])  # avoid in-place dropna on a slice
+    xmin,xmax = dfall[xcol].min() * xscale, dfall[xcol].max() * xscale
+
+    # adjust X and Y axis limits and ticks
+    ax.set_xticks(np.arange(0,48+12,6))
+    ax.set_xlim(xmin-1,xmax+1)
+    ax.set_yticks(np.arange(0,2000,200))
+    ax.set_ylim(0,1400)
+
+    # add title and labels
+    ax.set_title('immediate neighbors of main track')
+    ax.set_xlabel('Movie time (hr)')
+    ax.set_ylabel(f"{ylabel} {yunit}")
+
+    return fig,axlist
+
+
+def plot_tracks_aligned_at_volume_drop_onset(dfcolony,track_id_list,MAIN_TRACK_ID,timepoint,xcol='index_sequence',ycol='volume',interval=5,shift=5,fw=6.5,fh=8):
+    # interval = 5 # minutes per frame
+    # shift = 5 # frames before `timepoint` taken as the drop onset
+    fig,ax = plt.subplots(1,1,figsize=(fw,fh))
+    axlist = np.asarray([ax]) if type(ax) != np.ndarray else ax
+
+    yscale, _, yunit, _ = get_plot_labels_for_metric(ycol)
+    # now plot the neighbors in color
+    for ti, track_id in enumerate(track_id_list):
+        dftrack = dfcolony[dfcolony["track_id"] == track_id]
+        x0 = dftrack[xcol].values
+        y0 = dftrack[ycol].values
+
+        # find the index of the timepoint
+        idx = np.where(x0 == (timepoint - shift))[0][0]
+        x_at_drop_onset = x0[idx]
+        y_at_drop_onset = y0[idx]
+
+        x = (x0 - x_at_drop_onset) * interval
+        y = (y0 - y_at_drop_onset) * yscale
+
+        ax.plot(x, y, label=f"{track_id}",zorder=-100,marker='.',markersize=1)
+
+    ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
+    fig,axlist = adjust_axis_positions(fig,axlist,curr_pos=None,width=1,height=1,space=0.075)
+
+    ax.set_xticks(np.arange(-75,100,25))
+    ax.set_xlim(-25,85)
+    ax.set_yticks(np.arange(-300,400,100))
+    ax.set_ylim(-300,150)
+
+    ax.set_xlabel('Time relative to\ndrop onset (min)')
+    ax.set_ylabel(f"Change in volume relative to\nvolume at drop onset {yunit}")
+
+    return fig,axlist
+
+
+def _set_labels(ax,xcol,ycol1):
+    """
+    used in fit_volume_smooths_out_punching.py
+    """
+    # get labels
+    xscale,xlabel,xunit,_ = get_plot_labels_for_metric(xcol)
+    yscale1,ylabel1,yunit1,_ = get_plot_labels_for_metric(ycol1)
+    ax.set_xlabel(f"{xlabel} {xunit}")
+    ax.set_ylabel(f"{ylabel1} {yunit1}")
+    if 'dxdt' in ycol1:
+        ax.set_ylabel("Nuclear transient\ngrowth rate (μm\u00B3)")
+
+    # adjust y-axis
+    if ycol1 == 'volume':
+        ax.set_yticks(np.arange(0,1400,200))
+        ax.set_ylim(300,1200)
+    elif ycol1 == 'dxdt_48_volume':
+        ax.set_yticks(np.arange(-20,100,20))
+        ax.set_ylim(-5,80)
+
+    # adjust x-axis
+    ax.set_xlabel("Synchronized nuclear\ngrowth time (hr)")
+    return ax
+
+def _plot_lines(df_track,xcol,ycol1,ycol2,ax):
+    """
+    used in fit_volume_smooths_out_punching.py
+    """
+    xscale,xlabel,xunit,_ = get_plot_labels_for_metric(xcol)
+    yscale1,ylabel1,yunit1,_ = get_plot_labels_for_metric(ycol1)
+    yscale2,ylabel2,yunit2,_ = get_plot_labels_for_metric(ycol2)
+
+    x = df_track[xcol].astype('float').values
+    transition = df_track["frame_transition"].astype('float').min()
+    x -= transition
+    x *= xscale
+
+    y1 = df_track[ycol1].values * yscale1
+    y2 = df_track[ycol2].values * yscale2
+
+    ax.plot(x,y1,'k-',label=f"{ylabel1}")
+    ax.plot(x,y2,'r-',label=f"{ylabel2}")
+
+    ax = _set_labels(ax,xcol,ycol1)
+
+    return ax
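The two private helpers above are thin wrappers consumed by the FigS10 workflow; a minimal stand-alone sketch of the same raw-vs-dip-removed overlay (track 86570 is the workflow's example track; it assumes df_full already carries the dip-removed column from filter_out_dips.run_script):

```python
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(3, 2))
df_track = df_full[df_full["track_id"] == 86570].sort_values("index_sequence")
ax = plot_track_with_fit_line(df_track, ax,
                              ycol1="volume",
                              ycol2="volume_dips_removed_um_unfilled")
ax.legend(frameon=False)
fig.show()
```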
""" + + ax = _plot_lines(df_track,xcol,ycol1,ycol2,ax) + return ax + + +#%% figure plotting volume trajectory +# create figure and axes +def plot_track_with_volume_dip(ax,df0,main_track_id,xcol='index_sequence',ycol='volume',start_at_0=True,add_time_point_lines=False,timepoint=None): + """ + used for fit_volume_smooths_out_punching.py to show the effect of volume dip on a track volume and its dxdt_48_volume + """ + dftrack = df0[df0["track_id"] == main_track_id] + xscale, xlabel, xunit, _ = get_plot_labels_for_metric(xcol) + yscale, ylabel, yunit, _ = get_plot_labels_for_metric(ycol) + + x0 = dftrack[xcol].values + transition = dftrack["frame_transition"].astype('float').min() + if start_at_0: + x = (x0 - transition)*xscale + else: + x = (x0)*xscale + + y = dftrack[ycol].values * yscale + ax.plot(x, y, 'k-', label=f"track {main_track_id}",linewidth=0.7) + if add_time_point_lines: + tx1l_frame = timepoint-4*12 + tx1r_frame, tx2l_frame = timepoint,timepoint + tx2r_frame = timepoint+4*12 + + tx1_frame = np.asarray([tx1l_frame,tx1r_frame]) + tx2_frame = np.asarray([tx2l_frame,tx2r_frame]) + + if start_at_0: + tx1 = (tx1_frame - transition) * xscale + tx2 = (tx2_frame - transition) * xscale + else: + tx1 = tx1_frame * xscale + tx2 = tx2_frame * xscale + ty1 = dftrack[dftrack['index_sequence'].isin(tx1_frame)][ycol].values * yscale + ty2 = dftrack[dftrack['index_sequence'].isin(tx2_frame)][ycol].values * yscale + ax.plot(tx1,ty1,'c-',linewidth=0.7) + ax.plot(tx2,ty2,'m-',linewidth=0.7) + ax.scatter(np.mean(tx1),np.mean(ty1),c='c',s=5) + ax.scatter(np.mean(tx2),np.mean(ty2),c='m',s=5) + + + + ax.set_xlabel(f"Synchonrized nuclear\ngrowth time (hr)") + ax.set_ylabel(f"{ylabel} {yunit}") + ax.set_title(f"track {main_track_id}") + ax.set_xticks(np.arange(0,20,4)) + ax.set_xlim(-2,np.max(x)) + + ax = _set_labels(ax,xcol,ycol) + + + return ax + + + + +def plot_dip_detection_validation(dftrack,peak_str = 'dips'): + # axis 0 is interpolated raw_volume, sg_smoothed volume and power law fit volume (steps 0 and step1) + # axis 1 is smoothed volume detrended by power law fit (and detected peaks) + # and it would be cool to draw the peak magnitude as vertical line on the plot + # axis 2 is the raw volume with the peak annotated (and with the peak magnitude as vertical line) + # axis 3 is the raw volume with the peak removed + track_id = dftrack['track_id'].values[0] + transition = dftrack['frame_transition'] + dftrack['transition_time'] = dftrack['index_sequence'].copy() - transition + dftrack.set_index('index_sequence',inplace=True) + ncols = 2 + nrows = 1 + fig,ax = plt.subplots(nrows,ncols,figsize=(6.5,8)) + ax = np.asarray([ax]) if type(ax) != np.ndarray else ax + assert type(ax) == np.ndarray + + # ax0 + # plot raw volume on ax0 and ax2 + curr_ax = ax[0] + xscale, xlabel, xunit, _ = get_plot_labels_for_metric("index_sequence") + yscale, ylabel, yunit, _ = get_plot_labels_for_metric("volume") + x = dftrack.transition_time.values * xscale + y = dftrack['volume'].values * yscale + curr_ax.plot(x,y,'k',linewidth=1,label='raw volume') + curr_ax = ax[0] + y = dftrack['volume_smooth'].values + curr_ax.plot(x,y,'g',linewidth=0.7, label='smoothed') + + ycol = 'fit_volume_interpolated' + yscale = 1 + y = dftrack[ycol].values * yscale + curr_ax.plot(x,y,label='power law fit',color='m',linewidth=0.7,linestyle='-') + curr_ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.3), + handlelength=1,markerscale=1,frameon=False) + curr_ax.set_ylabel(f"{ylabel} {yunit}") + curr_ax.set_title('step1:\nsmooth volume 
trajectory\nand detrend with power law fit') + curr_ax.set_yticks([400,600,800,1000,1200]) + curr_ax.set_ylim(400,1200) + + # ax1 + curr_ax = ax[1] + ycol = f'volume_smooth_detrended' + y = dftrack[ycol].values + curr_ax.plot(x,y,color='g',linewidth=0.7,label='smoothed & detrended',zorder=-100) + curr_ax.set_ylabel(f"Detrended volume {yunit}") + curr_ax.set_title('step2:\nfind (inverse) peaks in\ndetrended volume trajectory') + peaks = dftrack[f'volume_{peak_str}_peak_mask_at_center'] # boolean array + peaks = peaks[peaks>0].index + xpeak = dftrack.loc[peaks,'transition_time'] * xscale + ypeak = dftrack.loc[peaks,ycol] + curr_ax.scatter(xpeak,ypeak,color='m',marker='o',s=2,label='peak') + + mask = dftrack[f'volume_{peak_str}_peak_mask_at_region'] + xpeak_mask = dftrack.loc[mask,'transition_time'] * xscale + ypeak_mask = dftrack.loc[mask,ycol] * yscale + + curr_ax.plot(xpeak_mask,ypeak_mask,color='m',linestyle=':',linewidth=0.5, + zorder=1000,label='peak region') + + + curr_ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2), + handlelength=1,markerscale=1,frameon=False) + curr_ax.set_yticks([-100,-50,0,50,100]) + curr_ax.set_ylim(-150,150) + + + + + # set xlimit of all axes + xlimmax = np.max([axx.get_xlim()[1] for axx in ax]) + for curr_ax in ax: + curr_ax.set_xticks(np.arange(0,20,4)) + curr_ax.set_xlim(-2,xlimmax) + curr_ax.set_xlabel(f"Synchronized nuclear growth time (hr)") + + # plt.suptitle(f"track {track_id}") + # fig,ax = adjust_axis_positions(fig,ax,curr_pos=None,width=2,height=2,space=0.6,keep_labels=True) + return fig,ax + + + +def plot_dxdt_over_time_by_cell_cycle(dfc,ycol,xcol1='index_sequence',plot_type='mean',cell_cycle_width=0.2,cell_cycle_centers=[0.3,0.5,0.7],bin_labels=['Early','Mid','Late'],fw=6.5,fh=8): + """ + plot dxdt over time + + Parameters + ---------- + dfc : pd.DataFrame + dataframe to plot, must be for single colony + ycol : str + column to plot + xcol1 : str, optional + column to bin by. The default is 'index_sequence'. + plot_type : str, optional + 'mean' or 'count'. The default is 'mean'. + cell_cycle_width : float, optional + width of the cell cycle bin. The default is 0.2. + cell_cycle_centers : list, optional + centers of the cell cycle bins. The default is [0.3,0.5,0.7]. + bin_labels : list, optional + labels for the bins. The default is ['early','mid','late']. + fw : float, optional + width of the figure. The default is 6.5. + fh : float, optional + height of the figure. The default is 8. 
+def plot_dxdt_over_time_by_cell_cycle(dfc,ycol,xcol1='index_sequence',plot_type='mean',cell_cycle_width=0.2,cell_cycle_centers=[0.3,0.5,0.7],bin_labels=['Early','Mid','Late'],fw=6.5,fh=8):
+    """
+    plot dxdt over time
+
+    Parameters
+    ----------
+    dfc : pd.DataFrame
+        dataframe to plot, must contain a single colony
+    ycol : str
+        column to plot
+    xcol1 : str, optional
+        column to bin by. The default is 'index_sequence'.
+    plot_type : str, optional
+        'mean' or 'count'. The default is 'mean'.
+    cell_cycle_width : float, optional
+        width of each cell cycle bin. The default is 0.2.
+    cell_cycle_centers : list, optional
+        centers of the cell cycle bins. The default is [0.3,0.5,0.7].
+    bin_labels : list, optional
+        labels for the bins. The default is ['Early','Mid','Late'].
+    fw : float, optional
+        width of the figure. The default is 6.5.
+    fh : float, optional
+        height of the figure. The default is 8.
+
+    Returns
+    -------
+    fig : plt.Figure
+    ax : list of plt.Axes
+    """
+    cell_cycle_bins = [(cc-cell_cycle_width/2,cc+cell_cycle_width/2) for cc in cell_cycle_centers]
+
+    nrows = 1
+    ncols = len(cell_cycle_bins)+1
+
+    fig,ax = plt.subplots(nrows,ncols,figsize=(fw,fh))
+    assert type(ax) == np.ndarray # for mypy
+
+    colony = dfc['colony'].values[0]
+
+    # first plot the whole colony by itself on an axis
+    curr_ax = ax[0]
+    curr_ax = plot_dfg(dfc,xcol1,ycol,f"{colony.capitalize()}",curr_ax,plot_type=plot_type,colorby='colony')
+
+    curr_ax.legend(loc='lower left',
+                   fontsize=6,frameon=False,
+                   markerscale=1,handlelength=1,
+                   labelspacing=0,
+                   bbox_to_anchor=(-0.05,-0.1),
+                   )
+
+    for ri,cell_cycle_bin in enumerate(cell_cycle_bins):
+        curr_ax = ax[ri+1]
+        curr_ax = plot_dfg(dfc,xcol1,ycol,"",curr_ax,plot_type=plot_type,colorby='colony')
+        dfcc = dfc[(dfc['dig_time'] >= cell_cycle_bin[0]) & (dfc['dig_time'] <= cell_cycle_bin[1])]
+
+        # overlay the cell-cycle-binned subset in black on top of the whole-colony curve
+        dfcc['cell_cycle'] = ri
+        curr_ax = plot_dfg(dfcc,xcol1,ycol,bin_labels[ri],curr_ax,plot_type=plot_type,colorby='k')
+        curr_ax.legend(loc='lower left',
+                       fontsize=6,frameon=False,
+                       markerscale=1,handlelength=0.5,
+                       labelspacing=0,
+                       bbox_to_anchor=(-0.05,-0.1),
+                       )
+    return fig,ax
\ No newline at end of file
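A hedged usage sketch for the cell-cycle-binned plot. It assumes a single-colony dataframe with a `dig_time` column (see `add_times.digitize_time_column`), and that `plot_dfg`, the grouped line-plot helper this function calls, is defined elsewhere in `plot_help.py` (it is not shown in this hunk).

```python
# Sketch only: dfc must be one colony and carry 'dig_time'.
from nuc_morph_analysis.analyses.volume.plot_help import plot_dxdt_over_time_by_cell_cycle

dfc = df_full[df_full["colony"] == df_full["colony"].values[0]]
fig, ax = plot_dxdt_over_time_by_cell_cycle(dfc, ycol="dxdt_48_volume", plot_type="mean")
fig.show()
```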
diff --git a/nuc_morph_analysis/analyses/volume_variation/plot_features.py b/nuc_morph_analysis/analyses/volume_variation/plot_features.py
index c5e91c1c..5e5d4ab4 100644
--- a/nuc_morph_analysis/analyses/volume_variation/plot_features.py
+++ b/nuc_morph_analysis/analyses/volume_variation/plot_features.py
@@ -290,6 +290,8 @@ def scatter_plot(
     else:
         df_d = df[df.colony == colony]
 
+    df_d = df_d.dropna(subset=[column_1, column_2])  # this step is necessary to get an accurate N value from len(df_d)
+
     fig, ax = plt.subplots(1, 1, figsize=(5, 4), dpi=dpi)
 
     nas = np.logical_or(np.isnan(df_d[column_1]), np.isnan(df_d[column_2]))
@@ -307,7 +309,6 @@ def scatter_plot(
     xscale, xlabel, xunits, xlim = get_plot_labels_for_metric(column_1)
     yscale, ylabel, yunits, ylim = get_plot_labels_for_metric(column_2)
 
-
     if colorby_time == True:
         cscale, clabel, cunits, clim = get_plot_labels_for_metric("index_sequence")
         # cis = df_d['normalized_time'].values
diff --git a/nuc_morph_analysis/lib/preprocessing/add_features.py b/nuc_morph_analysis/lib/preprocessing/add_features.py
index 440f1ab8..8495fc48 100644
--- a/nuc_morph_analysis/lib/preprocessing/add_features.py
+++ b/nuc_morph_analysis/lib/preprocessing/add_features.py
@@ -1,5 +1,6 @@
 from nuc_morph_analysis.analyses.lineage.get_features import lineage_trees
 from nuc_morph_analysis.lib.visualization.plotting_tools import get_plot_labels_for_metric
+from nuc_morph_analysis.lib.preprocessing.compute_change_over_time import run_script
 import numpy as np
 
 FRAME_COL = {"Ff": "A", "frame_transition": "B", "Fb": "C"}
@@ -683,3 +684,41 @@ def add_mean_features(df,
     multiplier_list = [get_plot_labels_for_metric(x)[0] for x in feature_list]
     df = add_mean_feature_over_trajectory(df, feature_list, multiplier_list)
     return df
+
+def add_volume_change_over_25_minute_window(df, bin_interval=5):
+    """
+    Adds a new column to the dataframe that quantifies how much the volume has changed relative to
+    25 minutes in the past (units are pixels^3).
+    This is useful for identifying volume dips in all tracks (see Fig S10).
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        The input dataframe.
+    bin_interval : int
+        the number of frames to compute the change in volume over
+        default is 5 frames, which is 25 minutes
+
+    Returns
+    -------
+    df : pandas.DataFrame
+        The dataframe with the new column 'volume_change_over_25_minutes' added
+        (units are pixels^3).
+    """
+    dfm = df.copy()
+    # run the compute_change_over_time workflow for the given bin_interval
+    dfm = run_script(dfm, ['volume'], [bin_interval], time_location='end')
+    dfm['volume_change_over_25_minutes'] = dfm['dxdt_5_volume_end']*5
+
+    # now check that all columns in df have the same dtype as columns in dfm
+    for col in df.columns:
+        if dfm[col].dtype != df[col].dtype:
+            print(f"column {col} has dtype {dfm[col].dtype} in dfm and {df[col].dtype} in df")
+
+    if dfm.shape[0] != df.shape[0]:
+        raise Exception(
+            f"The loaded manifest has {df.shape[0]} rows and your "
+            f"final manifest has {dfm.shape[0]} rows. "
+            "Please revise code to leave manifest rows unchanged."
+        )
+    return dfm
\ No newline at end of file
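A minimal self-contained sketch of the 25-minute window arithmetic, assuming a 5-minute imaging interval: `dxdt_5_volume_end` is the per-frame change over the trailing 5-frame window, so multiplying by 5 recovers the total change over the window.

```python
import pandas as pd

# toy volume trace, one value per 5-minute frame
volume = pd.Series([100.0, 102.0, 105.0, 109.0, 114.0, 120.0])
per_frame = volume.diff(periods=5) / 5.0      # trailing window, 'end' alignment
change_over_25_minutes = per_frame * 5        # == volume(t) - volume(t - 5 frames)
print(change_over_25_minutes.iloc[-1])        # 20.0 = 120 - 100
```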
diff --git a/nuc_morph_analysis/lib/preprocessing/add_neighborhood_avg_features.py b/nuc_morph_analysis/lib/preprocessing/add_neighborhood_avg_features.py
index 6e4506c2..f4c3435a 100644
--- a/nuc_morph_analysis/lib/preprocessing/add_neighborhood_avg_features.py
+++ b/nuc_morph_analysis/lib/preprocessing/add_neighborhood_avg_features.py
@@ -9,7 +9,7 @@
 LOCAL_RADIUS_LIST = [90, -1]
 LOCAL_RADIUS_STR_LIST = ["90um", "whole_colony"]
 
-NEIGHBOR_FEATURE_LIST = ["volume"]
+NEIGHBOR_FEATURE_LIST = ["volume","dxdt_48_volume"]
 
 NEIGHBOR_PREFIX = "neighbor_avg_"
 
@@ -104,6 +104,7 @@ def run_script(
     feature_list=NEIGHBOR_FEATURE_LIST,
     local_radius_list=LOCAL_RADIUS_LIST,
     exclude_outliers=True,
+    include_all_dxdt=False,
 ):
     """
     Determine average values of features within neighborhoods of defined radius around each cell
@@ -122,6 +123,8 @@
         -1 signifies the whole colony
     exclude_outliers : bool, optional
         whether to exclude outliers. The default is True.
+    include_all_dxdt : bool, optional
+        whether to include all dxdt columns in the dataframe. The default is False.
 
     Returns
     -------
@@ -144,7 +147,10 @@
         dfi = df[df["colony"] == colony]
 
         pass_cols = ["index_sequence", "colony", "track_id", "centroid_x", "centroid_y"]
-        columns = feature_list + [x for x in dfi.columns if "dxdt" in x]
+        if include_all_dxdt:
+            columns = feature_list + [x for x in dfi.columns if "dxdt" in x]
+        else:
+            columns = feature_list
 
         # first find the unique index_sequence values
         index_sequences = dfi["index_sequence"].unique()
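A hedged usage sketch of the extended `run_script`, mirroring the call added to `process_full_tracks` later in this PR: average a dip-filtered growth rate over 90 um neighborhoods and over the whole colony. `df_full` is assumed to be a processed full-track dataframe.

```python
# Sketch only: df_full must already contain 'dxdt_48_volume_dips_removed_um_unfilled'.
from nuc_morph_analysis.lib.preprocessing import add_neighborhood_avg_features

df_full = add_neighborhood_avg_features.run_script(
    df_full,
    feature_list=["dxdt_48_volume_dips_removed_um_unfilled"],
    local_radius_list=[90, -1],   # -1 signifies the whole colony
    include_all_dxdt=False,       # average only the features listed above
)
```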
diff --git a/nuc_morph_analysis/lib/preprocessing/add_times.py b/nuc_morph_analysis/lib/preprocessing/add_times.py
index 8d53c961..7496c5c4 100644
--- a/nuc_morph_analysis/lib/preprocessing/add_times.py
+++ b/nuc_morph_analysis/lib/preprocessing/add_times.py
@@ -425,3 +425,105 @@ def add_transition_point(
 
     print(f"{ntracks} full tracks in dataset after transition point added")
     return df
+
+
+def determine_bin_centers(minval,maxval,number_of_bins=None,step_size=None):
+    """
+    determine the bin centers for digitizing time
+    Note: user must specify either number_of_bins or step_size
+
+    Parameters
+    ----------
+    minval : float
+        minimum value of time
+    maxval : float
+        maximum value of time
+    number_of_bins : int
+        number of bins to use
+    step_size : float
+        step size to use
+
+    Returns
+    -------
+    np.array
+        bin centers
+    """
+    if number_of_bins is not None:
+        bin_centers = np.linspace(minval,maxval,number_of_bins)
+    elif step_size is not None:
+        bin_centers = np.arange(minval,maxval+step_size,step_size)
+    else:
+        raise ValueError('Must specify either number_of_bins or step_size')
+    return bin_centers
+
+def digitize_time_array(time_array,bin_centers):
+    """
+    convert an array of time values to a digitized array of time values using the given bin centers
+    Note: the bin centers are assumed to be evenly spaced
+
+    Parameters
+    ----------
+    time_array : np.array
+        array of time values
+    bin_centers : np.array
+        bin centers to digitize into (e.g. from determine_bin_centers)
+
+    Returns
+    -------
+    np.array
+        digitized time array
+    """
+    step_size = bin_centers[1] - bin_centers[0]
+    bin_edges = np.append(bin_centers-step_size/2,bin_centers[-1]+step_size/2)
+    dig_time_idxs = np.digitize(time_array,bin_edges,right=False)
+    dig_time = bin_centers[dig_time_idxs-1]
+    return dig_time
+
+def digitize_time_column(df, minval, maxval, number_of_bins=None, step_size=None, time_col = 'normalized_time', new_col='dig_time'):
+    """
+    create a new column in the dataframe that digitizes the input time column
+    Note: user must specify either number_of_bins or step_size
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        dataframe to use
+    minval : float
+        minimum value of time
+    maxval : float
+        maximum value of time
+    number_of_bins : int
+        number of bins to use
+    step_size : float
+        step size to use
+    time_col : str
+        column to digitize. The default is 'normalized_time'.
+    new_col : str
+        name of the new column. The default is 'dig_time'.
+
+    Returns
+    -------
+    pd.DataFrame
+        dataframe with new column
+    """
+    time_array = df[time_col].values
+    bin_centers = determine_bin_centers(minval,maxval,number_of_bins=number_of_bins,step_size=step_size)
+    dig_time_array = digitize_time_array(time_array,bin_centers)
+    df[new_col] = dig_time_array
+    return df
+
+def validate_dig_time_with_plot(time_array = np.linspace(0,1,1000), number_of_bins=6):
+    """
+    visualize how the input array is binned by plotting
+    the input array (x-axis) vs the digitized array (y-axis)
+    """
+    bin_centers = determine_bin_centers(0,1,number_of_bins=number_of_bins)
+    dig_time = digitize_time_array(time_array,bin_centers)
+    extrastr = ''
+    fig, ax = plt.subplots(figsize=(3,3))
+    plt.plot(time_array,dig_time)
+    plt.xlabel('time')
+    plt.ylabel('dig_time')
+    plt.title(f'Validation of digitized time\n{number_of_bins} bins{extrastr}')
+    return fig, ax
\ No newline at end of file
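A self-contained check of the binning math: with six bin centers at 0, 0.2, ..., 1.0, the edges sit half a step on either side, and `np.digitize` maps each value to its nearest center.

```python
import numpy as np

bin_centers = np.linspace(0, 1, 6)        # determine_bin_centers(0, 1, number_of_bins=6)
step = bin_centers[1] - bin_centers[0]    # 0.2
edges = np.append(bin_centers - step / 2, bin_centers[-1] + step / 2)
idxs = np.digitize([0.05, 0.31, 0.99], edges, right=False)
print(bin_centers[idxs - 1])              # [0.  0.4 1. ]
```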
diff --git a/nuc_morph_analysis/lib/preprocessing/compute_change_over_time.py b/nuc_morph_analysis/lib/preprocessing/compute_change_over_time.py
index a3c628cf..f157338c 100644
--- a/nuc_morph_analysis/lib/preprocessing/compute_change_over_time.py
+++ b/nuc_morph_analysis/lib/preprocessing/compute_change_over_time.py
@@ -8,7 +8,50 @@
 DXDT_PREFIX = "dxdt_"
 
 
-def get_change_over_time_array(dfd, time_cols, bin_interval):
+def compute_change_over_time_on_dataframe(dfpi,bin_interval,time_cols,prefix,time_location='center'):
+    """
+    compute ∆V/∆t for all tracks in the dataframe, dfpi, with a bin interval of bin_interval
+
+    Parameters
+    ----------
+    dfpi : pd.DataFrame
+        dataframe with columns ['track_id'] + time_cols
+    bin_interval : int
+        number of frames to compute growth over
+    time_cols : list
+        list of columns on which to compute ∆V/∆t
+    prefix : str
+        prefix to add to the new columns
+    time_location : str
+        'center' or 'end', determines where within the bin_interval the change over time value is placed.
+        default is 'center' (e.g. for bin_interval=48, the change over time value is returned at the middle timepoint, t=24);
+        when 'end', it is returned at the last timepoint (t=48 for bin_interval=48).
+        this matters for the new feature `dxdt_5_volume_end`, which gets renamed via dfm['volume_change_over_25_minutes'] = dfm['dxdt_5_volume_end']*5
+
+    Returns
+    -------
+    pd.DataFrame
+        dataframe with columns ['track_id','index_sequence'] + [f"{prefix}{feature}{suffix}" for feature in time_cols]
+    """
+    # now we want to compute the difference
+    # the difference is the value at timepoint t+bin_interval minus the value at timepoint t
+    if time_location=='center':
+        # because we want the difference centered at timepoint t, we shift the difference by bin_interval//2
+        diff = dfpi.diff(axis=0, periods=bin_interval).shift(-1 * bin_interval // 2)
+        suffix=''
+    elif time_location=='end':
+        diff = dfpi.diff(axis=0, periods=bin_interval)
+        suffix='_end'
+    else:
+        raise ValueError("time_location must be 'center' or 'end'")
+
+    # now normalize the changes by the bin_interval
+    diff = diff / float(bin_interval)
+    # now transform diff back into the form of dfd
+    dfm = diff.stack().reset_index()
+    dfm = dfm.rename(columns={x: f"{prefix}{x}{suffix}" for x in time_cols})
+    return dfm
+
+def get_change_over_time_array(dfd, time_cols, bin_interval, time_location='center'):
     """
     Compute a rolling window-based change_over_time for all tracks.
     The change_over_time at T=t is computed as the difference between the values at
     t+bin_interval/2 and t-bin_interval/2, divided by bin_interval.
@@ -34,32 +77,28 @@
     # if not, fill in missing timepoints with np.nan
     dfp = dfp.reindex(index=range(dfp.index.values.min(), dfp.index.values.max() + 1))
 
-    # now we want to compute the difference
-    # the difference is the value at timepoint t+bin_interval - the value at timepoint t
-    # because we want the difference centered at timepoint t, we will shift the difference by bin_interval//2
-    diff = dfp.diff(axis=0, periods=bin_interval).shift(-1 * bin_interval // 2)
-    # now normalize the changes by the bin_interval
-    diff = diff / float(bin_interval)
-
-    # now in addition to the volume level, add volume_per_V as a level to diff
-    diff = diff.join(diff[["volume"]].div(dfp["volume"]).rename(columns={"volume": "volume_per_V"}))
-    # now transform diff back into the form of dfd
-    dfm = diff.stack().reset_index()
-    dfm = dfm.rename(columns={x: f"{prefix}{x}" for x in time_cols})
-    dfm = dfm.rename(columns={"volume_per_V": f"{prefix}volume_per_V"})
-
+    dfm = compute_change_over_time_on_dataframe(dfp, bin_interval, time_cols, prefix, time_location)
     # now drop rows with nan values
-    dfm = dfm.dropna(axis=0)
-
+    # dfm = dfm.dropna(axis=0,how='all')
+
     # now recover the CellId values
     dfmi = dfm.set_index(["index_sequence", "track_id"])
     dfdi = dfd.set_index(["index_sequence", "track_id"])
+
+    # find all dfmi index values NOT in dfdi index values
+    # this is the set of index values that are not in the original dataframe
+    # they are added during the pivot operation
+    # we will drop these rows
+    not_in_dfdi = dfmi.index.difference(dfdi.index)
+    # print(f"dropping {len(not_in_dfdi)} rows")
+    dfmi.drop(not_in_dfdi, inplace=True)
+
     dfmi.loc[dfmi.index.values, "CellId"] = dfdi.loc[dfmi.index.values, "CellId"]
     return dfmi.reset_index().set_index("CellId")
 
 
 # %%
-def run_script(df=None, bin_interval=12, exclude_outliers=True):
+def run_script(df=None, dxdt_feature_list = None, bin_interval_list=None, exclude_outliers=True, time_location='center'):
     """
     run the compute_change_over_time workflow for a given bin_interval
@@ -68,17 +107,29 @@
     df : pd.DataFrame
         dataframe on which to compute change_over_time with columns
         ['colony','track_id','index_sequence','label_img']+time_cols
-    bin_interval : int
-        integer that represents the number of frames to compute growth over
+    dxdt_feature_list : list
+        list of columns to compute growth on
+    bin_interval_list : list
+        list of integers, each of which represents the number of frames to compute growth over
     exclude_outliers : bool
         if True, exclude outlier time points from the growth rate calculation
+    time_location : str
+        'center' or 'end', determines where within the bin_interval the change over time value is placed.
+        default is 'center' (e.g. for bin_interval=48, the change over time value is returned at timepoint 24);
+        when 'end', it is returned at the last timepoint in the window (this is important for the new feature
+        `dxdt_5_volume_end`, which gets renamed via dfm['volume_change_over_25_minutes'] = dfm['dxdt_5_volume_end']*5)
 
     Returns
     -------
     pd.DataFrame
         dataframe with change_over_time values for each track at each time point
     """
+    if dxdt_feature_list is None:
+        dxdt_feature_list = DXDT_FEATURE_LIST
+    if bin_interval_list is None:
+        bin_interval_list = BIN_INTERVAL_LIST
+
     assert df.index.name == "CellId"
     dforig = df.copy()
     if exclude_outliers:
         # to ensure that outlier datapoints are NOT used for the growth rate calculation, filter out time point outliers here
@@ -89,15 +140,18 @@
     # only keep the necessary columns; remove shcoeffs columns so the dataframe is not so large
     # CellId becomes a column too after reset_index
     dfd = df[
-        ["colony", "track_id", "index_sequence", "label_img"] + DXDT_FEATURE_LIST
+        ["colony", "track_id", "index_sequence", "label_img"] + dxdt_feature_list
     ].reset_index()
 
     # convert all time_cols to float32
-    dfd[DXDT_FEATURE_LIST] = dfd[DXDT_FEATURE_LIST].astype(np.float32)
+    dfd[dxdt_feature_list] = dfd[dxdt_feature_list].astype(np.float32)
 
     # returns dfo with index=CellId
-    dfo = get_change_over_time_array(dfd, DXDT_FEATURE_LIST, bin_interval)
-    new_columns = [x for x in dfo.columns.tolist() if x not in dforig.columns.tolist()]
-    # add new columns to original dataframe
-    dforig.loc[dfo.index.values, new_columns] = dfo.loc[dfo.index.values, new_columns]
+    for bin_interval in bin_interval_list:
+        dfo = get_change_over_time_array(dfd, dxdt_feature_list, bin_interval, time_location)
+        new_columns = [x for x in dfo.columns.tolist() if x not in dforig.columns.tolist()]
+        # add new columns to original dataframe
+        dforig.loc[dfo.index.values, new_columns] = dfo.loc[dfo.index.values, new_columns]
+
+    assert dforig.index.name == "CellId"
     return dforig
+
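A self-contained sketch of the 'center' vs 'end' alignment on a toy track, mirroring `compute_change_over_time_on_dataframe`: both compute the same per-frame difference, they just place it at a different timepoint.

```python
import pandas as pd

v = pd.Series(range(10), dtype=float)   # a linearly growing toy "volume"
bin_interval = 4
center = v.diff(periods=bin_interval).shift(-1 * bin_interval // 2) / bin_interval
end = v.diff(periods=bin_interval) / bin_interval
print(center.dropna().index.tolist())   # [2, 3, 4, 5, 6, 7] -> value placed mid-window
print(end.dropna().index.tolist())      # [4, 5, 6, 7, 8, 9] -> value placed at window end
```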
if dataset in ["all_baseline", "all_feeding_control", "all_drug_perturbation"]: - df_master = load_local_dataset(dataset, title="with_features") + df_master = load_local_dataset(dataset, title="with_features", remove_growth_outliers=remove_growth_outliers) else: experiment_group = load_data.get_dataset_experiment_group_by_name(dataset) - df_master = load_local_dataset(f"all_{experiment_group}", title="with_features") + df_master = load_local_dataset(f"all_{experiment_group}", title="with_features",remove_growth_outliers=remove_growth_outliers) df_master = df_master.loc[df_master.colony == dataset] except FileNotFoundError: load_local = False @@ -103,12 +106,12 @@ def load_dataset_with_features( print("WARNING!: Loading and saving local dataset with features.") print("this is a redundant operation and may not be necessary") if save_local: - write_local(df_master, dataset, "with_features") + write_local(df_master, dataset, "with_features", remove_growth_outliers=remove_growth_outliers) return df_master -def load_local_dataset(dataset, title): - filename = name_local_file(dataset, title) +def load_local_dataset(dataset, title, remove_growth_outliers=False): + filename = name_local_file(dataset, title=title, remove_growth_outliers=remove_growth_outliers) if os.path.exists(filename): print("WARNING!: Loading local dataset with features.") print("!!!This saves time but may not be the most recent version!!!") @@ -120,7 +123,7 @@ def load_local_dataset(dataset, title): return df_master -def name_local_file(dataset, title, destdir=None, format="parquet"): +def name_local_file(dataset, title, destdir=None, format="parquet", remove_growth_outliers=False): """ Parameters ---------- @@ -134,11 +137,14 @@ def name_local_file(dataset, title, destdir=None, format="parquet"): if destdir is None: destdir = Path(__file__).parent.parent.parent.parent / "data" os.makedirs(destdir, exist_ok=True) - filename = f"{destdir}/{dataset}_{title}.{format}" + if not remove_growth_outliers: + filename = f"{destdir}/{dataset}_{title}_with_growth_outliers.{format}" + else: + filename = f"{destdir}/{dataset}_{title}.{format}" return filename -def write_local(df, dataset, title, destdir=None, format="parquet"): +def write_local(df, dataset, title, destdir=None, format="parquet", remove_growth_outliers=False): """ Parameters ---------- @@ -148,7 +154,7 @@ def write_local(df, dataset, title, destdir=None, format="parquet"): format: str, optional "parquet" or "csv" """ - filename = name_local_file(dataset, title, destdir, format) + filename = name_local_file(dataset, title, destdir, format, remove_growth_outliers) if format == "parquet": df.to_parquet(filename, index=True) elif format == "csv": @@ -199,6 +205,8 @@ def process_all_tracks(df, dataset, remove_growth_outliers, num_workers): df = add_fov_touch_timepoint_for_colonies(df) df = add_features.add_non_interphase_size_shape_flag(df) df = add_change_over_time(df) + df = add_features.add_volume_change_over_25_minute_window(df) + df = add_neighborhood_avg_features.run_script(df, num_workers=num_workers) df = add_neighborhood_avg_features_lrm.run_script(df, num_workers=num_workers, feature_list=["volume", "height", "xy_aspect", "mesh_sa", "2d_area_nuc_cell_ratio"], @@ -260,7 +268,7 @@ def process_full_tracks(df_all, thresh, pix_size, interval): df_full = add_features.add_fold_change_track_fromB(df_full, "SA", "mesh_sa", pix_size**2) df_full = add_growth_features.add_early_growth_rate(df_full, interval) df_full = add_growth_features.add_late_growth_rate_by_endpoints(df_full) - 
df_full = add_growth_features.fit_tracks_to_model(df_full, interval, "power") + df_full = add_growth_features.fit_tracks_to_model(df_full, interval, "power",add_fit_volume=True) #add_fit_volume=True, used for volume dip detection df_full = add_growth_features.fit_tracks_to_model(df_full, interval, "exponential") df_full = add_growth_features.fit_tracks_to_model(df_full, interval, "linear") @@ -269,6 +277,11 @@ def process_full_tracks(df_all, thresh, pix_size, interval): df_full = add_features.add_feature_at(df_full, "frame_transition", 'height', 'height_percentile', pix_size) df_full = add_features.add_features_at_transition(df_full) df_full = add_features.get_early_transient_gr_of_neighborhood(df_full, scale=get_plot_labels_for_metric('neighbor_avg_dxdt_48_volume_90um')[0]) + + df_full = filter_out_dips.run_script(df_full) + df_full = compute_change_over_time.run_script(df_full, dxdt_feature_list=['volume_dips_removed_um_unfilled'], bin_interval_list=[48]) + df_full = add_neighborhood_avg_features.run_script(df_full, feature_list=['dxdt_48_volume_dips_removed_um_unfilled']) + df_full = add_features.sum_mitotic_events_along_full_track(df_full) df_full = add_features.normalize_sum_events(df_full) df_full = add_features.add_mean_features(df_full) @@ -313,9 +326,6 @@ def merge_datasets(df_all, df_full): "colony_non_circularity", "colony_non_circularity_scaled", "max_colony_depth", - "dxdt_48_volume_per_V", - "neighbor_avg_dxdt_48_volume_per_V_90um", - "neighbor_avg_dxdt_48_volume_per_V_whole_colony", "dataset", "height_percentile", "raw_full_zstack_path", @@ -343,7 +353,7 @@ def remove_columns(df, column_list=COLUMNS_TO_DROP): return df -def add_change_over_time(df): +def add_change_over_time(df, dxdt_feature_list=None, bin_interval_list=None): """ Adds new columns to the dataframe with the local rate of change for a given feature for a nucleus @@ -351,6 +361,10 @@ def add_change_over_time(df): ---------- df : pandas.DataFrame The input dataframe. + dxdt_feature_list : list + List of columns to compute growth rates for + bin_interval_list : list + List of integers, which represents the number of frames to compute growth over Returns ------- @@ -358,8 +372,7 @@ def add_change_over_time(df): The dataframe with the new column added. 
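A hedged sketch of the cache-naming behavior introduced here: the filename now encodes whether growth outliers were removed, so the two cached variants no longer collide on disk.

```python
from nuc_morph_analysis.lib.preprocessing.global_dataset_filtering import name_local_file

# outliers removed -> the original, unsuffixed filename
print(name_local_file("all_baseline", "with_features", remove_growth_outliers=True))
# .../data/all_baseline_with_features.parquet

# outliers kept -> a distinct, suffixed filename
print(name_local_file("all_baseline", "with_features", remove_growth_outliers=False))
# .../data/all_baseline_with_features_with_growth_outliers.parquet
```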
""" dfm = df.copy() - for bin_interval in compute_change_over_time.BIN_INTERVAL_LIST: - dfm = compute_change_over_time.run_script(dfm, bin_interval=bin_interval) + dfm = compute_change_over_time.run_script(dfm, dxdt_feature_list, bin_interval_list,) # now check that all columns in df have the same dtype as columns in dfm for col in df.columns: @@ -378,4 +391,14 @@ def add_change_over_time(df): # %% if __name__ == "__main__": for dataset in ["all_baseline", "all_feeding_control", "all_drug_perturbation"]: - df = load_dataset_with_features(dataset, load_local=False, save_local=True, num_workers=32) + df = load_dataset_with_features(dataset, + load_local=False, + save_local=True, + remove_growth_outliers=True, + num_workers=32) + df = load_dataset_with_features(dataset, + load_local=False, + save_local=True, + remove_growth_outliers=False, + num_workers=32) + diff --git a/nuc_morph_analysis/lib/visualization/example_tracks.py b/nuc_morph_analysis/lib/visualization/example_tracks.py index 32ea6b82..797140ed 100644 --- a/nuc_morph_analysis/lib/visualization/example_tracks.py +++ b/nuc_morph_analysis/lib/visualization/example_tracks.py @@ -23,4 +23,5 @@ "delta_v_BC_low": 86418, "transition_point_supplement": 82210, "sample_full_trajectories": [97942, 85296, 9808, 77656, 83322], + "volume_dip_example": 75725, } diff --git a/nuc_morph_analysis/lib/visualization/label_tables.py b/nuc_morph_analysis/lib/visualization/label_tables.py index 108b9560..d6b07f08 100644 --- a/nuc_morph_analysis/lib/visualization/label_tables.py +++ b/nuc_morph_analysis/lib/visualization/label_tables.py @@ -56,7 +56,8 @@ def get_scale_factor_table(dataset="all_baseline"): ): pix_size, ("mesh_sa"): pix_size**2, - ("volume", "volume_sub"): pix_size**3, + ("volume", "volume_sub", "volume_change_over_25_minutes"): pix_size**3, + ("fit_volume"): 1, #already scaled in code ("density", "avg_density", "avg_early_density", "avg_late_density"): 1 / pix_size**2, ( @@ -89,9 +90,19 @@ def get_scale_factor_table(dataset="all_baseline"): ("2d_area_nuc_cell_ratio"): 1, } + # add a couple of dxdt columns + dict1.update({'dxdt_48_volume': 1/(time_interval_minutes/60)}) + dict1.update({'dxdt_48_volume_dips_removed_um_unfilled': 1/(time_interval_minutes/60)}) + dict1.update({'neighbor_avg_dxdt_48_volume_dips_removed_um_unfilled_90um': 1/(time_interval_minutes/60)}) + dict1.update({'neighbor_avg_dxdt_48_volume_dips_removed_um_unfilled_whole_colony': 1/(time_interval_minutes/60)}) + + + + dict1.update({'dxdt_48_fit_volume_per_V': (1)/(time_interval_minutes/60)}) + # add non dxdt columns and other non-traditional columns - temp_dict = get_one_to_one_dict(dict1) hours_per_frame = time_interval_minutes / 60 + temp_dict = get_one_to_one_dict(dict1) for feature in DXDT_FEATURE_LIST: dict1.update( { @@ -134,6 +145,8 @@ def get_scale_factor_table(dataset="all_baseline"): dict1.update({f"dvdt_t2-dvdt_t1_neighbors_{bin_interval}_{local_radius_str}": 1}) dict1.update({f"dvdt_t2-dvdt_t1_self_{bin_interval}_{local_radius_str}": 1}) + # important for figure 5 supp: do NOT REMOVE + dict1.update({"dxdt_t2-dxdt_t1": temp_dict['volume'] / (hours_per_frame)}) return dict1 @@ -174,6 +187,7 @@ def get_scale_factor_table(dataset="all_baseline"): "tscale_linearityfit_volume": "Fitted time scaling factor (\u03B1)", "RMSE_linearityfit_volume": "Root mean squared error", "late_growth_rate_by_endpoints": "Growth rate", + "dxdt_48_volume": "Transient Growth Rate", "dxdt_t2-dxdt_t1": "Late average transient growth rate - early average transient growth rate", # Height 
"height": "Height", @@ -271,6 +285,9 @@ def get_scale_factor_table(dataset="all_baseline"): "2d_intensity_min_edge" : "Min distance to (pseudo)cell edge", "2d_intensity_mean_edge" : "Average distance to (pseudo)cell edge", "2d_intensity_max_edge" : "Max distance to (pseudo)cell edge", + + # dip event features + "volume_change_over_25_minutes": "Change in volume in 25 minute window", } @@ -414,6 +431,9 @@ def convert_to_hr(bin_interval, dataset="all_baseline"): "2d_intensity_min_edge" : "Min distance to (pseudo)cell edge", "2d_intensity_mean_edge" : "Average distance to (pseudo)cell edge", "2d_intensity_max_edge" : "Max distance to (pseudo)cell edge", + + # dip event features + "volume_change_over_25_minutes": "Change in volume in 25 minute window", } # units for quantities @@ -457,6 +477,7 @@ def convert_to_hr(bin_interval, dataset="all_baseline"): "difference_volume_at_B", "difference_half_vol_at_C_and_B" "avg_sister_volume_at_B", "volume_sub", + "volume_change_over_25_minutes", ): "(μm\u00B3)", "SA_vol_ratio": "(μm⁻¹)", ( @@ -500,6 +521,7 @@ def convert_to_hr(bin_interval, dataset="all_baseline"): for feature in DXDT_FEATURE_LIST: UNIT_TABLE.update({f"dxdt_{bin_interval}_{feature}": f"({temp_dict[feature][1:-1]}/hr)"}) +temp_dict = get_one_to_one_dict(UNIT_TABLE) # now add the neighborhood columns for local_radius_str in LOCAL_RADIUS_STR_LIST: for feature in NEIGHBOR_FEATURE_LIST: diff --git a/nuc_morph_analysis/lib/visualization/matplotlib_to_axlist.py b/nuc_morph_analysis/lib/visualization/matplotlib_to_axlist.py new file mode 100644 index 00000000..e84ac71f --- /dev/null +++ b/nuc_morph_analysis/lib/visualization/matplotlib_to_axlist.py @@ -0,0 +1,22 @@ +from typing import List, Union, cast +from matplotlib.axes import Axes +from numpy.typing import NDArray + +def type_axlist(plt_axis_np: Union[Axes, NDArray]) -> List[Axes]: + """ + Figure.subplots returns Axes or array of Axes, but it's improperly typed + See discussion here: + https://github.com/numpy/numpy/issues/24738 + + Example: + >>> fig,axlist_untyped = plt.subplots(nrows=2,ncols=1,figsize=(6.5,8)) + >>> axlist: List[Axes] = type_axlist(axlist_untyped) + """ + # This function was adapted from this PR, licensed under BSD-3: + # https://github.com/pytorch/captum/pull/1416/files + plt_axis_list: List[Axes] = [] + if type(plt_axis_np) == Axes: + plt_axis_list = [plt_axis_np] + else: + plt_axis_list = cast(List[Axes], cast(NDArray, plt_axis_np).tolist()) + return plt_axis_list \ No newline at end of file diff --git a/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py b/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py index 5b1d1aef..73956992 100644 --- a/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py +++ b/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py @@ -21,6 +21,7 @@ ) from nuc_morph_analysis.lib.preprocessing.global_dataset_filtering import ( load_dataset_with_features, + add_features, ) from nuc_morph_analysis.lib.visualization.write_mips_for_colorizer import ( save_colony_backdrop_mips, @@ -49,7 +50,6 @@ update_bounding_box_data, ) - @dataclass class NucMorphFeatureSpec: column_name: str @@ -335,12 +335,20 @@ class NucMorphFeatureSpec: NucMorphFeatureSpec("2d_perimeter_pseudo_cell"), NucMorphFeatureSpec("2d_solidity_pseudo_cell"), # extra - NucMorphFeatureSpec("inv_cyto_density"), - NucMorphFeatureSpec("2d_perimeter_nuc_cell_ratio"), - NucMorphFeatureSpec("2d_eccentricity_nuc_cell_ratio"), - NucMorphFeatureSpec("label_pseudo_cell"), - # extra old columns - 
NucMorphFeatureSpec("colony_depth", type=FeatureType.DISCRETE), + NucMorphFeatureSpec('inv_cyto_density'), + NucMorphFeatureSpec('2d_perimeter_nuc_cell_ratio'), + NucMorphFeatureSpec('2d_eccentricity_nuc_cell_ratio'), + NucMorphFeatureSpec('label_pseudo_cell'), + + + # volume dip columns + NucMorphFeatureSpec('colony_depth', type=FeatureType.DISCRETE), + NucMorphFeatureSpec('volume_dips_removed_um_unfilled'), + NucMorphFeatureSpec('dxdt_48_volume_dips_removed_um_unfilled'), + NucMorphFeatureSpec('volume_dips_has_peak'), + NucMorphFeatureSpec('volume_dips_max_volume_change'), + NucMorphFeatureSpec('volume_dips_volume_change_at_region'), + NucMorphFeatureSpec('volume_change_over_25_minutes') ], } @@ -421,6 +429,7 @@ def make_features( features: List[NucMorphFeatureSpec], dataset_name: str, writer: ColorizerDatasetWriter, + make_glossary: bool = False, ): """ Generate the outlier, track, time, centroid, and feature data files. diff --git a/nuc_morph_analysis/lib/visualization/write_data_for_colorizer_README.md b/nuc_morph_analysis/lib/visualization/write_data_for_colorizer_README.md index 25341512..100982b1 100644 --- a/nuc_morph_analysis/lib/visualization/write_data_for_colorizer_README.md +++ b/nuc_morph_analysis/lib/visualization/write_data_for_colorizer_README.md @@ -30,4 +30,11 @@ To overwrite an existing segmented version of the dataset (for example to add/re ``` pdm run nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py --output_dir {existing_output_dir_name} --noframes +pdm run nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py --output_dir //allen/aics/assay-dev/users/Frick/PythonProjects/repos/local_storage/timelapse_feature_explorer_datasets/TFE_new/ --noframes + + + +pdm run nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py --output_dir //allen/aics/assay-dev/users/Frick/PythonProjects/repos/local_storage/timelapse_feature_explorer_datasets/TFE_full/ --parallel + + ``` \ No newline at end of file diff --git a/run_all_manuscript_workflows.py b/run_all_manuscript_workflows.py index f6217cc4..dd0d0162 100644 --- a/run_all_manuscript_workflows.py +++ b/run_all_manuscript_workflows.py @@ -16,7 +16,7 @@ def figure_s1_cell_health(): def figure_s2_s3_s18_segmentation_model_validation(): seg_model_validation_figure_workflow.save_out_specified_image_pairs_with_overlays() import nuc_morph_analysis.analyses.segmentation_model_validation.quantitative_validation_workflow - + # figure 2 images generated using timelapse feature exploroer def figure_3_s4_height_density(): @@ -36,9 +36,10 @@ def figure_s7_growth_outliers(): def figure_4_s8_volume_trajectories(): import nuc_morph_analysis.analyses.volume.figure_4_s8_workflow - def figure_5_s9_local_growth(): + def figure_5_s9_s10_local_growth(): import nuc_morph_analysis.analyses.volume.figure_5_s9_workflow - #s10 coming soon + import nuc_morph_analysis.analyses.volume.FigS10_workflow + def figure_6_s11_compensation(): import nuc_morph_analysis.analyses.volume_variation.figure_6_s11_workflow