AllenCell · jcass11 · Dec 4, 2024 · Dec 2, 2024 · Dec 2, 2024 · Dec 2, 2024
diff --git a/nuc_morph_analysis/analyses/evaluate_filters_and_outliers/evaluate_time_binning.py b/nuc_morph_analysis/analyses/evaluate_filters_and_outliers/evaluate_time_binning.py
@@ -2,6 +2,3 @@
 from nuc_morph_analysis.lib.preprocessing import add_times
 fig,ax = add_times.validate_dig_time_with_plot()
 fig.show()
-#%%
-fig,ax = add_times.validate_dig_time_with_plot(old_method=True)
-fig.show()
diff --git a/nuc_morph_analysis/analyses/neighbor_of_X/misc_neighbor_helper_functions.py b/nuc_morph_analysis/analyses/neighbor_of_X/misc_neighbor_helper_functions.py
@@ -59,11 +59,6 @@ def get_a_cells_neighbors_as_track_id_list(df0,main_track_id,TIMEPOINT,return_se
     # find all neighbors that are immediate neighbors (and have passed the transition point)
     immediate_neighbor_track_ids = find_immediate_neighbors(df_after_transition_only,main_track_id,TIMEPOINT)
 
-    # # compute distances between nuclei and sort them by distance to main track
-    # dftime = dfcolony[dfcolony["index_sequence"] == TIMEPOINT]
-    # dist = compute_distances_between_nuclei(dftime,MAIN_TRACK_ID)
-    # sorted_dist,sorted_cell_ids,sorted_track_ids,sorted_index = get_ordered_list_of_nuclei_by_distance_to_track(dist,dftime)
-
     # combine the main track with the immediate neighbors into a list
     track_id_list = list(immediate_neighbor_track_ids)
     if return_self:

diff --git a/nuc_morph_analysis/analyses/volume/FigS10_workflow.py b/nuc_morph_analysis/analyses/volume/FigS10_workflow.py
@@ -20,6 +20,8 @@
 from nuc_morph_analysis.lib.visualization import plotting_tools
 from nuc_morph_analysis.analyses.volume_variation import plot_features
 
+from nuc_morph_analysis.lib.visualization.example_tracks import EXAMPLE_TRACKS
+
 #%%
 # now load data with growth outliers
 df_outliers = load_dataset_with_features('all_baseline', remove_growth_outliers=False)
@@ -36,7 +38,7 @@
 # %%
 # S10 panel A, left
 # choose one track and its neighbors (at a given time) to plot over time
-MAIN_TRACK_ID = 75725
+MAIN_TRACK_ID = EXAMPLE_TRACKS['volume_dip_example']
 TIMEPOINT = 239
 track_id_list = get_a_cells_neighbors_as_track_id_list(df_outliers,MAIN_TRACK_ID,TIMEPOINT)
 fig,axlist = plot_neighbors_volume_over_time(df_outliers,track_id_list)
@@ -57,16 +59,6 @@
     save_and_show_plot(save_path,ext,fig,transparent=False,keep_open=True)
 plt.show()
 
-#%%
-# S10 panel A, right
-# view data in timelapse feature explorer (TFE) with the following link 
-# TODO: update data path to be the final TFE url
-datapath = "https%3A%2F%2Fdev-aics-dtp-001.int.allencell.org%2Fassay-dev%2Fusers%2FFrick%2FPythonProjects%2Frepos%2Flocal_storage%2Ftimelapse_feature_explorer_datasets%2FTFE_new%2Fexploratory_dataset%2Fsmall%2Fmanifest.json"
-url = f"https://timelapse.allencell.org/viewer?dataset={datapath}&feature=change_in_volume_in_25_minute_window&t=239&filters=growth_outlier_filter%3A%3A3%2Cbaseline_colonies_dataset_filter%3A%3A3%2Cfullinterphase_dataset_filter%3A%3A3%2Clineageannotated_dataset_filter%3A%3A3%2Cvolume_jumps_right_magnitude_mask%3A%3A43.916%3A254.426&range=-200%2C200&color=matplotlib-purple_orange&palette-key=adobe&bg-sat=100&bg-brightness=100&fg-alpha=100&outlier-color=c0c0c0&outlier-mode=1&filter-color=dddddd&filter-mode=1&tab=scatter_plot&scalebar=1&timestamp=1&path=1&keep-range=1&scatter-range=all&scatter-x=scatterplot_time&scatter-y=volume"
-print(url)
-# save url as text file in figure folder
-with open(save_dir / 'S10_A_right_url.txt','w') as f:
-    f.write(url)
 # %%
 # S10 panel B, illustrate the effect of volume dip on transient growth rate
 
@@ -79,7 +71,7 @@
 # add_time_point_lines=False,timepoint=None
 volume_dip_example_track = 86570
 
-main_track_list = [(volume_dip_example_track, 263)] #[(86570, 263),(75725, 239), (71532,131)]
+main_track_list = [(volume_dip_example_track, 263)]
 for main_track_id, timepoint in main_track_list:
     ax = axlist[0]
     ax = plot_track_with_volume_dip(ax,df_full,main_track_id,add_time_point_lines=True,timepoint=timepoint)
@@ -110,7 +102,8 @@
 # S10 panel C step3
 df_track = df_full[df_full.track_id == volume_dip_example_track]
 fig,axlist = plt.subplots(2,1,figsize=(fw,fh),sharey=False)
-axlist = np.asarray([axlist]) if type(axlist) != np.ndarray else axlist
+axlist = np.atleast_1d(axlist) # for mypy; a lone Axes becomes a 1-element array so axlist[0] stays valid
+assert isinstance(axlist, np.ndarray) # for mypy
 
 _ = plot_track_with_fit_line(df_track,axlist[0],
                                 ycol1='volume',
@@ -173,8 +166,6 @@
         yn = df_all['number_of_nuclei']
         y = yd / yn *100
 
-        print(np.where(df_all['number_of_dips'] > 5))
-
         zorderval = 1 if threshold !=0 else -1 # to ensure large colony is in front when it has fewer peaks
         ax.plot(x,y,label=colony,color=plotting_tools.COLONY_COLORS[colony],zorder=ci*1000*zorderval)
     ax.set_xlabel(f"{xlabel} {xunit}")
@@ -193,9 +184,6 @@
                 markerscale=1,handlelength=1,
                 labelspacing=0,
                 )
-    # if threshold == -50:
-    #     ax.set_yticks(np.arange(0,110,10))
-    #     ax.set_ylim(0,30)
     if threshold != -100:
         curr_ylim = ax.get_ylim()
 
@@ -228,7 +216,6 @@
     fig,ax = adjust_axis_positions(fig,ax,curr_pos=None,width=0.9,height=0.6,space=0.075)
 
     plt.suptitle(f"{ycol}")
-    # savepath = figdir / f"cell_cycle_bins_{ycol}_{xcol1}_{plot_type}.png"
     for ext in ['.png','.pdf']:
         savepath = save_dir / f"S10_E-cell_cycle_bins_for_only_{colony}_{ycol}_{ext}"
         save_and_show_plot(str(savepath),ext,fig,transparent=False,keep_open=True)
@@ -237,7 +224,6 @@
 #%%
 # S10 panel F and G
 colony='all_baseline'
-# color = "colony" if colony != "all_baseline" else "#808080"
 
 dfc = df_full[df_full["colony"] == colony] if colony != "all_baseline" else df_full
 for local_radius_str in ["90um", "whole_colony"]:

diff --git a/nuc_morph_analysis/analyses/volume/extra_dxdt_analysis/correlating_transient_growth_rates.py b/nuc_morph_analysis/analyses/volume/extra_dxdt_analysis/correlating_transient_growth_rates.py
@@ -61,7 +61,7 @@
         dfcat = pd.merge(df1,df2,on='index_sequence',suffixes=('_1','_2'),how='inner')
         dfcat.dropna(subset=[f'nanmean_1',f'nanmean_2'],inplace=True)
 
-        x = dfcat[f'nanmean_1']
+        x = dfcat[f'nanmean_1'].astype('float64')  # explicit float dtype so .values below is a plain NumPy array
         y = dfcat[f'nanmean_2']
 
         reg = LinearRegression().fit(x.values.reshape(-1,1),y)

diff --git a/nuc_morph_analysis/analyses/volume/figure_5_s9_workflow.py b/nuc_morph_analysis/analyses/volume/figure_5_s9_workflow.py
@@ -248,7 +248,6 @@
     fig,ax = adjust_axis_positions(fig,ax,curr_pos=None,width=0.9,height=0.6,space=0.075)
 
     plt.suptitle(f"{ycol}")
-    # savepath = figdir / f"cell_cycle_bins_{ycol}_{xcol1}_{plot_type}.png"
     for ext in ['.png','.pdf']:
         savepath = Path(figdir) / f"S5_E-cell_cycle_bins_for_only_{colony}_{ycol}_{ext}"
         save_and_show_plot(str(savepath),ext,fig,transparent=False,keep_open=True)

diff --git a/nuc_morph_analysis/analyses/volume/filter_out_dips.py b/nuc_morph_analysis/analyses/volume/filter_out_dips.py
@@ -159,7 +159,6 @@ def find_peaks_and_collect_features(vol_det_array, vol_array, index_sequence_vec
     # convert the output to a dataframe
     dfout_list = [pd.DataFrame(x.values(),columns = index_sequence_vec, index=x.keys()).T for x in out]
     dfout_list = [x.reset_index().rename(columns={'index':'index_sequence'}).set_index('index_sequence') for x in dfout_list]
-    # keys = ['volume_dips_peak_mask_at_region','volume_dips_centers','volume_dips_has_peak','volume_dips_prom','volume_dips_left_base','volume_dips_right_base','volume_dips_y2_magnitude']
     dfout = pd.concat(dfout_list,axis=0,keys=track_id_vec, names=['track_id']).reset_index()
 
     return dfout
@@ -335,7 +334,6 @@ def filter_out_volume_dips(dfd, volume_cols, find_dips=True, use_detrended=True,
     # they are added during the pivot operation
     # we will drop these rows
     not_in_dfdi = dfmi.index.difference(dfdi.index)
-    # print(f"dropping {len(not_in_dfdi)} rows")
     dfmi.drop(not_in_dfdi, inplace=True)
 
     dfmi.loc[dfmi.index.values, "CellId"] = dfdi.loc[dfmi.index.values, "CellId"]

diff --git a/nuc_morph_analysis/analyses/volume/plot_help.py b/nuc_morph_analysis/analyses/volume/plot_help.py
@@ -34,7 +34,6 @@ def adjust_axis_positions(fig,ax,curr_pos=None,width=1,height=0.7,space=0.075,ke
     fw,fh = fig.get_size_inches()
     for ci,cax in enumerate(ax):
         # make the axis = 1.0" wide x 0.7" tall
-        print(width,height,fw,fh)
         if curr_pos is None:
             curr_pos = [1,1 +  height,width,height]
         else:
@@ -119,10 +118,7 @@ def plot_dfg(dfcc,xcol,ycol,labelstr,curr_ax,plot_type='mean',colorby=None,requi
     # remove rows with less than 10 counts
     dfg = group_and_extract(dfcc,xcol,ycol)
     dfgindex = dfg['count']<required_N
-    print(f" timepoints with less than {required_N} counts: {dfg[dfgindex].index.values}")
-    dfg= dfg[dfg['count'] >= required_N]
-    print(labelstr,dfg['count'].min(),dfg['count'].max(),dfg['count'].mean(),dfg['count'].sum(), "t=",dfg.shape[0])
-
+    dfg= dfg[dfg['count'] >= required_N]    
 
     xscale,xlabel,xunit,_ = get_plot_labels_for_metric(xcol)
     yscale,ylabel,yunit,_ = get_plot_labels_for_metric(ycol)

diff --git a/nuc_morph_analysis/lib/preprocessing/add_features.py b/nuc_morph_analysis/lib/preprocessing/add_features.py
@@ -1,5 +1,6 @@
 from nuc_morph_analysis.analyses.lineage.get_features import lineage_trees
 from nuc_morph_analysis.lib.visualization.plotting_tools import get_plot_labels_for_metric
+from nuc_morph_analysis.lib.preprocessing.compute_change_over_time import run_script
 import numpy as np
 
 FRAME_COL = {"Ff": "A", "frame_transition": "B", "Fb": "C"}
@@ -683,3 +684,41 @@ def add_mean_features(df,
     multiplier_list = [get_plot_labels_for_metric(x)[0] for x in feature_list]
     df = add_mean_feature_over_trajectory(df, feature_list, multiplier_list)
     return df
+
+def add_volume_change_over_25_minute_window(df, bin_interval=5):
+    """
+    Add 'volume_change_over_25_minutes' (pixels^3): the change in volume relative to
+    `bin_interval` frames in the past; useful for identifying volume dips (see Fig S10).
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        The input dataframe; a copy is processed, `df` itself is not modified here.
+    bin_interval : int
+        Number of frames to compute the change in volume over (default 5 = 25 minutes).
+
+    Returns
+    -------
+    dfm : pandas.DataFrame
+        The dataframe with the new column 'volume_change_over_25_minutes' added.
+
+    Raises
+    ------
+    Exception
+        If the number of rows changed while computing the new column.
+    """
+    # run the compute_change_over_time workflow on a copy for the given bin_interval
+    dfm = run_script(df.copy(), ['volume'], [bin_interval], time_location='end')
+    # key on bin_interval, not a literal 5; assumes output is named 'dxdt_{n}_volume_end' - TODO confirm
+    dfm['volume_change_over_25_minutes'] = dfm[f'dxdt_{bin_interval}_volume_end'] * bin_interval
+    # report (without failing) any input column whose dtype changed
+    for col in df.columns:
+        if dfm[col].dtype != df[col].dtype:
+            print(f"column {col} has dtype {dfm[col].dtype} in dfm and {df[col].dtype} in df")
+    if dfm.shape[0] != df.shape[0]:
+        raise Exception(
+            f"The loaded manifest has {df.shape[0]} rows and your "
+            f"final manifest has {dfm.shape[0]} rows. "
+            "Please revise code to leave manifest rows unchanged."
+        )
+    return dfm
diff --git a/nuc_morph_analysis/lib/preprocessing/add_times.py b/nuc_morph_analysis/lib/preprocessing/add_times.py
@@ -512,25 +512,15 @@ def digitize_time_column(df, minval, maxval, number_of_bins=None, step_size=None
     df[new_col] = dig_time_array
     return df
 
-def validate_dig_time_with_plot(time_array = np.linspace(0,1,1000), number_of_bins=6, old_method=False):
+def validate_dig_time_with_plot(time_array = np.linspace(0,1,1000), number_of_bins=6):
     """
     this visualizes how the input array is binned by plotting
     the input array (x-axis) vs the digitized array (y-axis)
     """
 
-    if old_method:
-        time_array = np.linspace(0,1,1000)
-        TIME_BIN = 1/number_of_bins
-        df_agg = pd.DataFrame({'normalized_time':time_array})
-        timedig_bins = np.arange(0, 1 + TIME_BIN, TIME_BIN)
-        inds = np.digitize(df_agg["normalized_time"], timedig_bins)
-        df_agg["dig_time"] = timedig_bins[inds - 1]
-        dig_time = df_agg['dig_time'].values
-        extrastr = '\n(old method)'
-    else:
-        bin_centers = determine_bin_centers(0,1,number_of_bins=number_of_bins)
-        dig_time = digitize_time_array(time_array,bin_centers)
-        extrastr = ''
+    bin_centers = determine_bin_centers(0,1,number_of_bins=number_of_bins)
+    dig_time = digitize_time_array(time_array,bin_centers)
+    extrastr = ''
     fig, ax = plt.subplots(figsize=(3,3))
     plt.plot(time_array,dig_time)
     plt.xlabel('time')

diff --git a/nuc_morph_analysis/lib/preprocessing/compute_change_over_time.py b/nuc_morph_analysis/lib/preprocessing/compute_change_over_time.py
@@ -131,28 +131,3 @@ def run_script(df=None, dxdt_feature_list = None, bin_interval_list=None, exclud
     assert dforig.index.name == "CellId"
     return dforig
 
-
-def add_dvdt_over_V(df,columns=None,volume_col = 'volume'):
-    """
-    adds dvdt over V for all timepoints if dxdt_{time}_volume columns exist
-
-    Parameters
-    ----------
-    df : pd.DataFrame
-        dataframe with columns  columns + ['volume']
-    columns : list
-        list of columns to compute change normalized by volume
-    volume_col : str
-        name of the volume column, default is 'volume'
-
-    Returns
-    -------
-    df : pd.DataFrame
-        dataframe with columns ['{column}_per_V'] added
-    """
-    if columns is None:
-        columns = [f"{DXDT_PREFIX}{bin_interval}_{feature}" for bin_interval in BIN_INTERVAL_LIST for feature in DXDT_FEATURE_LIST]
-
-    for col in columns:
-        df[f"{col}_per_V"] = df[col] / df[volume_col]
-    return df
diff --git a/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py b/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py
@@ -20,7 +20,6 @@
 from nuc_morph_analysis.analyses.height.add_colony_time import add_colony_time_all_datasets
 from nuc_morph_analysis.lib.visualization.plotting_tools import get_plot_labels_for_metric
 from nuc_morph_analysis.lib.preprocessing import labeling_neighbors_helper
-from nuc_morph_analysis.lib.preprocessing.compute_change_over_time import add_dvdt_over_V
 from nuc_morph_analysis.analyses.volume import filter_out_dips
 
 
@@ -206,9 +205,8 @@ def process_all_tracks(df, dataset, remove_growth_outliers, num_workers):
     df = add_fov_touch_timepoint_for_colonies(df)
     df = add_features.add_non_interphase_size_shape_flag(df)
     df = add_change_over_time(df)
-    df = add_volume_change_over_25_minute_window(df)
+    df = add_features.add_volume_change_over_25_minute_window(df)
 
-    df = add_dvdt_over_V(df)
     df = add_neighborhood_avg_features.run_script(df, num_workers=num_workers)
     df = add_neighborhood_avg_features_lrm.run_script(df, num_workers=num_workers, 
                                                 feature_list=["volume", "height", "xy_aspect", "mesh_sa", "2d_area_nuc_cell_ratio"],
@@ -389,45 +387,6 @@ def add_change_over_time(df, dxdt_feature_list=None, bin_interval_list=None):
         )
     return dfm
 
-def add_volume_change_over_25_minute_window(df, bin_interval=5):
-    """
-    Adds a new column to the dataframe that quantifies how much the volume has changed relative to 
-    25 minutes in the past (units are pixels^3)
-    this is useful for identifying volume dips in all tracks (see Fig S10)
-
-    Parameters
-    ----------
-    df : pandas.DataFrame
-        The input dataframe.
-    bin_interval : int
-        represents the number of frames to compute change in volume over
-        default is 5 frames, which is 25 minutes
-
-    Returns
-    -------
-    df : pandas.DataFrame
-        The dataframe with the new column 'volume_change_over_25_minutes' added.
-        (units are pixels^3)
-    """
-    dfm = df.copy()
-    dfm = compute_change_over_time.run_script(dfm,
-                                               ['volume'],
-                                                 [bin_interval],
-                                                   time_location='end')
-    dfm['volume_change_over_25_minutes'] = dfm['dxdt_5_volume_end']*5
-
-    # now check that all columns in df have the same dtype as columns in dfm
-    for col in df.columns:
-        if dfm[col].dtype != df[col].dtype:
-            print(f"column {col} has dtype {dfm[col].dtype} in dfm and {df[col].dtype} in df")
-
-    if dfm.shape[0] != df.shape[0]:
-        raise Exception(
-            f"The loaded manifest has {df.shape[0]} rows and your \
-            final manifest has {dfm.shape[0]} rows.\
-            Please revise code to leave manifest rows unchanged."
-        )
-    return dfm
 
 # %%
 if __name__ == "__main__":

diff --git a/nuc_morph_analysis/lib/visualization/example_tracks.py b/nuc_morph_analysis/lib/visualization/example_tracks.py
@@ -23,4 +23,5 @@
     "delta_v_BC_low": 86418,
     "transition_point_supplement": 82210,
     "sample_full_trajectories": [97942, 85296, 9808, 77656, 83322],
+    "volume_dip_example": 75725,
 }
diff --git a/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py b/nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py
@@ -517,13 +517,6 @@ def make_dataset(
     # load the dataset once
     df_all = load_dataset_with_features("all_baseline", remove_growth_outliers=False)
 
-    # df_all2 = df_all.copy()
-    # df_all2.drop(columns=["exiting_mitosis"], inplace=True)
-    # df_all2 = add_features.add_division_entry_and_exit_annotations(df_all2,formation_threshold=12)
-    # df_all2.rename(columns={"exiting_mitosis": "exiting_mitosis_short"}, inplace=True)
-
-    # df_all = df_all.join(df_all2[['exiting_mitosis_short']])
-
     for filter in filters:
         output_dir_subset = Path(output_dir) / filter
         output_dir_subset.mkdir(parents=True, exist_ok=True)

diff --git a/nuc_morph_analysis/lib/visualization/write_data_for_colorizer_README.md b/nuc_morph_analysis/lib/visualization/write_data_for_colorizer_README.md
@@ -30,11 +30,4 @@ To overwrite an existing segmented version of the dataset (for example to add/re
 
 ```
 pdm run nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py --output_dir {existing_output_dir_name} --noframes
-pdm run nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py --output_dir //allen/aics/assay-dev/users/Frick/PythonProjects/repos/local_storage/timelapse_feature_explorer_datasets/TFE_new/ --noframes
-
-
-
-pdm run nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py --output_dir //allen/aics/assay-dev/users/Frick/PythonProjects/repos/local_storage/timelapse_feature_explorer_datasets/TFE_full/ --parallel
-
-
 ```