Skip to content
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
7ab870d
add example track id to dict
jcass11 Dec 2, 2024
50d8b8b
remove commented code
jcass11 Dec 2, 2024
a651115
remove use of old method
jcass11 Dec 2, 2024
cbf4360
remove commented code
jcass11 Dec 2, 2024
6fe34cb
remove personal filepath example
jcass11 Dec 2, 2024
2f266b4
move 25 min time window change in time calc to add features
jcass11 Dec 2, 2024
c238bcc
remove dvdt over v that is no longer used
jcass11 Dec 2, 2024
5bf93c2
remove old method
jcass11 Dec 2, 2024
c77dc3b
remove print statements and commented out code
jcass11 Dec 2, 2024
5123c51
use example tracks dict, remove TFE link and remove print statements …
jcass11 Dec 2, 2024
0e843b0
try to fix mypy error with reshape
jcass11 Dec 2, 2024
1b8d698
try to fix axis array issue causing mypy tests to fail
jcass11 Dec 2, 2024
d32b782
fix import from moving function
jcass11 Dec 2, 2024
0755338
fix imports from moving function to add features
jcass11 Dec 2, 2024
bf362d4
continuing to try to fix mypy ax array errors
jcass11 Dec 2, 2024
6869b5d
continuing to try to fix mypy ax array errors
jcass11 Dec 2, 2024
e267c5a
remove print statement that prints a ton to the terminal
jcass11 Dec 3, 2024
5c8303f
Resolve matplotlib subplot axes list type
pgarrison Dec 3, 2024
594ba2f
Docstring for type_axlist
pgarrison Dec 3, 2024
1b344c7
Remove unused imports
pgarrison Dec 3, 2024
88bf22f
Fix save path/name types (also save_dir/save_dir/savename issue)
pgarrison Dec 3, 2024
2307e3d
Fix mypy path issue
jcass11 Dec 3, 2024
7e3833b
Fix mypy path error
jcass11 Dec 3, 2024
d3e5129
update comment - s10 is added, no longer "coming soon"
jcass11 Dec 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,3 @@
from nuc_morph_analysis.lib.preprocessing import add_times
fig,ax = add_times.validate_dig_time_with_plot()
fig.show()
#%%
fig,ax = add_times.validate_dig_time_with_plot(old_method=True)
fig.show()
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,6 @@ def get_a_cells_neighbors_as_track_id_list(df0,main_track_id,TIMEPOINT,return_se
# find all neighbors that are immediate neighbors (and have passed the transition point)
immediate_neighbor_track_ids = find_immediate_neighbors(df_after_transition_only,main_track_id,TIMEPOINT)

# # compute distances between nuclei and sort them by distance to main track
# dftime = dfcolony[dfcolony["index_sequence"] == TIMEPOINT]
# dist = compute_distances_between_nuclei(dftime,MAIN_TRACK_ID)
# sorted_dist,sorted_cell_ids,sorted_track_ids,sorted_index = get_ordered_list_of_nuclei_by_distance_to_track(dist,dftime)

# combine the main track with the immediate neighbors into a list
track_id_list = list(immediate_neighbor_track_ids)
if return_self:
Expand Down
26 changes: 6 additions & 20 deletions nuc_morph_analysis/analyses/volume/FigS10_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
from nuc_morph_analysis.lib.visualization import plotting_tools
from nuc_morph_analysis.analyses.volume_variation import plot_features

from nuc_morph_analysis.lib.visualization.example_tracks import EXAMPLE_TRACKS

#%%
# now load data with growth outliers
df_outliers = load_dataset_with_features('all_baseline', remove_growth_outliers=False)
Expand All @@ -36,7 +38,7 @@
# %%
# S10 panel A, left
# choose one track and its neighbors (at a given time) to plot over time
MAIN_TRACK_ID = 75725
MAIN_TRACK_ID = EXAMPLE_TRACKS['volume_dip_example']
TIMEPOINT = 239
track_id_list = get_a_cells_neighbors_as_track_id_list(df_outliers,MAIN_TRACK_ID,TIMEPOINT)
fig,axlist = plot_neighbors_volume_over_time(df_outliers,track_id_list)
Expand All @@ -57,16 +59,6 @@
save_and_show_plot(save_path,ext,fig,transparent=False,keep_open=True)
plt.show()

#%%
# S10 panel A, right
# view data in timelapse feature explorer (TFE) with the following link
# TODO: update data path to be the final TFE url
datapath = "https%3A%2F%2Fdev-aics-dtp-001.int.allencell.org%2Fassay-dev%2Fusers%2FFrick%2FPythonProjects%2Frepos%2Flocal_storage%2Ftimelapse_feature_explorer_datasets%2FTFE_new%2Fexploratory_dataset%2Fsmall%2Fmanifest.json"
url = f"https://timelapse.allencell.org/viewer?dataset={datapath}&feature=change_in_volume_in_25_minute_window&t=239&filters=growth_outlier_filter%3A%3A3%2Cbaseline_colonies_dataset_filter%3A%3A3%2Cfullinterphase_dataset_filter%3A%3A3%2Clineageannotated_dataset_filter%3A%3A3%2Cvolume_jumps_right_magnitude_mask%3A%3A43.916%3A254.426&range=-200%2C200&color=matplotlib-purple_orange&palette-key=adobe&bg-sat=100&bg-brightness=100&fg-alpha=100&outlier-color=c0c0c0&outlier-mode=1&filter-color=dddddd&filter-mode=1&tab=scatter_plot&scalebar=1&timestamp=1&path=1&keep-range=1&scatter-range=all&scatter-x=scatterplot_time&scatter-y=volume"
print(url)
# save url as text file in figure folder
with open(save_dir / 'S10_A_right_url.txt','w') as f:
f.write(url)
# %%
# S10 panel B, illustrate the effect of volume dip on transient growth rate

Expand All @@ -79,7 +71,7 @@
# add_time_point_lines=False,timepoint=None
volume_dip_example_track = 86570

main_track_list = [(volume_dip_example_track, 263)] #[(86570, 263),(75725, 239), (71532,131)]
main_track_list = [(volume_dip_example_track, 263)]
for main_track_id, timepoint in main_track_list:
ax = axlist[0]
ax = plot_track_with_volume_dip(ax,df_full,main_track_id,add_time_point_lines=True,timepoint=timepoint)
Expand Down Expand Up @@ -110,7 +102,8 @@
# S10 panel C step3
df_track = df_full[df_full.track_id == volume_dip_example_track]
fig,axlist = plt.subplots(2,1,figsize=(fw,fh),sharey=False)
axlist = np.asarray([axlist]) if type(axlist) != np.ndarray else axlist
axlist = np.asarray(axlist) if type(axlist) != np.ndarray else axlist # for mypy
assert type(axlist) == np.ndarray # for mypy

_ = plot_track_with_fit_line(df_track,axlist[0],
ycol1='volume',
Expand Down Expand Up @@ -173,8 +166,6 @@
yn = df_all['number_of_nuclei']
y = yd / yn *100

print(np.where(df_all['number_of_dips'] > 5))

zorderval = 1 if threshold !=0 else -1 # to ensure large colony is in front when it has fewer peaks
ax.plot(x,y,label=colony,color=plotting_tools.COLONY_COLORS[colony],zorder=ci*1000*zorderval)
ax.set_xlabel(f"{xlabel} {xunit}")
Expand All @@ -193,9 +184,6 @@
markerscale=1,handlelength=1,
labelspacing=0,
)
# if threshold == -50:
# ax.set_yticks(np.arange(0,110,10))
# ax.set_ylim(0,30)
if threshold != -100:
curr_ylim = ax.get_ylim()

Expand Down Expand Up @@ -228,7 +216,6 @@
fig,ax = adjust_axis_positions(fig,ax,curr_pos=None,width=0.9,height=0.6,space=0.075)

plt.suptitle(f"{ycol}")
# savepath = figdir / f"cell_cycle_bins_{ycol}_{xcol1}_{plot_type}.png"
for ext in ['.png','.pdf']:
savepath = save_dir / f"S10_E-cell_cycle_bins_for_only_{colony}_{ycol}_{ext}"
save_and_show_plot(str(savepath),ext,fig,transparent=False,keep_open=True)
Expand All @@ -237,7 +224,6 @@
#%%
# S10 panel F and G
colony='all_baseline'
# color = "colony" if colony != "all_baseline" else "#808080"

dfc = df_full[df_full["colony"] == colony] if colony != "all_baseline" else df_full
for local_radius_str in ["90um", "whole_colony"]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
dfcat = pd.merge(df1,df2,on='index_sequence',suffixes=('_1','_2'),how='inner')
dfcat.dropna(subset=[f'nanmean_1',f'nanmean_2'],inplace=True)

x = dfcat[f'nanmean_1']
x = dfcat[f'nanmean_1'].astype('array')
y = dfcat[f'nanmean_2']

reg = LinearRegression().fit(x.values.reshape(-1,1),y)
Expand Down
1 change: 0 additions & 1 deletion nuc_morph_analysis/analyses/volume/figure_5_s9_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,6 @@
fig,ax = adjust_axis_positions(fig,ax,curr_pos=None,width=0.9,height=0.6,space=0.075)

plt.suptitle(f"{ycol}")
# savepath = figdir / f"cell_cycle_bins_{ycol}_{xcol1}_{plot_type}.png"
for ext in ['.png','.pdf']:
savepath = Path(figdir) / f"S5_E-cell_cycle_bins_for_only_{colony}_{ycol}_{ext}"
save_and_show_plot(str(savepath),ext,fig,transparent=False,keep_open=True)
Expand Down
2 changes: 0 additions & 2 deletions nuc_morph_analysis/analyses/volume/filter_out_dips.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,6 @@ def find_peaks_and_collect_features(vol_det_array, vol_array, index_sequence_vec
# convert the output to a dataframe
dfout_list = [pd.DataFrame(x.values(),columns = index_sequence_vec, index=x.keys()).T for x in out]
dfout_list = [x.reset_index().rename(columns={'index':'index_sequence'}).set_index('index_sequence') for x in dfout_list]
# keys = ['volume_dips_peak_mask_at_region','volume_dips_centers','volume_dips_has_peak','volume_dips_prom','volume_dips_left_base','volume_dips_right_base','volume_dips_y2_magnitude']
dfout = pd.concat(dfout_list,axis=0,keys=track_id_vec, names=['track_id']).reset_index()

return dfout
Expand Down Expand Up @@ -335,7 +334,6 @@ def filter_out_volume_dips(dfd, volume_cols, find_dips=True, use_detrended=True,
# they are added during the pivot operation
# we will drop these rows
not_in_dfdi = dfmi.index.difference(dfdi.index)
# print(f"dropping {len(not_in_dfdi)} rows")
dfmi.drop(not_in_dfdi, inplace=True)

dfmi.loc[dfmi.index.values, "CellId"] = dfdi.loc[dfmi.index.values, "CellId"]
Expand Down
6 changes: 1 addition & 5 deletions nuc_morph_analysis/analyses/volume/plot_help.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ def adjust_axis_positions(fig,ax,curr_pos=None,width=1,height=0.7,space=0.075,ke
fw,fh = fig.get_size_inches()
for ci,cax in enumerate(ax):
# make the axis = 1.0" wide x 0.7" tall
print(width,height,fw,fh)
if curr_pos is None:
curr_pos = [1,1 + height,width,height]
else:
Expand Down Expand Up @@ -119,10 +118,7 @@ def plot_dfg(dfcc,xcol,ycol,labelstr,curr_ax,plot_type='mean',colorby=None,requi
# remove rows with less than 10 counts
dfg = group_and_extract(dfcc,xcol,ycol)
dfgindex = dfg['count']<required_N
print(f" timepoints with less than {required_N} counts: {dfg[dfgindex].index.values}")
dfg= dfg[dfg['count'] >= required_N]
print(labelstr,dfg['count'].min(),dfg['count'].max(),dfg['count'].mean(),dfg['count'].sum(), "t=",dfg.shape[0])

dfg= dfg[dfg['count'] >= required_N]

xscale,xlabel,xunit,_ = get_plot_labels_for_metric(xcol)
yscale,ylabel,yunit,_ = get_plot_labels_for_metric(ycol)
Expand Down
39 changes: 39 additions & 0 deletions nuc_morph_analysis/lib/preprocessing/add_features.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from nuc_morph_analysis.analyses.lineage.get_features import lineage_trees
from nuc_morph_analysis.lib.visualization.plotting_tools import get_plot_labels_for_metric
from nuc_morph_analysis.lib.preprocessing.compute_change_over_time import run_script
import numpy as np

FRAME_COL = {"Ff": "A", "frame_transition": "B", "Fb": "C"}
Expand Down Expand Up @@ -683,3 +684,41 @@ def add_mean_features(df,
multiplier_list = [get_plot_labels_for_metric(x)[0] for x in feature_list]
df = add_mean_feature_over_trajectory(df, feature_list, multiplier_list)
return df

def add_volume_change_over_25_minute_window(df, bin_interval=5):
    """
    Adds a new column to the dataframe that quantifies how much the volume has changed relative to
    `bin_interval` frames in the past (25 minutes with the default of 5 frames; units are pixels^3)
    this is useful for identifying volume dips in all tracks (see Fig S10)

    Parameters
    ----------
    df : pandas.DataFrame
        The input dataframe. A copy is processed; `df` itself is not passed to the
        change-over-time workflow.
    bin_interval : int
        represents the number of frames to compute change in volume over
        default is 5 frames, which is 25 minutes

    Returns
    -------
    dfm : pandas.DataFrame
        The dataframe with the new column 'volume_change_over_25_minutes' added.
        (units are pixels^3)
        The column keeps this name for backward compatibility even when a
        non-default `bin_interval` is used.

    Raises
    ------
    Exception
        If the number of rows in the result differs from the number of rows in `df`.
    """
    dfm = df.copy()
    # run the compute_change_over_time workflow for a given bin_interval
    dfm = run_script(dfm, ['volume'], [bin_interval], time_location='end')

    # Build the rate column name from bin_interval instead of hard-coding 5, so that
    # a non-default bin_interval reads the column that was actually computed.
    # NOTE(review): assumes run_script names its output 'dxdt_{bin_interval}_volume_end',
    # as it does for the default of 5 -- TODO confirm against compute_change_over_time.
    rate_col = f"dxdt_{bin_interval}_volume_end"
    # presumably the rate is per frame, so scaling by the window length gives the
    # total change over the window (the original scaled by the hard-coded 5)
    dfm['volume_change_over_25_minutes'] = dfm[rate_col] * bin_interval

    # now check that all columns in df have the same dtype as columns in dfm
    for col in df.columns:
        if dfm[col].dtype != df[col].dtype:
            print(f"column {col} has dtype {dfm[col].dtype} in dfm and {df[col].dtype} in df")

    # the workflow must only add columns; a changed row count means rows were lost or duplicated
    if dfm.shape[0] != df.shape[0]:
        raise Exception(
            f"The loaded manifest has {df.shape[0]} rows and your "
            f"final manifest has {dfm.shape[0]} rows. "
            "Please revise code to leave manifest rows unchanged."
        )
    return dfm
18 changes: 4 additions & 14 deletions nuc_morph_analysis/lib/preprocessing/add_times.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,25 +512,15 @@ def digitize_time_column(df, minval, maxval, number_of_bins=None, step_size=None
df[new_col] = dig_time_array
return df

def validate_dig_time_with_plot(time_array = np.linspace(0,1,1000), number_of_bins=6, old_method=False):
def validate_dig_time_with_plot(time_array = np.linspace(0,1,1000), number_of_bins=6):
"""
this visualizes how the input array is binned by plotting
the input array (x-axis) vs the digitized array (y-axis)
"""

if old_method:
time_array = np.linspace(0,1,1000)
TIME_BIN = 1/number_of_bins
df_agg = pd.DataFrame({'normalized_time':time_array})
timedig_bins = np.arange(0, 1 + TIME_BIN, TIME_BIN)
inds = np.digitize(df_agg["normalized_time"], timedig_bins)
df_agg["dig_time"] = timedig_bins[inds - 1]
dig_time = df_agg['dig_time'].values
extrastr = '\n(old method)'
else:
bin_centers = determine_bin_centers(0,1,number_of_bins=number_of_bins)
dig_time = digitize_time_array(time_array,bin_centers)
extrastr = ''
bin_centers = determine_bin_centers(0,1,number_of_bins=number_of_bins)
dig_time = digitize_time_array(time_array,bin_centers)
extrastr = ''
fig, ax = plt.subplots(figsize=(3,3))
plt.plot(time_array,dig_time)
plt.xlabel('time')
Expand Down
25 changes: 0 additions & 25 deletions nuc_morph_analysis/lib/preprocessing/compute_change_over_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,28 +131,3 @@ def run_script(df=None, dxdt_feature_list = None, bin_interval_list=None, exclud
assert dforig.index.name == "CellId"
return dforig


def add_dvdt_over_V(df,columns=None,volume_col = 'volume'):
    """
    Normalize rate-of-change columns by volume, writing the results into `df`.

    For every entry `col` of `columns`, a new column named '{col}_per_V' is
    stored in `df`, holding `df[col]` divided element-wise by `df[volume_col]`.

    Parameters
    ----------
    df : pd.DataFrame
        dataframe containing every column in `columns` plus `volume_col`;
        it is modified in place
    columns : list
        columns to divide by the volume. When None, the names are generated
        from the module-level DXDT_PREFIX, BIN_INTERVAL_LIST and
        DXDT_FEATURE_LIST as '{DXDT_PREFIX}{bin_interval}_{feature}'
    volume_col : str
        name of the volume column, default is 'volume'

    Returns
    -------
    df : pd.DataFrame
        the same dataframe object, with the '{column}_per_V' columns added
    """
    if columns is None:
        # fall back to every (bin interval, feature) combination known to this module
        columns = [
            f"{DXDT_PREFIX}{bin_interval}_{feature}"
            for bin_interval in BIN_INTERVAL_LIST
            for feature in DXDT_FEATURE_LIST
        ]

    for rate_col in columns:
        normalized_name = f"{rate_col}_per_V"
        df[normalized_name] = df[rate_col] / df[volume_col]

    return df
43 changes: 1 addition & 42 deletions nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from nuc_morph_analysis.analyses.height.add_colony_time import add_colony_time_all_datasets
from nuc_morph_analysis.lib.visualization.plotting_tools import get_plot_labels_for_metric
from nuc_morph_analysis.lib.preprocessing import labeling_neighbors_helper
from nuc_morph_analysis.lib.preprocessing.compute_change_over_time import add_dvdt_over_V
from nuc_morph_analysis.analyses.volume import filter_out_dips


Expand Down Expand Up @@ -206,9 +205,8 @@ def process_all_tracks(df, dataset, remove_growth_outliers, num_workers):
df = add_fov_touch_timepoint_for_colonies(df)
df = add_features.add_non_interphase_size_shape_flag(df)
df = add_change_over_time(df)
df = add_volume_change_over_25_minute_window(df)
df = add_features.add_volume_change_over_25_minute_window(df)

df = add_dvdt_over_V(df)
df = add_neighborhood_avg_features.run_script(df, num_workers=num_workers)
df = add_neighborhood_avg_features_lrm.run_script(df, num_workers=num_workers,
feature_list=["volume", "height", "xy_aspect", "mesh_sa", "2d_area_nuc_cell_ratio"],
Expand Down Expand Up @@ -389,45 +387,6 @@ def add_change_over_time(df, dxdt_feature_list=None, bin_interval_list=None):
)
return dfm

def add_volume_change_over_25_minute_window(df, bin_interval=5):
    """
    Adds a new column to the dataframe that quantifies how much the volume has changed relative to
    `bin_interval` frames in the past (25 minutes with the default of 5 frames; units are pixels^3)
    this is useful for identifying volume dips in all tracks (see Fig S10)

    Parameters
    ----------
    df : pandas.DataFrame
        The input dataframe. A copy is processed; `df` itself is not passed to the
        change-over-time workflow.
    bin_interval : int
        represents the number of frames to compute change in volume over
        default is 5 frames, which is 25 minutes

    Returns
    -------
    dfm : pandas.DataFrame
        The dataframe with the new column 'volume_change_over_25_minutes' added.
        (units are pixels^3)
        The column keeps this name for backward compatibility even when a
        non-default `bin_interval` is used.

    Raises
    ------
    Exception
        If the number of rows in the result differs from the number of rows in `df`.
    """
    dfm = df.copy()
    dfm = compute_change_over_time.run_script(dfm,
                                              ['volume'],
                                              [bin_interval],
                                              time_location='end')

    # Build the rate column name from bin_interval instead of hard-coding 5, so that
    # a non-default bin_interval reads the column that was actually computed.
    # NOTE(review): assumes run_script names its output 'dxdt_{bin_interval}_volume_end',
    # as it does for the default of 5 -- TODO confirm against compute_change_over_time.
    rate_col = f"dxdt_{bin_interval}_volume_end"
    # presumably the rate is per frame, so scaling by the window length gives the
    # total change over the window (the original scaled by the hard-coded 5)
    dfm['volume_change_over_25_minutes'] = dfm[rate_col] * bin_interval

    # now check that all columns in df have the same dtype as columns in dfm
    for col in df.columns:
        if dfm[col].dtype != df[col].dtype:
            print(f"column {col} has dtype {dfm[col].dtype} in dfm and {df[col].dtype} in df")

    # the workflow must only add columns; a changed row count means rows were lost or duplicated
    if dfm.shape[0] != df.shape[0]:
        raise Exception(
            f"The loaded manifest has {df.shape[0]} rows and your "
            f"final manifest has {dfm.shape[0]} rows. "
            "Please revise code to leave manifest rows unchanged."
        )
    return dfm

# %%
if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions nuc_morph_analysis/lib/visualization/example_tracks.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,5 @@
"delta_v_BC_low": 86418,
"transition_point_supplement": 82210,
"sample_full_trajectories": [97942, 85296, 9808, 77656, 83322],
"volume_dip_example": 75725,
}
Original file line number Diff line number Diff line change
Expand Up @@ -517,13 +517,6 @@ def make_dataset(
# load the dataset once
df_all = load_dataset_with_features("all_baseline", remove_growth_outliers=False)

# df_all2 = df_all.copy()
# df_all2.drop(columns=["exiting_mitosis"], inplace=True)
# df_all2 = add_features.add_division_entry_and_exit_annotations(df_all2,formation_threshold=12)
# df_all2.rename(columns={"exiting_mitosis": "exiting_mitosis_short"}, inplace=True)

# df_all = df_all.join(df_all2[['exiting_mitosis_short']])

for filter in filters:
output_dir_subset = Path(output_dir) / filter
output_dir_subset.mkdir(parents=True, exist_ok=True)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,4 @@ To overwrite an existing segmented version of the dataset (for example to add/re

```
pdm run nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py --output_dir {existing_output_dir_name} --noframes
pdm run nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py --output_dir //allen/aics/assay-dev/users/Frick/PythonProjects/repos/local_storage/timelapse_feature_explorer_datasets/TFE_new/ --noframes



pdm run nuc_morph_analysis/lib/visualization/write_data_for_colorizer.py --output_dir //allen/aics/assay-dev/users/Frick/PythonProjects/repos/local_storage/timelapse_feature_explorer_datasets/TFE_full/ --parallel


```
Loading