diff --git a/nuc_morph_analysis/analyses/dataset_images_for_figures/figure_helper.py b/nuc_morph_analysis/analyses/dataset_images_for_figures/figure_helper.py index a660bd6d..31071488 100644 --- a/nuc_morph_analysis/analyses/dataset_images_for_figures/figure_helper.py +++ b/nuc_morph_analysis/analyses/dataset_images_for_figures/figure_helper.py @@ -98,7 +98,7 @@ def get_save_dir_and_fig_panel_str(figure, panel): return savedir, fig_panel_str -def return_glasbey_on_dark(N=255, cell_id=None, cell_color=None): +def return_glasbey_on_dark(N=255, cell_id=None, cell_color=None, from_list=False): """ Publication The Glasbey LUT is based on the publication: @@ -109,8 +109,9 @@ def return_glasbey_on_dark(N=255, cell_id=None, cell_color=None): N: int, the number of colors to return cell_id: int, the cell id to change the color of cell_color: array, the RGB color to change the cell to + from_list: bool, whether colormap should be returned from ListedColormap or not """ - from matplotlib.colors import LinearSegmentedColormap + from matplotlib.colors import LinearSegmentedColormap, ListedColormap from nuc_morph_analysis.analyses.dataset_images_for_figures.glasbey_on_dark import ( glasbey_on_dark_func, ) @@ -136,7 +137,8 @@ def return_glasbey_on_dark(N=255, cell_id=None, cell_color=None): cmap_name = "glasbey_on_dark" rgb_array0_1 = [tuple(np.asarray(x) / 255) for x in glasbey_on_dark] cmap = LinearSegmentedColormap.from_list(cmap_name, rgb_array0_1, N=N + 1) - + if from_list: + cmap = ListedColormap(rgb_array0_1, cmap_name, N=N) return np.asarray(rgb_array0_255), cmap, rgb_array0_255 diff --git a/nuc_morph_analysis/analyses/density/draw_nucleus_contours_on_watershed.py b/nuc_morph_analysis/analyses/density/draw_nucleus_contours_on_watershed.py deleted file mode 100644 index 83eede14..00000000 --- a/nuc_morph_analysis/analyses/density/draw_nucleus_contours_on_watershed.py +++ /dev/null @@ -1,106 +0,0 @@ -#%% -from nuc_morph_analysis.lib.preprocessing.twoD_zMIP_area import watershed_workflow, pseudo_cell_helper, pseudo_cell_testing_helper -from pathlib import Path -import pandas as pd -from nuc_morph_analysis.lib.preprocessing import global_dataset_filtering -from nuc_morph_analysis.lib.preprocessing import filter_data - -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.cm as cm -from nuc_morph_analysis.analyses.dataset_images_for_figures.figure_helper import return_glasbey_on_dark -from skimage.measure import find_contours -from nuc_morph_analysis.lib.visualization.plotting_tools import colorize_image -from nuc_morph_analysis.analyses.density.watershed_validate import get_contours_from_pair_of_2d_seg_image, draw_contours_on_image - - -def run_validation_and_plot(TIMEPOINT=48,colony='medium',RESOLUTION_LEVEL=1,plot_everything=False, testing=False): - """ - run an image through the watershed based pseudo cell segmentation and examine the outputs - optionally, run a test image through the same pipeline - - Parameters - ---------- - TIMEPOINT : int, optional - The timepoint to analyze, by default 48 - colony : str, optional - The colony to analyze, by default 'medium' - RESOLUTION_LEVEL : int, optional - The resolution level to analyze, by default 1 - plot_everything : bool, optional - Whether to plot a large set of extra images colored by features and with contours, by default True - testing : bool, optional - Whether to run a test image through the pipeline, by default False - - Returns - ------- - pd.DataFrame - The dataframe containing the pseudo cell segmentation results - if 
plot_everything is False - if plot_everything is True, returns the full dataframe (after merging with the tracking dataset) - - """ - - # perform watershed based pseudo cell segmentation - if testing: # on test data - labeled_nucleus_image = pseudo_cell_testing_helper.make_nucleus_image_array() - df_2d, img_dict = pseudo_cell_helper.get_pseudo_cell_boundaries(labeled_nucleus_image, return_img_dict=True) - colony = 'test' - else: # or on real data - df_2d, img_dict = watershed_workflow.get_image_and_run(colony, TIMEPOINT, RESOLUTION_LEVEL, return_img_dict=True) - - # now display all of the intermediates of the - # watershed based pseudo cell segmentation - # 'cyto_distance':(cyto_distance, "Distance Transform restricted\nto Cytoplasmic Segmentation"), - # 'nucleus_edge_img':(nucleus_edge_img, "Nucleus Edge"), - cmapstr = 'viridis' - nuc_mip = img_dict['mip_of_labeled_image'][0] - cell_mip = img_dict['pseudo_cells_img'][0] - - nuc_edge = img_dict['nucleus_edge_img'][0] - cytp_dist = img_dict['cyto_distance'][0] - for full_crop, sizes in [('crop',(500,200,500,500)),('full',(0,0,nuc_edge.shape[1],nuc_edge.shape[0]))]: - x1,y1,w,h = sizes - cimg = cytp_dist - - # define the colormap for the image - cmap = cm.get_cmap(cmapstr) - - fig, axlist = plt.subplots(1, 1, figsize=(6, 4)) - - vmin,vmax = (None,None) - mappable = axlist.imshow(cimg, interpolation='nearest',cmap=cmap, - vmin=vmin,vmax=vmax, - ) - cbar = plt.colorbar(mappable,ax=axlist,label='cyto_distance') - - # create the contours - contour_list = get_contours_from_pair_of_2d_seg_image(nuc_mip,cell_mip) - # remove cell contours - contour_list2 = [] - for contour in contour_list: - contour_item = list(contour) - contour_item[2] = [] # empty list - contour_list2.append(contour_item) - - draw_contours_on_image(axlist,contour_list2,new_color=np.asarray((200,0,200))) - # remove the axis - plt.axis('off') - plt.xlim([x1,x1+w]) - plt.ylim([y1,y1+h]) - titlestr = f"edges of nuclei overlaid on cytoplasmic distance transform\n{colony}_{TIMEPOINT}_{full_crop}_res{RESOLUTION_LEVEL}" - plt.title(f'{titlestr}') - - # now save the figure - savedir = Path(__file__).parent / 'figures' / 'validating nucleus edge' - savedir.mkdir(exist_ok=True,parents=True) - savename = f'{colony}_{TIMEPOINT}_{full_crop}_res{RESOLUTION_LEVEL}.png' - savepath = savedir / savename - plt.savefig(savepath, - dpi=300, - bbox_inches='tight') - -if __name__ == '__main__': - # set the details - dft_test = run_validation_and_plot(testing=True) - dft0 = run_validation_and_plot(plot_everything=True) \ No newline at end of file diff --git a/nuc_morph_analysis/analyses/density/pseudocell_based_density_analysis.py b/nuc_morph_analysis/analyses/density/extra_checks/check_for_size_dependence.py similarity index 53% rename from nuc_morph_analysis/analyses/density/pseudocell_based_density_analysis.py rename to nuc_morph_analysis/analyses/density/extra_checks/check_for_size_dependence.py index dc7262a4..bf9e5852 100644 --- a/nuc_morph_analysis/analyses/density/pseudocell_based_density_analysis.py +++ b/nuc_morph_analysis/analyses/density/extra_checks/check_for_size_dependence.py @@ -1,4 +1,5 @@ #%% +# this code is not used in the final analysis, but was used to confirm that there is no correlation between density and nuclear area from nuc_morph_analysis.lib.visualization.reference_points import COLONY_COLORS, COLONY_LABELS import matplotlib.pyplot as plt import numpy as np @@ -8,6 +9,9 @@ from nuc_morph_analysis.lib.preprocessing import filter_data from pathlib import Path from 
sklearn.linear_model import LinearRegression +from matplotlib.ticker import MaxNLocator +import matplotlib + #%% # set figure directory @@ -21,57 +25,23 @@ dfm = filter_data.all_timepoints_minimal_filtering(df) #%% # plot density over time for each colony along colony time -x_col = "colony_time" -column_val = 'label_img' -feature_list = ['2d_area_nuc_cell_ratio','density','2d_area_nucleus','2d_area_pseudo_cell', - '2d_area_cyto','inv_cyto_density','2d_intensity_min_edge','2d_intensity_max_edge','2d_intensity_mean_edge' - ] -for y_col in feature_list: - fig,ax = plt.subplots(figsize=(4,3)) - - for colony in ['small','medium','large']: - - dfsub = dfm[dfm['colony']==colony].copy() - dfsub.dropna(subset=[y_col],inplace=True) - - # create a pivot of the dataframe to get a 2d array of track_id x timepoint with each value being the density - pivot = dfsub.pivot(index=x_col, columns=column_val, values=y_col) - pivot.head() +matplotlib.rcParams.update({'font.size': 8}) +matplotlib.rcParams.update({'axes.titlesize': 8}) +matplotlib.rcParams.update({'axes.labelsize': 8}) +matplotlib.rcParams.update({'xtick.labelsize': 8}) +matplotlib.rcParams.update({'ytick.labelsize': 8}) +matplotlib.rcParams.update({'legend.fontsize': 7}) +matplotlib.rcParams.update({'legend.title_fontsize': 8}) +matplotlib.rcParams.update({'figure.titlesize': 8}) +# make axis linewidth thinner +matplotlib.rcParams.update({'axes.linewidth': 0.5}) - mean = pivot.median(axis=1) - lower = pivot.quantile(0.05,axis=1) - upper = pivot.quantile(0.95,axis=1) - - xscale_factor, xlabel, xunit, xlimit = get_plot_labels_for_metric(x_col) - x = mean.index * xscale_factor - yscale_factor, ylabel, yunit, ylimit = get_plot_labels_for_metric(y_col) - y = mean.values * yscale_factor - yl = lower.values * yscale_factor - yu = upper.values * yscale_factor - - ax.plot(x, y, label=COLONY_LABELS[colony], color=COLONY_COLORS[colony]) - ax.fill_between(x, yl, yu, alpha=0.2, color=COLONY_COLORS[colony], - edgecolor='none') - ax.set_xlabel(f"{xlabel} {xunit}") - ax.set_ylabel(f"{ylabel} {yunit}\n(90% interpercentile range)") - - - ax.legend(loc="upper right", handletextpad=0.7, frameon=False) - plt.tight_layout() - for ext in ['.png','.pdf']: - save_and_show_plot( - f"{figdir}/{y_col}_vs_{x_col}_by_colony", - file_extension=ext, - dpi=300, - transparent=False, - ) - plt.show() #%% -# plot density as a function of nucleus size (and compare to old density metric) +# plot density as a function of nucleus size colony='medium' x_col = '2d_area_nucleus' -for yi,y_col in enumerate(['2d_area_nuc_cell_ratio','density','inv_cyto_density']): +for yi,y_col in enumerate(['2d_area_nuc_cell_ratio']): dfsub = dfm[dfm['colony']==colony].copy() dfsub.dropna(subset=[y_col],inplace=True) @@ -88,6 +58,7 @@ ax.set_xlabel(f"{xlabel} {xunit}") ax.set_ylabel(f"{ylabel} {yunit}") + # add best fit line reg = LinearRegression().fit(x.reshape(-1,1), y) y_pred = reg.predict(x.reshape(-1,1)) diff --git a/nuc_morph_analysis/analyses/density/effect_of_resolution_level.py b/nuc_morph_analysis/analyses/density/extra_checks/confirm_identical_density_at_different_zarr_resolutions.py similarity index 100% rename from nuc_morph_analysis/analyses/density/effect_of_resolution_level.py rename to nuc_morph_analysis/analyses/density/extra_checks/confirm_identical_density_at_different_zarr_resolutions.py diff --git a/nuc_morph_analysis/analyses/density/validate_new_filters.py b/nuc_morph_analysis/analyses/density/extra_checks/visualize_filter_for_uncaught_pseudo_cell_artifacts.py similarity index 97% 
rename from nuc_morph_analysis/analyses/density/validate_new_filters.py rename to nuc_morph_analysis/analyses/density/extra_checks/visualize_filter_for_uncaught_pseudo_cell_artifacts.py index a7b34f94..5694a8d9 100644 --- a/nuc_morph_analysis/analyses/density/validate_new_filters.py +++ b/nuc_morph_analysis/analyses/density/extra_checks/visualize_filter_for_uncaught_pseudo_cell_artifacts.py @@ -1,4 +1,7 @@ #%% +# this code visualizes cells identified in this workflow `remove_uncaught_pseudo_cell_artifacts(df, apply_to_nucleus_too, verbose)` +# and highlights the cells that were removed + from nuc_morph_analysis.lib.preprocessing.twoD_zMIP_area import watershed_workflow from pathlib import Path from nuc_morph_analysis.lib.preprocessing import global_dataset_filtering diff --git a/nuc_morph_analysis/analyses/density/watershed_validate.py b/nuc_morph_analysis/analyses/density/extra_checks/visually_validate_watershed_psuedo_cell_seg_workflow.py similarity index 86% rename from nuc_morph_analysis/analyses/density/watershed_validate.py rename to nuc_morph_analysis/analyses/density/extra_checks/visually_validate_watershed_psuedo_cell_seg_workflow.py index b5b99a45..716b17c3 100644 --- a/nuc_morph_analysis/analyses/density/watershed_validate.py +++ b/nuc_morph_analysis/analyses/density/extra_checks/visually_validate_watershed_psuedo_cell_seg_workflow.py @@ -14,13 +14,13 @@ def get_contours_from_pair_of_2d_seg_image(nuc_mip,cell_mip,dft=None): contour_list = [] # (label, nucleus_contour, cell_contour, color) - rgb_array0_255, _, _ = return_glasbey_on_dark() - for label_img in range(0,nuc_mip.max()+1): + rgb_array0_255, cmapper, _ = return_glasbey_on_dark() + for label_img in range(1,nuc_mip.max()+1): if dft is not None: #ask if the label_img is in the dataframe if label_img not in dft['label_img'].values: continue - color = np.float64(rgb_array0_255[label_img % len(rgb_array0_255)]) + color = np.float64(cmapper(label_img)) # get the nucleus boundary nucleus_boundary = nuc_mip == label_img @@ -36,15 +36,39 @@ def get_contours_from_pair_of_2d_seg_image(nuc_mip,cell_mip,dft=None): return contour_list -def draw_contours_on_image(axlist,contour_list,new_color=None): +def draw_contours_on_image(axlist,contour_list,new_color=None,filled=False,colorize=False,dft=None,linewidth=1,colorfeat='2d_area_nuc_cell_ratio',cmapstr='PiYG',usedfcolors=False,cmap=None): # draw contours + if dft is not None: + minval = dft[colorfeat].min() + maxval = dft[colorfeat].max() for label, nuc_contours, cell_contours, color in contour_list: if new_color is not None: color = new_color + + if usedfcolors: + if label not in dft['label_img'].values: + color = np.asarray((0.4,0.4,0.4,1)) + else: + color = np.float64(cmap(dft.loc[dft['label_img']==label,colorfeat].values[0])) + + if colorize: + if label not in dft['label_img'].values: + continue + value = dft[dft['label_img']==label][colorfeat].values[0] + #rescale between 0 and 255 + new_value = (value - minval) / (maxval - minval) + cmap = cm.get_cmap(cmapstr) + color = np.float64(cmap(new_value)) + for contour in nuc_contours: - axlist.plot(contour[:, 1], contour[:, 0], linewidth=1, color=color/255) + axlist.plot(contour[:, 1], contour[:, 0], linewidth=linewidth, color=color) + if filled: # now draw as filled + axlist.fill(contour[:, 1], contour[:, 0], color=color, alpha=0.5) for contour in cell_contours: - axlist.plot(contour[:, 1], contour[:, 0], linewidth=1, color=color/255) + axlist.plot(contour[:, 1], contour[:, 0], linewidth=linewidth, color=color) + if filled: # now draw as 
filled + axlist.fill(contour[:, 1], contour[:, 0], color=color, alpha=0.5) + return axlist def plot_colorized_image_with_contours(img_dict,dft,feature,cmapstr,colony='test',TIMEPOINT=None,RESOLUTION_LEVEL=None,categorical=False,draw_contours=True): @@ -71,8 +95,6 @@ def plot_colorized_image_with_contours(img_dict,dft,feature,cmapstr,colony='test cmap = cm.get_cmap(cmapstr) if categorical: cimg = np.round(cimg).astype('uint16') - - # rgb = np.take(np.uint16(cmaparr*255),cimg.astype('uint16'),axis=0) # create the figure fig, axlist = plt.subplots(1, 1, figsize=(6, 4)) @@ -88,7 +110,7 @@ def plot_colorized_image_with_contours(img_dict,dft,feature,cmapstr,colony='test if draw_contours: # create the contours contour_list = get_contours_from_pair_of_2d_seg_image(nuc_mip,cell_mip,dft) - draw_contours_on_image(axlist,contour_list,new_color=np.asarray((200,0,200))) + draw_contours_on_image(axlist,contour_list,new_color=np.asarray((200,0,200))/255) # remove the axis plt.axis('off') plt.xlim([x1,x1+w]) @@ -219,6 +241,5 @@ def run_validation_and_plot(TIMEPOINT=48,colony='medium',RESOLUTION_LEVEL=1,plot if __name__ == '__main__': # set the details - # dft_test = run_validation_and_plot(testing=True) - # dft0 = run_validation_and_plot(plot_everything=True) - dft0 = run_validation_and_plot(247,colony='small',RESOLUTION_LEVEL=1,plot_everything=True) \ No newline at end of file + dft_test = run_validation_and_plot(testing=True,plot_everything=False) + dft0 = run_validation_and_plot(testing=False,plot_everything=False) \ No newline at end of file diff --git a/nuc_morph_analysis/analyses/density/figure_watershed_based_density_schematic.py b/nuc_morph_analysis/analyses/density/figure_watershed_based_density_schematic.py new file mode 100644 index 00000000..6419976c --- /dev/null +++ b/nuc_morph_analysis/analyses/density/figure_watershed_based_density_schematic.py @@ -0,0 +1,224 @@ +#%% +# code for SuppFigS4 panel C +from nuc_morph_analysis.lib.preprocessing.twoD_zMIP_area import watershed_workflow, pseudo_cell_helper, pseudo_cell_testing_helper +from pathlib import Path +import pandas as pd +from nuc_morph_analysis.lib.preprocessing import global_dataset_filtering +from nuc_morph_analysis.lib.preprocessing import filter_data, load_data + +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.cm as cm +from nuc_morph_analysis.lib.visualization.plotting_tools import colorize_image, get_plot_labels_for_metric +from nuc_morph_analysis.lib.visualization.notebook_tools import save_and_show_plot + +from nuc_morph_analysis.analyses.dataset_images_for_figures.figure_helper import return_glasbey_on_dark + +from nuc_morph_analysis.analyses.density.extra_checks.visually_validate_watershed_psuedo_cell_seg_workflow import get_contours_from_pair_of_2d_seg_image, draw_contours_on_image +from nuc_morph_analysis.analyses.dataset_images_for_figures.figure_helper import INTENSITIES_DICT + +from nuc_morph_analysis.lib.visualization.example_tracks import EXAMPLE_TRACKS + +import matplotlib +matplotlib.rcParams['pdf.fonttype'] = 42 +matplotlib.rcParams['ps.fonttype'] = 42 +matplotlib.rcParams['font.size'] = 7 + +def determine_crop_size_and_location_from_track(track,df,crop_w=250,crop_h=250,RESOLUTION_LEVEL=1): + """ + determine the crop size and location based on the track_id + """ + crop_w = 250 + crop_h = 250 + track_df = df[(df['track_id']==track)] + track_x = track_df['centroid_x'].values[0] + track_y = track_df['centroid_y'].values[0] + if RESOLUTION_LEVEL ==1: + track_x = np.uint16(track_x//2.5) + track_y = 
np.uint16(track_y//2.5) + + track_x = np.uint16(track_x - crop_w//2) + track_y = np.uint16(track_y - crop_h//2) + return track_x,track_y,crop_w,crop_h + +def determine_colormaps(img,key,crop_exp): + # determine colormaps and vmin,vmax + if img.dtype == 'bool': + cmap = cm.get_cmap('Greys_r') + vmin = 0 + vmax = 1 + elif img.dtype == 'uint16': + _, cmap, _ = return_glasbey_on_dark(N=img.max()+1,from_list=True) + vmax = cmap.N-1 + vmin = 0 + else: + cmap = cm.get_cmap('Greys_r') + vmin=0 + vmax=np.max(img[crop_exp]) + + if key=='raw_image': + cmap = cm.get_cmap('Greys_r') + vmin = INTENSITIES_DICT['egfp_max'][0] + vmax= INTENSITIES_DICT['egfp_max'][1] + return cmap, vmin, vmax + + +def run_validation_and_plot(TIMEPOINT=88,track=EXAMPLE_TRACKS["pseudocell_density_example"],colony='medium',RESOLUTION_LEVEL=1,plot_everything=False, testing=False): + """ + run an image through the watershed based pseudo cell segmentation and examine the outputs (as full fov and crop within that fov) + optionally, run a test image through the same pipeline + + Parameters + ---------- + TIMEPOINT : int, optional + The timepoint to analyze, by default 88 + track : int, optional + The track to analyze, by default 81463 + colony : str, optional + The colony to analyze, by default 'medium' + RESOLUTION_LEVEL : int, optional + The resolution level to analyze, by default 1 + plot_everything : bool, optional + Whether to plot a large set of extra images colored by features and with contours, by default True + testing : bool, optional + Whether to run a test image through the pipeline, by default False + + Returns + ------- + pd.DataFrame + The dataframe containing the pseudo cell segmentation results + if plot_everything is False + if plot_everything is True, returns the full dataframe (after merging with the tracking dataset) + + """ + + # perform watershed based pseudo cell segmentation + if testing: # on test data + labeled_nucleus_image = pseudo_cell_testing_helper.make_nucleus_image_array() + df_2d, img_dict = pseudo_cell_helper.get_pseudo_cell_boundaries(labeled_nucleus_image, return_img_dict=True) + colony = 'test' + else: # or on real data + df_2d, img_dict = watershed_workflow.get_image_and_run(colony, TIMEPOINT, RESOLUTION_LEVEL, return_img_dict=True) + + # load the raw image and add to image dict + raw_img_reader = load_data.get_dataset_original_file_reader(colony) + raw_img = raw_img_reader.get_image_dask_data("ZYX", T=TIMEPOINT, C=0).max(axis=0).compute() + img_dict['raw_image'] = (raw_img,'mEGFP-tagged lamin B1') + + # load the tracking dataframe and apply appropriate filters + df = global_dataset_filtering.load_dataset_with_features(dataset='all_baseline') + df = filter_data.all_timepoints_minimal_filtering(df) + dfm = df.copy() + + # now get the subset of the dataframe for the colony and timepoint + dfsub = dfm[dfm['colony']==colony] + dft = dfsub[dfsub['index_sequence']==TIMEPOINT] + + track_x,track_y,crop_w,crop_h = determine_crop_size_and_location_from_track(track,dft,crop_w=250,crop_h=250,RESOLUTION_LEVEL=1) + + mip = img_dict['mip_of_labeled_image'][0] #use mip to determine full size crop + + # now iterate through crop sizes + for full_crop, sizes in [('crop',(track_x,track_y,crop_w,crop_h)),('full',(0,0,mip.shape[1],mip.shape[0]))]: + x1,y1,w,h = sizes + crop_exp = np.index_exp[y1:y1+h,x1:x1+w] + key_list = ['mip_of_labeled_image','distance','pseudo_cells_img','colorize'] + + + nrows = 1 + ncols = len(key_list) + assert ncols>1 + fig,axr = plt.subplots(nrows,ncols,figsize=(ncols*1.5,nrows*1.5)) + 
axx = np.asarray([axr]).flatten() + + for i, key in enumerate(key_list): + ax = axx[i] + assert type(ax) is plt.Axes + + if key in ['overlay','colorize']: + img = np.zeros_like(img_dict['mip_of_labeled_image'][0]) + cell_img = img_dict['pseudo_cells_img'][0].copy() + cell_img[img_dict['cell_shed_bin'][0]==0] = 0 # remove the edges + contour_list = get_contours_from_pair_of_2d_seg_image(img_dict['mip_of_labeled_image'][0],cell_img,) + label = 'Cell and Nucleus Boundaries' if key=='overlay' else 'Colored by Nucleus\nto Cell Area Ratio' + else: + img = img_dict[key][0] + label = img_dict[key][1] + + imgcmap, vmin, vmax = determine_colormaps(img,key,crop_exp) + + mappable = ax.imshow(img, + interpolation='nearest', + cmap = imgcmap, + vmin = vmin, + vmax = vmax, + origin='lower', + ) + + # adjust axes + ax.set_title(label) + ax.axis('off') + ax.set_xlim([x1,x1+w]) + ax.set_ylim([y1,y1+h]) + + + position = ax.get_position().bounds + if key == 'overlay': + draw_contours_on_image(ax,contour_list,filled=True,linewidth=0.5) + if key == 'colorize': + labels_in_img = np.unique(img_dict['pseudo_cells_img'][0][crop_exp]) + dftsub = dft[dft['label_img'].isin(labels_in_img)] + colorfeat='2d_area_nuc_cell_ratio' #feature + cmapstr = 'cool' # colormap + color_values = dftsub[colorfeat].values + + draw_contours_on_image(ax,contour_list,filled=True,colorize=True,dft=dftsub,linewidth=0.5,colorfeat=colorfeat,cmapstr=cmapstr) + + colorbarmin = np.nanmin(color_values) + colorbarmax = np.nanmax(color_values) + colorbar = plt.cm.ScalarMappable(cmap=cmapstr,norm=plt.Normalize(vmin=colorbarmin,vmax=colorbarmax)) + colorbar.set_array([]) + colorbar.set_clim(colorbarmin,colorbarmax) + cbar = plt.colorbar(colorbar,ax=ax,location='bottom') + _,label,unit,_ = get_plot_labels_for_metric(colorfeat) + cbar.set_label(f"{label} {unit}") + cbar.set_ticks([colorbarmin,colorbarmax]) + cbar.set_ticklabels([f'{colorbarmin:.2f}',f'{colorbarmax:.2f}']) + + if key in ['distance']: + + distance_scale = 0.108 # pixel size + if RESOLUTION_LEVEL == 1: + distance_scale *= 2.5 + colorbar = plt.cm.ScalarMappable(cmap=imgcmap,norm=plt.Normalize(vmin=vmin,vmax=vmax)) + cbar = plt.colorbar(colorbar,ax=ax,location='bottom') + cbar.set_ticks([vmin,vmax]) + cbar.set_ticklabels([f'{vmin*distance_scale:.2f}',f'{vmax*distance_scale:.2f}']) + cbar.set_label('Distance Transform (µm)') + + if key in ['distance','colorize']: + # adjust the axis back to its original position + ax.set_position(position) + # adjust position of colorbar + cbar_position = cbar.ax.get_position() + cbar.ax.set_position([cbar_position.x0,cbar_position.y0-0.15,cbar_position.width,cbar_position.height]) # for bottom location + + # now save the figure + savedir = Path(__file__).parent / 'figures' / 'watershed_figure_illustration' + savedir.mkdir(exist_ok=True,parents=True) + + ext = '.pdf' + savename = f'{colony}_{track}_{TIMEPOINT}_{full_crop}_res{RESOLUTION_LEVEL}_{cmapstr}' + savepath = savedir / savename + save_and_show_plot(str(savepath), + file_extension=ext, + figure=fig, + transparent=True, + keep_open=True, + **{'dpi':600} + ) + +if __name__ == '__main__': + # set the details + dft1 = run_validation_and_plot(88,track=81463,colony='medium',RESOLUTION_LEVEL=1,plot_everything=False) + diff --git a/nuc_morph_analysis/analyses/density/run_all_density_workflows.py b/nuc_morph_analysis/analyses/density/run_all_density_workflows.py deleted file mode 100644 index f0562b38..00000000 --- a/nuc_morph_analysis/analyses/density/run_all_density_workflows.py +++ /dev/null @@ -1,6 +0,0 @@ 
-from nuc_morph_analysis.analyses.density import watershed_validate -watershed_validate.run_validation_and_plot(plot_everything=False) -from nuc_morph_analysis.analyses.density import effect_of_resolution_level -from nuc_morph_analysis.analyses.density import pseudocell_based_density_analysis - -print('done') \ No newline at end of file diff --git a/nuc_morph_analysis/analyses/height/figure_3_s4_workflow.py b/nuc_morph_analysis/analyses/height/figure_3_s4_workflow.py index c47ead4f..fd37c399 100644 --- a/nuc_morph_analysis/analyses/height/figure_3_s4_workflow.py +++ b/nuc_morph_analysis/analyses/height/figure_3_s4_workflow.py @@ -1,15 +1,16 @@ # %% from nuc_morph_analysis.analyses.colony_context.colony_context_analysis import plot_radial_profile from nuc_morph_analysis.lib.preprocessing.global_dataset_filtering import load_dataset_with_features -from nuc_morph_analysis.lib.preprocessing import load_data, filter_data, global_dataset_filtering +from nuc_morph_analysis.lib.preprocessing import load_data, filter_data from nuc_morph_analysis.lib.preprocessing.load_data import get_dataset_pixel_size from nuc_morph_analysis.analyses.height import plot -from nuc_morph_analysis.analyses.height.plot_crowding import plot_density_schematic from nuc_morph_analysis.analyses.height.toymodel import toymodel from nuc_morph_analysis.analyses.height.centroid import ( get_centroid, get_neighbor_centroids, ) +from nuc_morph_analysis.analyses.density import figure_watershed_based_density_schematic +from nuc_morph_analysis.analyses.neighbor_of_X import figure_mitotic_filtering_examples from nuc_morph_analysis.lib.preprocessing.load_data import get_dataset_pixel_size from pathlib import Path import matplotlib.pyplot as plt @@ -67,13 +68,13 @@ # plot density schematic pix_size = get_dataset_pixel_size("medium") -plot_density_schematic( - df_timepoint, track_centroid, neighbor_centroids, frame_centroids, pix_size, figdir -) # plot colony-averaged density over aligned colony time plot.density_colony_time_alignment(df_all, pixel_size, interval, time_axis="colony_time") +figure_watershed_based_density_schematic.run_validation_and_plot() #SuppFigS4 panel C +figure_mitotic_filtering_examples.run_validation_and_plot() #SuppFigS4 panel D, this code takes ~6 min to run + # %% # Run and plot toy model toymodel() diff --git a/nuc_morph_analysis/analyses/height/plot.py b/nuc_morph_analysis/analyses/height/plot.py index 58a6306e..cac15359 100644 --- a/nuc_morph_analysis/analyses/height/plot.py +++ b/nuc_morph_analysis/analyses/height/plot.py @@ -116,10 +116,11 @@ def calculate_mean_density(df, scale): """ mean = [] standard_dev = [] + feature_col = "2d_area_nuc_cell_ratio" for _, df_frame in df.groupby("index_sequence"): - density = df_frame.density * scale - mean.append(np.mean(density)) - standard_dev.append(np.std(density)) + density = df_frame[feature_col].values * scale + mean.append(np.nanmean(density)) + standard_dev.append(np.nanstd(density)) return mean, standard_dev @@ -159,35 +160,42 @@ def density_colony_time_alignment( ------- Plot of mean nuclear height across the colony over time for each colony. 
""" - scale, label, units, _ = get_plot_labels_for_metric("density") plt.close() fig, ax = plt.subplots(1, 1, figsize=(5, 4)) + feature_col = "2d_area_nuc_cell_ratio" + scale, label, units, _ = get_plot_labels_for_metric(feature_col) + for colony, df_colony in df.groupby("colony"): df_colony = df_colony.sort_values("index_sequence") - mean_density, std_density = calculate_mean_density(df_colony, scale) color = COLONY_COLORS[colony] if time_axis == "real_time": - time = df_colony.index_sequence.unique() * interval / 60 + time_col = "index_sequence" x_label = "Real Time (hr)" if time_axis == "colony_time": - time = df_colony.colony_time.unique() * interval / 60 + time_col = "colony_time" x_label = "Aligned Colony Time (hr)" + + grouper = df_colony[[time_col] + [feature_col]].groupby(time_col)[ + feature_col + ] + mean_density = grouper.mean() * scale if error == "std": - upper = np.array(mean_density) + np.array(std_density) - lower = np.array(mean_density) - np.array(std_density) + std_density = grouper.std() * scale + lower = mean_density - std_density + upper = mean_density + std_density if error == "percentile": - grouper = df_colony[["index_sequence"] + ["density"]].groupby("index_sequence")[ - "density" - ] - lower = grouper.quantile(0.05) - upper = grouper.quantile(0.95) + lower = grouper.quantile(0.05) * scale + upper = grouper.quantile(0.95) * scale + + time = mean_density.index.values * interval / 60 + ax.fill_between( time, - lower * scale, - upper * scale, + lower, + upper, alpha=0.12, color=color, zorder=0, @@ -198,14 +206,13 @@ def density_colony_time_alignment( time, mean_density, linewidth=1.2, color=color, label=COLONY_LABELS[colony], zorder=20 ) - ax.set_ylim(0.0005, 0.0065) ax.set_ylabel(f"Average Density \n Across Colony {units}") ax.set_xlabel(x_label) if show_legend is True: ax.legend(loc="upper right", handletextpad=0.7, frameon=False) plt.tight_layout() save_and_show_plot( - f"{figdir}/avg_density_colony_{time_axis}_alignment", + f"{figdir}/avg_density_colony_{time_axis}_alignment-{feature_col}", file_extension=".pdf", dpi=300, transparent=True, diff --git a/nuc_morph_analysis/analyses/height/plot_crowding.py b/nuc_morph_analysis/analyses/height/plot_crowding.py deleted file mode 100644 index e4b7e986..00000000 --- a/nuc_morph_analysis/analyses/height/plot_crowding.py +++ /dev/null @@ -1,139 +0,0 @@ -import numpy as np -import matplotlib.pyplot as plt -from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar - -from nuc_morph_analysis.lib.visualization.movie_tools import get_colorized_img -from nuc_morph_analysis.lib.visualization.notebook_tools import save_and_show_plot - - -def get_axis_limits_from_points(points, padding=250): - """ - Get the axis limits for a plot based on a set of points. - - Parameters - ---------- - points : numpy.ndarray - An array of shape (N, 2) representing the x and y coordinates of the points. - padding : int, optional - The amount of padding to add to the axis limits (default is 250). - - Returns - ------- - xlim : list - A list containing the minimum and maximum x-axis limits. - ylim : list - A list containing the minimum and maximum y-axis limits. 
- """ - min_x = np.min(points[:, 0]) - max_x = np.max(points[:, 0]) - min_y = np.min(points[:, 1]) - max_y = np.max(points[:, 1]) - xlim = [min_x - padding, max_x + padding] - ylim = [min_y - padding, max_y + padding] - return xlim, ylim - - -def plot_density_schematic( - df_timepoint, track_centroid, neighbor_centroids, frame_centroids, pix_size, figdir -): - """ - Plot the schematic showing how density is calculated - - Parameters - ---------- - df_timepoint : DataFrame - The timepoint data containing information about the nuclei. - track_centroid : tuple - The centroid coordinates of the tracked nucleus. - neighbor_centroids : array-like - The centroid coordinates of the neighboring nuclei. - frame_centroids : array-like - The centroid coordinates of all nuclei in the frame. - pix_size : float - The pixel size in micrometers. - figdir : str - The directory to save the generated figure. - - Returns - ------- - None - This function does not return anything. The figure is saved in the specified directory. - """ - img = get_colorized_img(df_timepoint, "index_sequence", 1, filter_vals=False) - fig, axs = plt.subplots(1, 2, figsize=(8, 5), dpi=300) - for ct, ax in enumerate(axs): - ax.imshow(img, cmap="Greys_r", origin="upper") - ax.tick_params( - axis="both", - which="both", - bottom=False, - top=False, - left=False, - right=False, - labelbottom=False, - labelleft=False, - ) - - # draw arrows to each neighbor - for centroid in neighbor_centroids: - ax.arrow( - track_centroid[0], - track_centroid[1], - centroid[0] - track_centroid[0], - centroid[1] - track_centroid[1], - head_width=10, - head_length=10, - fc="blue", - ec="blue", - ) - - if ct == 0: # plot entire colony with all nuclei - xlim, ylim = get_axis_limits_from_points(frame_centroids, padding=250) - ax.set_xlim(xlim) - ax.set_ylim(ylim[1], ylim[0]) - sz = 10 - scale = 50 - size_vertical = 30 - scale_bar_y_pos = 0.02 - else: # plot zoomed in view of nucleus and neighbors - avg_neighbor_distance = ( - np.mean(np.linalg.norm(neighbor_centroids - track_centroid, axis=1)) * pix_size - ) - density = 1 / avg_neighbor_distance**2 - xlim, ylim = get_axis_limits_from_points(neighbor_centroids, padding=50) - ax.set_xlim(xlim) - ax.set_ylim(ylim[1], ylim[0]) - sz = 50 - scale = 10 - size_vertical = 6 - title_str = ( - f"Mean distance to neighbors $(r)$: {avg_neighbor_distance:.2f}$\\mu m$" - + "\n" - + f"Density $(1 / r ^2)$: {density:.2g}/$\\mu m^2$" - ) - scale_bar_y_pos = -0.1 - - ax.set_title(title_str) - - # create scalebar - scalebar = AnchoredSizeBar( - ax.transData, - scale / pix_size, - f"{scale} $\\mu m$", - "lower right", - bbox_to_anchor=(0.95, scale_bar_y_pos), - bbox_transform=ax.transAxes, - color="black", - pad=0, - borderpad=0, - frameon=False, - size_vertical=size_vertical, - label_top=False, - ) - ax.add_artist(scalebar) - - # draw nucleus centroid - ax.scatter(track_centroid[0], track_centroid[1], color="red", s=sz) - - plt.tight_layout(pad=0) - save_and_show_plot(f"{figdir}/density_schematic", figure=fig, bbox_inches="tight") diff --git a/nuc_morph_analysis/analyses/height/toymodel.py b/nuc_morph_analysis/analyses/height/toymodel.py index 263f2a87..24d60abd 100644 --- a/nuc_morph_analysis/analyses/height/toymodel.py +++ b/nuc_morph_analysis/analyses/height/toymodel.py @@ -9,6 +9,8 @@ from nuc_morph_analysis.lib.preprocessing import global_dataset_filtering, load_data, filter_data from nuc_morph_analysis.lib.preprocessing.neighbor_analysis import compute_density +from nuc_morph_analysis.lib.preprocessing import add_times 
+from nuc_morph_analysis.lib.visualization.plotting_tools import get_plot_labels_for_metric # make plot text editable in Illustrator import matplotlib @@ -17,14 +19,15 @@ plt.rcParams["font.family"] = "Arial" -def toymodel(nc_ratio=0.29, cvol=0.5e6, num_workers=1): +def toymodel(nc_ratio=0.29, cvol=0.5e6, cell_H_mod=0, num_workers=1): """ main function to run args -------- nc_ratio: ratio of nuclear to cytoplastic volumes - default is 0.3 - cvol: target volume of nuclei to fit toy model to + cvol: target volume of nuclei to fit toy model to, default is 0.5e6 pixels^3 ~ 630 um^3 + cell_H_mod: height difference between top of nucleus and top of cell to use for toy model. default is 0 num_workers: how many workers to use for multiprocessing """ save_path = Path(__file__).parent / "figures" @@ -37,13 +40,13 @@ def toymodel(nc_ratio=0.29, cvol=0.5e6, num_workers=1): data["height"] = data["height"] * pix_size data["dist"] = data["dist"] * pix_size - stats = get_toy_model(data, cvol_um_cell) + stats = get_toy_model(data, cvol_um_cell, cell_H_mod) plot_toy_model(data, stats, save_path) plot_growth_rate(data, save_path) -def get_toy_model(data, cvol_um_cell): +def get_toy_model(data, cvol_um_cell, cell_H_mod=0): """ Fit toy model to set of nuclei that all have similar volumes do this by interpolating height values, and then for @@ -51,15 +54,22 @@ def get_toy_model(data, cvol_um_cell): 1. radius of a cylinder as R = sqrt(V/(2 pi H)) 2. radius of a hexagon as R = sqrt(2V/3sqrt(3)H) Return this information as distance = 2*R via a dataframe + + arguments + ----- + data: dataframe with real data + cvol_um_cell: volume of a single cell + cell_H_mod: height difference between top of nucleus and top of cell to use for toy model. default is 0 """ H = np.linspace(data["height"].min(), data["height"].max(), 100) + cell_H = H + cell_H_mod # Volume of cylinder is pi r^2 H # distance = np.sqrt(cvol/(np.pi * H))*2 for cylinder - dist_cylindrical = 2 * np.sqrt(cvol_um_cell / (np.pi * H)) # assuming cylindrical toy model + dist_cylindrical = 2 * np.sqrt(cvol_um_cell / (np.pi * cell_H)) # assuming cylindrical toy model # Volume of hexagon is (3√3/2)s^2 × h # s = sqrt(2V/3sqrt(3)H) - S = np.sqrt(cvol_um_cell * 2 / (H * 3 * np.sqrt(3))) + S = np.sqrt(cvol_um_cell * 2 / (cell_H * 3 * np.sqrt(3))) # normal to hexagon side is S *sqrt(3)/2 # distance is 2 * S * sqrt(3)/2 dist_hexagonal = 2 * S * np.sqrt(3) / 2 @@ -67,8 +77,11 @@ def get_toy_model(data, cvol_um_cell): stats = pd.DataFrame( { "height": H, + "cell_height": cell_H, "dist_cylindrical": dist_cylindrical, "dist_hexagonal": dist_hexagonal, + "cell_vol": cvol_um_cell, + "cell_height_diff": cell_H_mod, } ) return stats @@ -114,7 +127,8 @@ def plot_toy_model(data, toy_stats, save_path=Path("./")): ) axes.set_ylim(10, 40) axes.legend() - fig.savefig(save_path / "toymodel.pdf", bbox_inches="tight") + savename = f"toymodel.pdf" + fig.savefig(save_path / savename, bbox_inches="tight") def plot_growth_rate(data, save_path=Path("./")): @@ -182,11 +196,12 @@ def get_data(cvol, save_path=Path("./"), num_workers=1): df_full = filter_data.all_timepoints_full_tracks(df) df_ft = df_full[df_full["colony"].isin(["small", "medium", "large"])].reset_index() + scale, _, _, _ = get_plot_labels_for_metric("volume") # plot chosen volume fig, ax = plt.subplots(1, 1, figsize=(10, 4)) for track, df_track in df_ft.groupby("track_id"): - ax.plot(df_track.index_sequence, df_track.volume, alpha=0.5, lw=0.3) - ax.axhline(y=cvol) + ax.plot(df_track.index_sequence, df_track.volume * scale, 
alpha=0.5, lw=0.3) + ax.axhline(y=cvol * scale) fig.savefig(save_path / "chosen_volume.pdf") # get similar volumes to chosen volume @@ -209,6 +224,20 @@ def get_data(cvol, save_path=Path("./"), num_workers=1): return neigh_stats, cvol +def determine_volume_at_middle_of_cell_cycle(): + + df = global_dataset_filtering.load_dataset_with_features(remove_growth_outliers=True) + df = filter_data.filter_all_outliers(df) + df_full = filter_data.all_timepoints_full_tracks(df) + df_full = add_times.digitize_time_column(df_full,minval=0,maxval=1,step_size=0.05,time_col='normalized_time',new_col='dig_time') + grouper = df_full[['volume','dig_time']].groupby('dig_time') + dfg = grouper.mean() + scale,_,_,_ = get_plot_labels_for_metric('volume') + mean_vol = dfg.loc[0.5,'volume'].mean()*scale + print(mean_vol) + plt.plot(dfg.index,dfg['volume']*scale) + plt.show() + if __name__ == "__main__": toymodel() diff --git a/nuc_morph_analysis/analyses/neighbor_of_X/figure_mitotic_filtering_examples.py b/nuc_morph_analysis/analyses/neighbor_of_X/figure_mitotic_filtering_examples.py new file mode 100644 index 00000000..67080620 --- /dev/null +++ b/nuc_morph_analysis/analyses/neighbor_of_X/figure_mitotic_filtering_examples.py @@ -0,0 +1,207 @@ +#%% +#SuppFigS4 panel D, this code takes ~6 min to run +from nuc_morph_analysis.lib.preprocessing.twoD_zMIP_area import watershed_workflow +from pathlib import Path +from nuc_morph_analysis.lib.preprocessing import global_dataset_filtering +from nuc_morph_analysis.lib.preprocessing import load_data +from matplotlib.colors import ListedColormap + +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.cm as cm +from nuc_morph_analysis.lib.visualization.notebook_tools import save_and_show_plot + +from nuc_morph_analysis.analyses.dataset_images_for_figures.figure_helper import return_glasbey_on_dark +from nuc_morph_analysis.lib.visualization.example_tracks import EXAMPLE_TRACKS + +from nuc_morph_analysis.analyses.density.extra_checks.visually_validate_watershed_psuedo_cell_seg_workflow import get_contours_from_pair_of_2d_seg_image, draw_contours_on_image +from nuc_morph_analysis.analyses.dataset_images_for_figures.figure_helper import INTENSITIES_DICT +from tqdm import tqdm + + +import matplotlib +matplotlib.rcParams['pdf.fonttype'] = 42 +matplotlib.rcParams['ps.fonttype'] = 42 +matplotlib.rcParams['font.size'] = 7 + +def determine_crop_size_and_location_from_track(track,df,crop_w=250,crop_h=250,RESOLUTION_LEVEL=1): + """ + determine the crop size and location based on the track_id + """ + crop_w = 250 + crop_h = 250 + track_df = df[(df['track_id']==track)] + track_x = track_df['centroid_x'].values[0] + track_y = track_df['centroid_y'].values[0] + if RESOLUTION_LEVEL ==1: + track_x = np.uint16(track_x//2.5) + track_y = np.uint16(track_y//2.5) + + track_x = np.uint16(track_x - crop_w//2) + track_y = np.uint16(track_y - crop_h//2) + return track_x,track_y,crop_w,crop_h + +def determine_colormaps(img,key,crop_exp): + # determine colormaps and vmin,vmax + if img.dtype == 'bool': + cmap = cm.get_cmap('Greys_r') + vmin = 0 + vmax = 1 + elif img.dtype == 'uint16': + _, cmap, _ = return_glasbey_on_dark(N=img.max()+1,from_list=True) + vmax = cmap.N-1 + vmin = 0 + else: + cmap = cm.get_cmap('Greys_r') + vmin=0 + vmax=np.max(img[crop_exp]) + + if key=='raw_image': + cmap = cm.get_cmap('Greys_r') + vmin = INTENSITIES_DICT['egfp_max'][0] + vmax= INTENSITIES_DICT['egfp_max'][1] + return cmap, vmin, vmax + +def 
run_validation_and_plot(track_id=EXAMPLE_TRACKS['pseudocell_mitoticfilter_example'],RESOLUTION_LEVEL=1,frames_before=0,frames_after=7,w=300,plot_everything=False): + """ + run an image through the watershed based pseudo cell segmentation and examine the outputs + optionally, run a test image through the same pipeline + + Parameters + ---------- + track_id : int, optional + The track to analyze, by default 87135 + RESOLUTION_LEVEL : int, optional + The resolution level to analyze, by default 1 + plot_everything : bool, optional + Whether to plot a large set of extra images colored by features and with contours, by default True + testing : bool, optional + Whether to run a test image through the pipeline, by default False + + Returns + ------- + pd.DataFrame + The dataframe containing the pseudo cell segmentation results + if plot_everything is False + if plot_everything is True, returns the full dataframe (after merging with the tracking dataset) + + """ + + + # load the tracking dataframe and apply appropriate filters + df = global_dataset_filtering.load_dataset_with_features(dataset='all_baseline') + + dftrack = df.loc[df['track_id']==track_id].copy() + timepoint = int(dftrack['predicted_breakdown'].values[0]) + time_list = np.arange(timepoint-frames_before,timepoint+frames_after+1,dtype='uint16') + + colony = dftrack['colony'].values[0] + dfc = df.loc[df['colony']==colony].copy() + dfm = dfc.loc[(dfc['index_sequence'].isin(time_list))].copy() + + # artificially set all nuclei to have predicted_breakdown and predicted_formation to -1 + dft = dfm[dfm['index_sequence']==timepoint] + x1,y1,w,h = determine_crop_size_and_location_from_track(track_id,dft,crop_w=w,crop_h=w,RESOLUTION_LEVEL=1) + + nrows = 2 + ncols = len(time_list) + assert ncols>1 + fig,axr = plt.subplots(nrows,ncols,figsize=(ncols*1.5,nrows*1.5)) + # remove x and y ticks + [ax.axis('off') for ax in axr.flatten()] + + for ti,tval in enumerate(tqdm(time_list)): + print(ti) + # perform watershed based pseudo cell segmentation + _, img_dict = watershed_workflow.get_image_and_run(colony, tval, RESOLUTION_LEVEL, return_img_dict=True) + + # load the raw image and add to image dict + raw_img_reader = load_data.get_dataset_original_file_reader(colony) + raw_img = raw_img_reader.get_image_dask_data("ZYX", T=tval, C=0).max(axis=0).compute() + img_dict['raw_image'] = (raw_img,'mEGFP-tagged lamin B1') + + # now get the subset of the dataframe for the colony and timepoint + dfsub = dfm[dfm['colony']==colony] + dft = dfsub[dfsub['index_sequence']==tval] + + # now set the crop for the image + crop_exp = np.index_exp[y1:y1+h,x1:x1+w] + + for rowi, key in enumerate(['colorize','raw_image']): + ax = axr[rowi,ti] # top row axis + assert type(ax) is plt.Axes + if key in ['overlay','colorize']: + img = np.zeros_like(img_dict['mip_of_labeled_image'][0]) + cell_img = img_dict['pseudo_cells_img'][0].copy() + cell_img[img_dict['cell_shed_bin'][0]==0] = 0 # remove the edges + contour_list = get_contours_from_pair_of_2d_seg_image(img_dict['mip_of_labeled_image'][0],cell_img,) + label = f"{(tval//12)}:{str((tval%12)*5).zfill(2)} hr:min" + else: + img = img_dict[key][0] + label = "" + + imgcmap, vmin, vmax = determine_colormaps(img,key,crop_exp) + ax.imshow(img, + interpolation='nearest', + cmap = imgcmap, + vmin = vmin, + vmax = vmax, + origin='lower', + ) + + # adjust axes + ax.set_title(label) + ax.axis('off') + ax.set_xlim([x1,x1+w]) + ax.set_ylim([y1,y1+h]) + + if key in ['overlay','colorize']: + labels_in_img = 
np.unique(img_dict['pseudo_cells_img'][0][crop_exp]) + dftsub = dft[dft['label_img'].isin(labels_in_img)].copy() + + + colormap_dict = {} #type:ignore + colormap_dict.update({'nothing':('frame_of_breakdown',False,1,(0.4,0.4,0.4),f"")}) + # colormap_dict.update({'has_mitotic_neighbor_breakdown_dilated':('has_mitotic_neighbor_breakdown_forward_dilated',True,3,(0.8,0,0),f"has mitotic neighbor (forward)")}) + colormap_dict.update({'has_mitotic_neighbor_dilated':('has_mitotic_neighbor_dilated',True,3,(0.8,0,0),f"has mitotic neighbor (propagated)")}) + # colormap_dict.update({'has_mitotic_neighbor_breakdown':('has_mitotic_neighbor_breakdown',True,4,(1.0,0.0,1.0),f"has mitotic neighbor (breakdown)")}) + colormap_dict.update({'has_mitotic_neighbor':('has_mitotic_neighbor',True,4,(1.0,0.0,1.0),f"has mitotic neighbor")}) + + colormap_dict.update({'track':('track_id',track_id,2,(0.0,1.0,0.0),f"cell that will divide")}) #type:ignore + colormap_dict.update({'frame_of_breakdown':('frame_of_breakdown',True,8,(1.0,1.0,0.0),f"breakdown event")}) + colormap_dict.update({'frame_of_formation':('frame_of_formation',True,9,(0.0,1.0,1.0),f"formation event")}) + + new_colors = np.zeros((np.max([x[2] for x in colormap_dict.values()])+1,3)) + for col in colormap_dict.keys(): + new_colors[colormap_dict[col][2]] = colormap_dict[col][3] + # attach alpha values + new_colors = np.concatenate([new_colors,np.ones((new_colors.shape[0],1))],axis=1) + newcmap = ListedColormap(new_colors) + + dftsub['color_col'] = 0 + for col in colormap_dict.keys(): + dftsub.loc[dftsub[colormap_dict[col][0]]==colormap_dict[col][1], 'color_col'] = colormap_dict[col][2] + + # now draw the contours + colorfeat = 'color_col' + draw_contours_on_image(ax,contour_list,filled=True,colorize=False,dft=dftsub,linewidth=0.5,colorfeat=colorfeat,usedfcolors=True,cmap=newcmap) + # now save the figure + savedir = Path(__file__).parent / 'figures' / 'suppfigs4_mitotic_removal_figure_illustration' + savedir.mkdir(exist_ok=True,parents=True) + + for ext in ['.png','.pdf']: + savename = f'{colony}_{track_id}_{tval}_res{RESOLUTION_LEVEL}' + savepath = savedir / savename + print(f'Saved figure to {savepath}') + save_and_show_plot(str(savepath), + file_extension=ext, + figure=fig, + transparent=True, + keep_open=True, + **{'dpi':600} + ) + +#%% +if __name__ == '__main__': + dft1 = run_validation_and_plot() + diff --git a/nuc_morph_analysis/lib/preprocessing/filter_data.py b/nuc_morph_analysis/lib/preprocessing/filter_data.py index d329bc99..bd83c69d 100644 --- a/nuc_morph_analysis/lib/preprocessing/filter_data.py +++ b/nuc_morph_analysis/lib/preprocessing/filter_data.py @@ -1041,8 +1041,7 @@ def remove_uncaught_pseudo_cell_artifacts(df, apply_to_nucleus_too=False, verbos The signature of these cells is that the area of the pseudo cell is much larger than the area of the nucleus OR The perimeter of the pseudo cell is larger than 500 pixels OR - The perimeter of the pseudo cell is much larger than the perimeter of the nucleus OR - AND they tend to be close to the colony edge (colony depth <= 3) + The perimeter of the pseudo cell is much larger than the perimeter of the nucleus This workflow marks the features that depend on pseudo cell segmentation as NaN for these cells This workflow also adds a column called `uncaught_pseudo_cell_artifact` to the dataframe to mark these cells @@ -1061,9 +1060,8 @@ def remove_uncaught_pseudo_cell_artifacts(df, apply_to_nucleus_too=False, verbos log1 = df['2d_perimeter_nuc_cell_ratio'] < 0.4 log2 = df['2d_perimeter_pseudo_cell'] > 
500 log3 = df['2d_area_nuc_cell_ratio'] < 0.2 - log4 = df['colony_depth'] <= 3 - compiled_log = (log1 | log2 | log3) & log4 + compiled_log = log1 | log2 | log3 # define the columns to apply the filter to extra_cols = ['inv_cyto_density','density'] diff --git a/nuc_morph_analysis/lib/preprocessing/neighbor_analysis/neighbor_analysis.py b/nuc_morph_analysis/lib/preprocessing/neighbor_analysis/neighbor_analysis.py index 87df9a2b..07aaedd5 100644 --- a/nuc_morph_analysis/lib/preprocessing/neighbor_analysis/neighbor_analysis.py +++ b/nuc_morph_analysis/lib/preprocessing/neighbor_analysis/neighbor_analysis.py @@ -128,4 +128,12 @@ def compute_density(df, global_df, num_workers=1): neigh_stats = pd.concat(neigh_stats, axis=0) neigh_stats = neigh_stats.groupby(["CellId"]).mean() + # now remove CellIds that have bad pseudo cell segmentation (i.e. bad_pseudo_cells_segmentation) + # 'uncaught_pseudo_cell_artifact','bad_pseudo_cells_segmentation' + print("Removing bad pseudo cells") + print('Before removing bad pseudo cells: ', neigh_stats.shape[0]) + cell_ids_to_remove = df[df['bad_pseudo_cells_segmentation'] == True]['CellId'].values + neigh_stats = neigh_stats[~neigh_stats.index.isin(cell_ids_to_remove)] + print('After removing bad pseudo cells: ', neigh_stats.shape[0]) + return neigh_stats diff --git a/nuc_morph_analysis/lib/visualization/example_tracks.py b/nuc_morph_analysis/lib/visualization/example_tracks.py index 797140ed..cebef2ac 100644 --- a/nuc_morph_analysis/lib/visualization/example_tracks.py +++ b/nuc_morph_analysis/lib/visualization/example_tracks.py @@ -23,5 +23,7 @@ "delta_v_BC_low": 86418, "transition_point_supplement": 82210, "sample_full_trajectories": [97942, 85296, 9808, 77656, 83322], + "pseudocell_density_example": 81463, + "pseudocell_mitoticfilter_example": 87135, "volume_dip_example": 75725, } diff --git a/test/lib/preprocessing/test_voronoi.py b/test/lib/preprocessing/test_voronoi.py index 23d19745..fe95855b 100644 --- a/test/lib/preprocessing/test_voronoi.py +++ b/test/lib/preprocessing/test_voronoi.py @@ -117,7 +117,6 @@ def test_voronoi_real_densities(): df_colony_metrics["old_density"] = df_colony_metrics["density"].apply(lambda x: 4 * np.sqrt(x)) assert np.allclose(df_colony_metrics["old_density"], df_colony_metrics.density_gt, rtol=0.2) - def test_voronoi_synthetic_distance_density(): """ This test uses a set of cells laid out in the following pattern.
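For readers following the switch from the neighbor-distance `density` metric to `2d_area_nuc_cell_ratio`, the sketch below illustrates the watershed-based pseudo-cell idea this diff relies on: seed a watershed of the background distance transform with the labeled nuclei, then take each nucleus area over its pseudo-cell area. This is a minimal, self-contained example, not the repository's implementation (which lives in `watershed_workflow` and `pseudo_cell_helper`); the function name and the toy image are hypothetical.

# Illustrative sketch only -- names here are not part of the repository.
import numpy as np
from scipy.ndimage import distance_transform_edt
from skimage.segmentation import watershed


def nucleus_to_pseudo_cell_area_ratio(labeled_nuclei: np.ndarray) -> dict:
    """Return {label: nucleus_area / pseudo_cell_area} for a labeled 2D nucleus image."""
    # distance from every background pixel to the nearest nucleus pixel
    distance = distance_transform_edt(labeled_nuclei == 0)
    # flooding from the nucleus labels over this landscape tiles the plane into
    # one "pseudo cell" territory per nucleus, with boundaries roughly midway
    # between neighboring nuclei
    pseudo_cells = watershed(distance, markers=labeled_nuclei)
    ratios = {}
    for label in np.unique(labeled_nuclei):
        if label == 0:
            continue
        nuc_area = np.count_nonzero(labeled_nuclei == label)
        cell_area = np.count_nonzero(pseudo_cells == label)
        ratios[int(label)] = nuc_area / cell_area
    return ratios


if __name__ == "__main__":
    # two toy circular nuclei on a small canvas (hypothetical test image)
    yy, xx = np.mgrid[0:100, 0:100]
    nuclei = np.zeros((100, 100), dtype=np.uint16)
    nuclei[(yy - 30) ** 2 + (xx - 30) ** 2 < 8 ** 2] = 1
    nuclei[(yy - 70) ** 2 + (xx - 65) ** 2 < 12 ** 2] = 2
    print(nucleus_to_pseudo_cell_area_ratio(nuclei))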