Merged
42 commits
6dd62d3
remove unused whole colony LRM feats
chantelleleveille Nov 13, 2024
dc643ab
optionally add mother or sister features for full tracks
chantelleleveille Nov 13, 2024
72c23bf
only add sister features used in LRM
chantelleleveille Nov 13, 2024
e846f20
dont calc neighborhood avg for dxdt feats automatically
chantelleleveille Nov 13, 2024
89fe2b7
remove unused lrm ft
chantelleleveille Nov 15, 2024
0fd10a8
temp sorting through old vs new columns
chantelleleveille Nov 19, 2024
7668a39
Merge remote-tracking branch 'origin/dev' into feature_documentation
chantelleleveille Dec 5, 2024
0e5827b
update
chantelleleveille Dec 5, 2024
7d56b83
Merge remote-tracking branch 'origin/dev' into feature_documentation
chantelleleveille Dec 5, 2024
d3d5728
inv_cyto and others removed
cfrick13 Dec 5, 2024
e3705b8
determine neighbor_of_X columns to keep
cfrick13 Dec 5, 2024
85a9f0e
more feature pruning
chantelleleveille Dec 5, 2024
1ab84ea
first pass going through new features to keep and drop
cfrick13 Dec 5, 2024
f9bdd0b
clean up keep and drop lists
cfrick13 Dec 5, 2024
1b4b969
add code to remove columns to be dropped
cfrick13 Dec 5, 2024
1719987
move extra neighbor_of_X code into examples folder
cfrick13 Dec 5, 2024
06169b5
clarify var names
cfrick13 Dec 5, 2024
e8080d9
remove unused number_of_frame_X_neighbor columns and corresponding ex…
cfrick13 Dec 5, 2024
dc516b5
add code to remove those columns using global_dataset_filtering
cfrick13 Dec 5, 2024
6ef56f6
remove dropped columns from label_tables
jcass11 Dec 5, 2024
9a42385
remove density
jcass11 Dec 6, 2024
1e0dcc8
make fast load_local run all
cfrick13 Dec 6, 2024
df10a28
Merge branch 'dev' of github.com:AllenCell/nuc-morph-analysis into fe…
cfrick13 Dec 6, 2024
be138ba
Merge remote-tracking branch 'origin/dev' into feature_documentation
chantelleleveille Dec 6, 2024
d64ce32
update TFE to reflect new features
chantelleleveille Dec 6, 2024
b81fdc1
restore load_local=False
cfrick13 Dec 6, 2024
08741ba
Merge pull request #53 from AllenCell/feature_doc_test_density_frick
jcass11 Dec 6, 2024
8d7aa2d
remove old density feature from repo
jcass11 Dec 6, 2024
3d0b3f8
use glossary file from tfe update commit d64ce32
jcass11 Dec 6, 2024
e5b537b
boolean feature correction
chantelleleveille Dec 6, 2024
e3b498e
update glossary
chantelleleveille Dec 6, 2024
4140bfd
Merge remote-tracking branch 'origin/dev' into feature_documentation
chantelleleveille Dec 6, 2024
7936229
update saving csv for quilt
chantelleleveille Dec 7, 2024
ff3c2d1
remove power law fit
chantelleleveille Dec 7, 2024
d2cbd5a
remove power law fit
chantelleleveille Dec 7, 2024
a8fceaf
update feature documentation
chantelleleveille Dec 7, 2024
ab83497
update import paths
chantelleleveille Dec 7, 2024
bdbb8bc
update documentation
chantelleleveille Dec 7, 2024
a8aaa29
remove extraneous denisities output from calc colony metrics
jcass11 Dec 7, 2024
38a6c61
remove extra _ output no longer expected from _calculate_distance
jcass11 Dec 7, 2024
e13c133
Merge branch 'feature_documentation' of https://github.com/AllenCell/…
chantelleleveille Dec 9, 2024
280d050
Fix label table duplicate
chantelleleveille Dec 9, 2024
262 changes: 160 additions & 102 deletions docs/feature_documentation.md

Large diffs are not rendered by default.

@@ -30,18 +30,15 @@
os.makedirs(str(fig_dir), exist_ok=True)

# now plot correlation
feature_list = ['2d_area_nucleus','2d_area_pseudo_cell','2d_area_cyto','inv_cyto_density','2d_area_nuc_cell_ratio']
feature_list = ['2d_area_nucleus','2d_area_pseudo_cell','2d_area_nuc_cell_ratio']
for feature in feature_list:
fig,ax = plt.subplots(figsize=(2.5,2.5),layout = 'constrained')

if 'ratio' in feature: # ratio features
xunits = 1.0
yunits = 1.0
unitstr = ''
elif 'inv_cyto' in feature: # density features
xunits = 1/((0.108)**2)
yunits = 1/((0.108*2.5)**2)
unitstr = '(1/um^2)'

else: # area features
xunits = (0.108)**2
yunits = (0.108*2.5)**2
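With the `inv_cyto` density branch removed, the unit selection above reduces to two cases: ratio features stay dimensionless and area features are scaled by the pixel size squared. A minimal sketch of that logic, assuming the 0.108 um pixel size and 2.5x factor hard-coded in the snippet (the area unit label is an assumption, since that line is truncated here):

```python
PIXEL_SIZE_UM = 0.108   # xy pixel size used in the snippet above
SCALE_FACTOR = 2.5      # second-axis scale factor used in the snippet above

def get_plot_units(feature):
    """Return (xunits, yunits, unit label) for a feature name."""
    if "ratio" in feature:  # ratio features are dimensionless
        return 1.0, 1.0, ""
    # area features: convert pixel^2 to um^2 (label assumed)
    return PIXEL_SIZE_UM**2, (PIXEL_SIZE_UM * SCALE_FACTOR) ** 2, "(um^2)"

for feature in ["2d_area_nucleus", "2d_area_pseudo_cell", "2d_area_nuc_cell_ratio"]:
    print(feature, get_plot_units(feature))
```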
@@ -232,8 +232,6 @@ def run_validation_and_plot(TIMEPOINT=48,colony='medium',RESOLUTION_LEVEL=1,plot
plot_colorized_image_with_contours(img_dict,dft,'2d_area_pseudo_cell','viridis',colony,TIMEPOINT,RESOLUTION_LEVEL,categorical=False,draw_contours=True)
plot_colorized_image_with_contours(img_dict,dft,'2d_intensity_min_edge','viridis',colony,TIMEPOINT,RESOLUTION_LEVEL,categorical=False,draw_contours=False)
plot_colorized_image_with_contours(img_dict,dft,'2d_intensity_min_edge','viridis',colony,TIMEPOINT,RESOLUTION_LEVEL,categorical=False,draw_contours=True)
plot_colorized_image_with_contours(img_dict,dft,'density','viridis',colony,TIMEPOINT,RESOLUTION_LEVEL,categorical=False,draw_contours=False)
plot_colorized_image_with_contours(img_dict,dft,'density','viridis',colony,TIMEPOINT,RESOLUTION_LEVEL,categorical=False,draw_contours=True)
plot_colorized_image_with_contours(img_dict,dft,'zeros','viridis',colony,TIMEPOINT,RESOLUTION_LEVEL,categorical=False,draw_contours=True)
return dft0
else:
28 changes: 0 additions & 28 deletions nuc_morph_analysis/analyses/height/plot.py
@@ -96,34 +96,6 @@ def height_colony_time_alignment(
)


def calculate_mean_density(df, scale):
"""
Calculate the mean height for a given index_sequence (i.e. timepoint) and the standard deviation of the mean.

Parameters
----------
df : pandas.DataFrame
DataFrame containing the data.
pixel_size : float
Pixel size in microns.

Returns
-------
mean_height : list
List of mean heights for each index_sequence.
standard_dev_height : list
List of standard deviations of the mean heights for each index_sequence.
"""
mean = []
standard_dev = []
feature_col = "2d_area_nuc_cell_ratio"
for _, df_frame in df.groupby("index_sequence"):
density = df_frame[feature_col].values * scale
mean.append(np.nanmean(density))
standard_dev.append(np.nanstd(density))
return mean, standard_dev


def density_colony_time_alignment(
df,
pixel_size,
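The deleted `calculate_mean_density` helper returned the per-timepoint mean and standard deviation of a scaled feature column (despite its docstring mentioning height). If that summary is still wanted without the density naming, a pandas groupby is enough; a sketch, assuming a dataframe with `index_sequence` and `2d_area_nuc_cell_ratio` columns:

```python
import pandas as pd

def per_timepoint_stats(df, feature_col="2d_area_nuc_cell_ratio", scale=1.0):
    """Mean and standard deviation of the scaled feature for each index_sequence (timepoint)."""
    grouped = df.groupby("index_sequence")[feature_col]
    mean = (grouped.mean() * scale).to_numpy()                # NaNs skipped, as with np.nanmean
    standard_dev = (grouped.std(ddof=0) * scale).to_numpy()   # ddof=0 matches np.nanstd
    return mean, standard_dev
```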

This file was deleted.

This file was deleted.

@@ -1,17 +1,15 @@
#%%
from nuc_morph_analysis.lib.preprocessing import global_dataset_filtering
from nuc_morph_analysis.analyses.neighbor_of_X import example_dying_track_neighbors
from nuc_morph_analysis.analyses.neighbor_of_X import example_mitotic_track_neighbors
from nuc_morph_analysis.analyses.neighbor_of_X import example_timepoint_neighbors_of_dying
from nuc_morph_analysis.analyses.neighbor_of_X import example_timepoint_of_neighbors_of_mitotic
from nuc_morph_analysis.analyses.neighbor_of_X import example_timepoint_numbers_of_mitotic
from nuc_morph_analysis.analyses.neighbor_of_X import example_of_tracks_with_multiple_mitotic_neighbors
from nuc_morph_analysis.analyses.neighbor_of_X.examples import example_dying_track_neighbors
from nuc_morph_analysis.analyses.neighbor_of_X.examples import example_mitotic_track_neighbors
from nuc_morph_analysis.analyses.neighbor_of_X.examples import example_timepoint_neighbors_of_dying
from nuc_morph_analysis.analyses.neighbor_of_X.examples import example_timepoint_of_neighbors_of_mitotic
from nuc_morph_analysis.analyses.neighbor_of_X.examples import example_of_tracks_with_multiple_mitotic_neighbors
#%%
df = global_dataset_filtering.load_dataset_with_features(dataset='all_baseline')
#%%
example_dying_track_neighbors.run_example(df)
example_mitotic_track_neighbors.run_example(df)
example_timepoint_neighbors_of_dying.run_example(df)
example_timepoint_of_neighbors_of_mitotic.run_example(df)
example_timepoint_numbers_of_mitotic.run_example(df)
example_of_tracks_with_multiple_mitotic_neighbors.run_example(df)
16 changes: 5 additions & 11 deletions nuc_morph_analysis/lib/preprocessing/add_colony_metrics.py
@@ -34,7 +34,6 @@ def add_colony_metrics(df: pd.DataFrame):
1 are depth 2, etc.
neighbors: string. List of neighboring Cell IDs
neigh_distance: float. Unit: voxels. Mean distance to neighboring cells
density: float. Unit: voxels. 1 / neigh_distance^2
"""
# This function is only intended to run on data from one colony at a time
if "dataset" in df.columns:
@@ -54,16 +53,13 @@


def _add_colony_metrics_one_tp(df_timepoint: pd.DataFrame):
depth_map, neighborhoods, neigh_dists, densities = _calc_colony_metrics(df_timepoint)
depth_map, neighborhoods, neigh_dists = _calc_colony_metrics(df_timepoint)
for _, (lbl, depth) in enumerate(depth_map.items()):
df_timepoint.loc[df_timepoint["label_img"] == lbl, "colony_depth"] = depth

for _, (lbl, dist) in enumerate(neigh_dists.items()):
df_timepoint.loc[df_timepoint["label_img"] == lbl, "neigh_distance"] = dist

for _, (lbl, density) in enumerate(densities.items()):
df_timepoint.loc[df_timepoint["label_img"] == lbl, "density"] = density

for _, (lbl, neighbors) in enumerate(neighborhoods.items()):
neighbor_ids = []
for neighbor in neighbors:
@@ -88,16 +84,15 @@ def _calc_colony_metrics(df_timepoint):
neighbors = _make_neighbor_map(voronoi, labels)

centroids_by_label = {label: centroids_list[index] for index, label in enumerate(labels)}
neigh_distance, density = _calculate_distance_density(labels, neighbors, centroids_by_label)
neigh_distance = _calculate_distance(labels, neighbors, centroids_by_label)

depth1_labels = _get_depth1_labels(labels, centroids_list, voronoi)
depth_map = calculate_depth(neighbors, depth1_labels)

return depth_map, neighbors, neigh_distance, density
return depth_map, neighbors, neigh_distance


def _calculate_distance_density(labels, neighbors, centroids):
density = {}
def _calculate_distance(labels, neighbors, centroids):
neigh_distance = {}
for lbl in labels:
try:
@@ -114,9 +109,8 @@ def _calculate_distance_density(labels, neighbors, centroids):
if neighbor != lbl:
dist = np.sqrt(np.sum((centroid - np.array(centroids[neighbor])) ** 2, axis=0))
dists.append(dist)
density[lbl] = 1 / np.mean(dists) ** 2
neigh_distance[lbl] = np.mean(dists)
return neigh_distance, density
return neigh_distance


def _make_neighbor_map(voronoi, labels):
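After this refactor the helper returns only the mean centroid-to-neighbor distance; the `1 / mean_distance**2` density is no longer derived from it. The same computation in isolation, as a sketch (assuming `neighbors` maps each label to its Voronoi neighbor labels and `centroids` maps labels to coordinate arrays; the try/except from the original is omitted):

```python
import numpy as np

def mean_neighbor_distance(labels, neighbors, centroids):
    """Mean Euclidean distance (voxels) from each nucleus centroid to its Voronoi neighbors."""
    neigh_distance = {}
    for lbl in labels:
        centroid = np.asarray(centroids[lbl])
        dists = [
            np.linalg.norm(centroid - np.asarray(centroids[neighbor]))
            for neighbor in neighbors.get(lbl, [])
            if neighbor != lbl
        ]
        neigh_distance[lbl] = np.mean(dists) if dists else np.nan
    return neigh_distance
```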
36 changes: 21 additions & 15 deletions nuc_morph_analysis/lib/preprocessing/add_features.py
@@ -475,7 +475,7 @@ def get_sister(df, pid, current_tid):
sister_id = [tid for tid in tids if tid != current_tid]
return sister_id

def add_lineage_features(df, feature_list):
def add_lineage_features(df, feature_list, relationship_list=['mother', 'sister']):
"""
If the full track has a full track sister or mother, add the given relative's feature as a single track feature column in the dataframe.

Expand All @@ -485,6 +485,8 @@ def add_lineage_features(df, feature_list):
The dataframe
feature_list: list
List of column names
relationship_list: list
List of relationships to add

Returns
-------
@@ -493,17 +495,19 @@
"""

for feature in feature_list:
df[f"mothers_{feature}"] = np.nan
df[f"sisters_{feature}"] = np.nan
if 'mother' in relationship_list:
df[f"mothers_{feature}"] = np.nan
if 'sister' in relationship_list:
df[f"sisters_{feature}"] = np.nan

df_lineage = df[df['colony'].isin(['small', 'medium'])]

for tid, dft in df_lineage.groupby("track_id"):
parent_id = dft.parent_id.values[0]
if parent_id != -1 and parent_id in df_lineage.track_id.unique():
if 'mother' in relationship_list and parent_id != -1 and parent_id in df_lineage.track_id.unique():
for feature in feature_list:
df.loc[df.track_id == tid, f"mothers_{feature}"] = df_lineage.loc[df_lineage.track_id == parent_id, feature].values[0]
if parent_id != -1:
if 'sister' in relationship_list and parent_id != -1:
sister_id = get_sister(df_lineage, parent_id, tid)
if len(sister_id) > 0:
for feature in feature_list:
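The new `relationship_list` argument lets callers add only the relative columns they need, e.g. just the `sisters_*` features used by the LRM. A usage sketch (the feature name below is illustrative, not a column this PR adds):

```python
from nuc_morph_analysis.lib.preprocessing import global_dataset_filtering
from nuc_morph_analysis.lib.preprocessing.add_features import add_lineage_features

df = global_dataset_filtering.load_dataset_with_features(dataset="all_baseline")

# Default: add both mothers_<feature> and sisters_<feature> columns for full tracks.
df = add_lineage_features(df, feature_list=["volume_at_B"])

# Only add the sister columns, e.g. when only sisters_* features feed the LRM.
df = add_lineage_features(df, feature_list=["volume_at_B"], relationship_list=["sister"])
```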
@@ -570,17 +574,18 @@ def sum_mitotic_events_along_full_track(df0, feature_list=[]):
"""

mitotic_event_features = [
'number_of_frame_of_breakdown_neighbors',
'number_of_frame_of_formation_neighbors',
'has_mitotic_neighbor_breakdown',
'has_mitotic_neighbor_formation',
'has_mitotic_neighbor_breakdown_forward_dilated',
'has_mitotic_neighbor_formation_backward_dilated',
'has_mitotic_neighbor',
'has_mitotic_neighbor_dilated',
'has_dying_neighbor',
'has_dying_neighbor_forward_dilated',
'number_of_frame_of_death_neighbors'

# 'number_of_frame_of_breakdown_neighbors',
# 'number_of_frame_of_formation_neighbors',
# 'has_mitotic_neighbor_breakdown',
# 'has_mitotic_neighbor_formation',
# 'has_mitotic_neighbor_breakdown_forward_dilated',
# 'has_mitotic_neighbor_formation_backward_dilated',
# 'has_mitotic_neighbor_dilated',
# 'has_dying_neighbor_forward_dilated',
# 'number_of_frame_of_death_neighbors'
]

if len(feature_list) == 0:
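With most entries commented out, the flags left active in `mitotic_event_features` appear to be the plain `has_mitotic_neighbor` and `has_dying_neighbor` booleans. The per-track totals that `sum_mitotic_events_along_full_track` produces can be pictured as a groupby-sum over those per-frame flags; a sketch of the idea, not the repository's exact implementation (which is truncated in this hunk):

```python
import pandas as pd

def sum_event_flags_per_track(df, feature_list):
    """Count, per track_id, how many frames each boolean event flag is True."""
    sums = df.groupby("track_id")[feature_list].sum()
    return sums.add_prefix("sum_").reset_index()

# e.g. sum_event_flags_per_track(df, ["has_mitotic_neighbor", "has_dying_neighbor"])
```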
@@ -635,7 +640,6 @@ def add_features_at_transition(df,
'neighbor_avg_lrm_height_90um',
'neighbor_avg_lrm_xy_aspect_90um',
'neighbor_avg_lrm_mesh_sa_90um',
'neighbor_avg_dxdt_48_volume_90um',
'neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um']
):
"""
@@ -709,6 +713,8 @@ def add_volume_change_over_25_minute_window(df, bin_interval=5):
# run the compute_change_over_time workflow for a given bin_interval
dfm = run_script(dfm,['volume'], [bin_interval], time_location='end')
dfm['volume_change_over_25_minutes'] = dfm['dxdt_5_volume_end']*5
# drop the dxdt_5_volume_end column
dfm = dfm.drop(columns=['dxdt_5_volume_end'])

# now check that all columns in df have the same dtype as columns in dfm
for col in df.columns:
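The added lines compute `volume_change_over_25_minutes` from the transient `dxdt_5_volume_end` column and then drop that intermediate so it does not end up in the released table. A toy illustration of the derive-then-drop step (values made up; `dxdt_5_volume_end` is treated as the binned rate produced by the surrounding code):

```python
import pandas as pd

dfm = pd.DataFrame({"track_id": [1, 1], "dxdt_5_volume_end": [2.0, -1.5]})
dfm["volume_change_over_25_minutes"] = dfm["dxdt_5_volume_end"] * 5
dfm = dfm.drop(columns=["dxdt_5_volume_end"])  # keep only the documented feature
print(dfm.columns.tolist())  # ['track_id', 'volume_change_over_25_minutes']
```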
@@ -7,8 +7,8 @@
from nuc_morph_analysis.lib.preprocessing import filter_data
from nuc_morph_analysis.lib.preprocessing.filter_data import all_timepoints_minimal_filtering

LOCAL_RADIUS_LIST = [90, -1]
LOCAL_RADIUS_STR_LIST = ["90um", "whole_colony"]
LOCAL_RADIUS_LIST = [90]
LOCAL_RADIUS_STR_LIST = ["90um"]
NEIGHBOR_FEATURE_LIST = ["volume"]
NEIGHBOR_PREFIX = "neighbor_avg_lrm_"

@@ -143,7 +143,7 @@ def run_script(
dfi = df[df["colony"] == colony]
pass_cols = ["index_sequence", "colony", "track_id", "centroid_x", "centroid_y"]

columns = feature_list + [x for x in dfi.columns if "dxdt" in x]
columns = feature_list

# first find the unique index_sequence values
index_sequences = dfi["index_sequence"].unique()
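The `neighbor_avg_lrm_*_90um` columns kept by this script appear to be local-radius means: for each nucleus at a timepoint, a feature is averaged over the nuclei whose centroids lie within 90 um. The whole-colony variant (radius -1) is dropped, and `dxdt` columns are no longer swept into the feature list automatically. A minimal sketch of the local-radius idea using a KD-tree, assuming centroid coordinates already in microns (an illustration only, not this file's implementation, which is truncated above):

```python
import numpy as np
import pandas as pd
from scipy.spatial import cKDTree

def local_radius_mean(df_tp: pd.DataFrame, feature: str, radius_um: float = 90.0) -> np.ndarray:
    """Mean of `feature` over all nuclei within `radius_um` of each nucleus at one timepoint."""
    coords = df_tp[["centroid_x", "centroid_y"]].to_numpy(dtype=float)
    values = df_tp[feature].to_numpy(dtype=float)
    tree = cKDTree(coords)
    neighbor_idx = tree.query_ball_point(coords, r=radius_um)  # each point's in-radius neighbors
    return np.array([np.nanmean(values[idx]) for idx in neighbor_idx])

# e.g., for one colony and timepoint:
# df_tp = df[(df["colony"] == "medium") & (df["index_sequence"] == 48)].copy()
# df_tp["neighbor_avg_lrm_volume_90um"] = local_radius_mean(df_tp, "volume")
```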