diff --git a/nuc_morph_analysis/analyses/cell_health/cell_health_plots.py b/nuc_morph_analysis/analyses/cell_health/cell_health_plots.py
index 6740a399..8743a375 100644
--- a/nuc_morph_analysis/analyses/cell_health/cell_health_plots.py
+++ b/nuc_morph_analysis/analyses/cell_health/cell_health_plots.py
@@ -87,6 +87,8 @@ def plot_event_histogram(df, event_type, figdir):
         ax[2].set_xlabel('Time (hr)') 
         plt.tight_layout()
         
+        print(f"{colony}: max {percent_event.max():.2f}%")
+        
         save_and_show_plot(f'{figdir}/{event_label}_histogram_{colony}')
     
 
diff --git a/nuc_morph_analysis/analyses/linear_regression/analysis_plots.py b/nuc_morph_analysis/analyses/linear_regression/analysis_plots.py
new file mode 100644
index 00000000..708fbfc3
--- /dev/null
+++ b/nuc_morph_analysis/analyses/linear_regression/analysis_plots.py
@@ -0,0 +1,265 @@
+import seaborn as sns
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from nuc_morph_analysis.lib.visualization.plotting_tools import get_plot_labels_for_metric
+from nuc_morph_analysis.lib.visualization.notebook_tools import save_and_show_plot
+from nuc_morph_analysis.analyses.linear_regression.linear_regression import fit_linear_regression
+from nuc_morph_analysis.analyses.linear_regression.select_features import (get_feature_list)
+
+def plot_feature_cluster_correlations(df_track_level_features, feature_list, figdir):
+    """
+    Plot clustermap of feature correlations.   
+    
+    Parameters
+    ----------
+    df_track_level_features : pd.DataFrame
+        DataFrame containing track level features
+    feature_list : list
+        List of features to include in the clustermap
+        Output from get_feature_list
+    figdir : str
+        Directory to save the figure
+
+    Returns
+    -------
+    Figure
+    """
+    data = df_track_level_features[feature_list]
+    
+    cluster_grid = sns.clustermap(data.corr(), vmin=-1, vmax=1, cmap='BrBG', #'vlag' red to blue
+                                cbar_pos=(0.4, 0.9, 0.3, 0.02), 
+                                cbar_kws={"orientation": "horizontal"},
+                                annot=True, fmt=".1f", annot_kws={"size": 12},
+                                figsize=(18, 18))
+
+        
+    # Hide the dendrograms
+    cluster_grid.ax_row_dendrogram.set_visible(False)
+    cluster_grid.ax_col_dendrogram.set_visible(False)
+    
+    # Get the reordered labels using the dendrogram information
+    reordered_column_index = cluster_grid.dendrogram_col.reordered_ind
+    reordered_labels = [get_plot_labels_for_metric(data.columns[i])[1] for i in reordered_column_index]
+    
+    # add a number to the end of each label
+    reordered_labels = [f'{label} ({i+1})' for i, label in enumerate(reordered_labels)]
+    
+    # for the bottom label just use numbers 
+    numbered_labels = [f'{i+1}' for i in range(len(reordered_labels))]
+
+    # Ensure the number of labels matches the number of ticks
+    cluster_grid.ax_heatmap.set_xticks([x + 0.5 for x in range(len(numbered_labels))])
+    cluster_grid.ax_heatmap.set_xticklabels(numbered_labels, rotation=0)
+    cluster_grid.ax_heatmap.set_yticks([y + 0.5 for y in range(len(reordered_labels))])
+    cluster_grid.ax_heatmap.set_yticklabels(reordered_labels, rotation=0)
+    
+    # Adjust the padding between the labels and the heatmap
+    cluster_grid.ax_heatmap.tick_params(axis='x', labelsize=12, width=0.7)
+    cluster_grid.ax_heatmap.tick_params(axis='y', labelsize=12, width=0.7,  labelright=False, labelleft=True, left=True, right=False)
+
+    save_and_show_plot(f'{figdir}/feature_correlation_clustermap', figure=cluster_grid.fig, bbox_inches='tight', dpi=300)
+
+
+def run_regression(df_track_level_features, target, features, name, alpha, figdir="figures/"):
+    """
+    Run linear regression on the given dataset and return the results.
+
+    Parameters:
+    ----------
+    df_track_level_features (pd.DataFrame): DataFrame containing the track level features.
+    target (str): The target variable for regression.
+    features (list): List of features to be used for regression.
+    name (str): Name of the feature group.
+    alpha (list): List of alpha values for regularization.
+    figdir (str): Directory path to save the figures.
+
+    Returns:
+    --------
+    dict: A dictionary containing the target, feature group name, mean R-squared value, 
+          standard deviation of R-squared values, alpha value, and the features used.
+    """
+    _, all_test_sc, _ = fit_linear_regression(
+        df_track_level_features, 
+        cols=get_feature_list(features, target), 
+        target=target, 
+        alpha=alpha,
+        tol=0.04, 
+        save_path=figdir,
+        save=False,
+        multiple_predictions=False
+    )
+    print(f"Target {target}, Alpha: {alpha}. Feature group: {name}")
+    r_squared = all_test_sc["Test r$^2$"].mean()
+    std = all_test_sc["Test r$^2$"].std()
+    return {'target': target, 'feature_group': name, 'r_squared': r_squared, 'stdev': std, 'alpha': 0, 'feats_used': get_feature_list(features, target)}
+    
+def run_regression_workflow(targets, feature_configs, df_track_level_features, alpha, figdir="figures/"):
+    """
+    Run the regression workflow for multiple targets and feature configurations.
+
+    Parameters:
+    ----------
+    targets (list): List of target variables for regression.
+    feature_configs (dict): Dictionary where keys are feature group names and values are lists of features.
+    df_track_level_features (pd.DataFrame): DataFrame containing the track level features.
+    figdir (str): Directory path to save the figures and results.
+    alpha (float): Alpha value for regularization.
+
+    Returns:
+    --------
+    pd.DataFrame: DataFrame containing the results of the regression workflow, including target, 
+                  R-squared value, standard deviation, feature group, alpha value, and features used.
+    """
+    df = pd.DataFrame(columns=['target', 'r_squared', 'stdev', 'feature_group', 'alpha', 'feats_used'])
+
+    for target in targets:
+        for name, features in feature_configs.items():
+            result = run_regression(df_track_level_features, target, features, name, [alpha], figdir)
+            df = df.append(result, ignore_index=True)
+            df.to_csv(f"{figdir}r_squared_results.csv")
+
+    df['num_feats_used'] = df['feats_used'].apply(lambda x: len(x))
+    
+    return df
+
+    
+def plot_heatmap(df, figdir, cmap='coolwarm'):
+    """
+    Plot heatmap of r_squared values for different feature groups.
+    
+    Parameters
+    ----------
+    df : pd.DataFrame
+        DataFrame containing r_squared values for different feature groups
+    figdir : str
+        Directory to save the figure
+    cmap: str 
+        linear colormap
+    
+    Returns
+    -------
+    Figure
+    """
+    # Split the 'feature_group' column into two
+    df[['start_lifetime', 'intrinsic_extrinsic']] = df['feature_group'].str.split('_', expand=True)
+    def replace_values(val):
+        if val in ['all', 'features']:
+            return 'all_features'
+        else:
+            return val
+    df[['start_lifetime', 'intrinsic_extrinsic']] = df[['start_lifetime', 'intrinsic_extrinsic']].applymap(replace_values)
+    for index, row in df.iterrows():
+        if row['start_lifetime'] == 'intrinsic':
+            df.at[index, 'intrinsic_extrinsic'] = 'intrinsic'
+            df.at[index, 'start_lifetime'] = 'both'
+        elif row['start_lifetime'] == 'extrinsic':
+            df.at[index, 'intrinsic_extrinsic'] = 'extrinsic'
+            df.at[index, 'start_lifetime'] = 'both'
+
+    for target, df_target in df.groupby('target'):
+        pivot_df = df_target.pivot(index='start_lifetime', columns='intrinsic_extrinsic', values='r_squared')
+        pivot_df_std = df_target.pivot(index='start_lifetime', columns='intrinsic_extrinsic', values='stdev')
+
+        fig, ax = plt.subplots(figsize=(10, 8))
+
+        sns.heatmap(pivot_df, annot=False, cmap=cmap, ax=ax, vmin=0, vmax=0.5)
+        
+        first_element = True
+        for text_x in range(pivot_df.shape[0]):
+            for text_y in range(pivot_df.shape[1]):
+                value = pivot_df.iloc[text_x, text_y]
+                std_dev = pivot_df_std.iloc[text_x, text_y]
+                if not np.isnan(value):
+                    color = 'white' if first_element else 'black'
+                    ax.text(text_y+0.5, text_x+0.5, f'{value:.2f} ± {std_dev:.2f}', 
+                            horizontalalignment='center', 
+                            verticalalignment='center',
+                            color=color, fontsize=16)
+                    first_element = False
+        
+        ax.set_xticklabels(['','Extrinsic','Intrinsic'])
+        ax.xaxis.tick_top()
+        ax.set_yticklabels(['', 'Both', 'Lifetime', 'Start of growth'], rotation=0)
+
+        ax.set_xlabel('')
+        ax.set_ylabel('')
+        ax.tick_params(axis='both', which='both', length=0)
+        title = ax.set_title(f'Target: {get_plot_labels_for_metric(target)[1]}', loc='left')
+        title.set_position([-0.1,1])
+        save_and_show_plot(f'{figdir}{target}_prediction_r_squared_matrix_alpha_{df.alpha[0]}',  bbox_inches='tight')
+
+
+def plot_feature_contribution(coef_alpha, test_sc, perms, target, alpha, fig_height, figdir):
+    """
+    For a given target, plot feature importance for each feature in the linear model at a specified alpha.
+    Features that touch 0 are considered not important and are excluded from the plot. 
+    
+    Paramaters
+    -------
+    coef_alpha: pd.DataFrame
+        DataFrame containing the coefficient importance for each feature
+    test_sc: pd.DataFrame
+        DataFrame containing the test r2 scores
+    perms: pd.DataFrame
+        DataFrame containing the permutation test results
+    target: str
+        Prediction feature
+    alpha: int
+        Regularization parameter
+    fig_height: int
+        Height of the figure based on number of important features
+    save_path: str
+        Path to save the plot
+        
+    Returns
+    -------
+    Figure
+    """
+    p_value = round(perms["p_value"].item(), 3)
+    test_r2_mean = round(test_sc["Test r$^2$"].mean(), 2)
+    test_r2_std = round(test_sc["Test r$^2$"].std() / 2, 2)
+    
+    for col, df_col in coef_alpha.groupby("Column"):
+        lower_bound = df_col["Coefficient Importance"].mean() - df_col["Coefficient Importance"].std()
+        upper_bound = df_col["Coefficient Importance"].mean() + df_col["Coefficient Importance"].std()
+        if lower_bound < 0 and upper_bound > 0 or df_col["Coefficient Importance"].mean() == 0:
+            coef_alpha = coef_alpha[coef_alpha["Column"] != col] # if coeff importance is 0, dont plot feature
+    
+    coef_alpha['Magnitude coefficient importance'] = abs(coef_alpha['Coefficient Importance'])
+    coef_alpha['Sign'] = coef_alpha['Coefficient Importance'].apply(lambda x: 'Positive' if x > 0 else 'Negative')
+    
+    coef_alpha['Mean Magnitude'] = coef_alpha.groupby('Column')['Magnitude coefficient importance'].transform('mean')
+    coef_alpha = coef_alpha.sort_values('Mean Magnitude', ascending=False).drop(columns=['Mean Magnitude'])
+    
+    plt.figure(figsize=(4,fig_height*.5))
+    ax = sns.barplot(
+        data=coef_alpha,
+        y="Column",
+        x="Magnitude coefficient importance",
+        hue="Sign",
+        palette={'Positive': '#156082', 'Negative': 'grey'},
+        errorbar="sd",
+        width=0.7, 
+        native_scale=True)
+    
+    for patch in ax.patches:
+            patch.set_edgecolor('black') 
+            patch.set_linewidth(1.5)
+            current_height = patch.get_height()
+            desired_height = 0.7
+            patch.set_height(desired_height)
+            patch.set_y(patch.get_y() + (current_height - desired_height) * 0.5)
+
+    ax.spines['top'].set_visible(False)
+    ax.spines['right'].set_visible(False)
+
+    plt.ylabel("")
+    plt.legend(frameon=False, bbox_to_anchor=(1.05, 1), loc='upper left')
+    if target == 'delta_volume_BC':
+        ax.get_legend().remove()
+    plt.title(f"Target: {get_plot_labels_for_metric(target)[1]}, alpha={alpha}, test r\u00B2={test_r2_mean}±{test_r2_std}, P={p_value}")
+    label_list = [get_plot_labels_for_metric(col)[1] for col in coef_alpha["Column"].unique()]
+    
+    plt.yticks(ticks=range(len(label_list)), labels=label_list)
+    save_and_show_plot(f'{figdir}/coefficients_{target}_alpha_{alpha}', dpi=300, bbox_inches='tight')
\ No newline at end of file
diff --git a/nuc_morph_analysis/analyses/linear_regression/linear_regression_workflow.py b/nuc_morph_analysis/analyses/linear_regression/linear_regression.py
similarity index 50%
rename from nuc_morph_analysis/analyses/linear_regression/linear_regression_workflow.py
rename to nuc_morph_analysis/analyses/linear_regression/linear_regression.py
index bbadf2f6..56a8d235 100644
--- a/nuc_morph_analysis/analyses/linear_regression/linear_regression_workflow.py
+++ b/nuc_morph_analysis/analyses/linear_regression/linear_regression.py
@@ -1,9 +1,7 @@
 import argparse
-import ast
 import os
 import warnings
 from pathlib import Path
-import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 import seaborn as sns
@@ -12,23 +10,68 @@
     RepeatedKFold,
     cross_validate,
 )
+from tqdm import tqdm
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
-from tqdm import tqdm
 from nuc_morph_analysis.lib.preprocessing import global_dataset_filtering, filter_data
 from sklearn.model_selection import permutation_test_score
-from nuc_morph_analysis.lib.visualization.plotting_tools import get_plot_labels_for_metric
+from nuc_morph_analysis.lib.visualization.plotting_tools import (
+    get_plot_labels_for_metric,
+)
+import imageio
+from nuc_morph_analysis.analyses.linear_regression.select_features import (
+    get_feature_list,
+)
+from nuc_morph_analysis.analyses.linear_regression.utils import (
+    list_of_strings,
+    list_of_floats,
+)
 
 pd.options.mode.chained_assignment = None  # default='warn'
 
 warnings.simplefilter(action="ignore", category=FutureWarning)
 
-def main(cols, target, alpha_range, tolerance, save_path, cached_dataframe=None):
 
+def main(
+    cols,
+    target,
+    alpha_range,
+    tolerance,
+    save_path,
+    cached_dataframe=None,
+    save_movie=False,
+):
+    """
+    Main function to perform linear regression analysis.
+
+    Parameters
+    ----------
+    cols : list of str
+        List of column names to be used as features for the regression.
+    target : str
+        The target column name for the regression.
+    alpha_range : tuple of float
+        The range of alpha values (regularization strength) to be tested.
+    tolerance : float
+        The tolerance for the optimization.
+    save_path : str
+        The path where the results and plots will be saved.
+    cached_dataframe : pd.DataFrame, optional
+        A cached DataFrame to use for the analysis. If None, a new DataFrame will be created.
+    save_movie : bool, optional
+        If True, a movie of the regression process will be saved. Default is False.
+
+    Returns
+    -------
+    None
+    """
     save_path = Path(save_path)
     save_path = save_path / Path("linear_regression")
     save_path.mkdir(parents=True, exist_ok=True)
 
+    if len(cols) < 1:
+        cols = get_feature_list(['start_intrinsic', 'lifetime_intrinsic', 'start_extrinsic', 'lifetime_extrinsic'], None)
+
     if not cached_dataframe:
         df_all = global_dataset_filtering.load_dataset_with_features()
         df_full = filter_data.all_timepoints_full_tracks(df_all)
@@ -36,18 +79,30 @@ def main(cols, target, alpha_range, tolerance, save_path, cached_dataframe=None)
     else:
         df_track_level_features = pd.read_csv(cached_dataframe)
 
-    fit_linear_regression(df_track_level_features, cols, target, alpha_range, tolerance, save_path)
+    fit_linear_regression(
+        df_track_level_features,
+        cols,
+        target,
+        alpha_range,
+        tolerance,
+        save_path,
+        save_movie,
+    )
+
 
 def fit_linear_regression(
-    data, cols, target, alpha, tol, save_path
+    data, cols, target, alpha, tol, save_path, save, permute_cols=[], multiple_predictions=True
 ):
     """
     data - track level features
-    cols - input features
+    cols - input features, must not contain rows with nans
     target - target to predict
-    alpha - hyperparameter for lasso
+    alpha - hyperparameter for lassogit 
     tol - tolerance to check drop in r^2 for finding best alpha (ex. 0.02)
     save_path - location to save files
+    save - whether to save movies and pngs
+    permute_col - list of features to permute and replace with noise
+    multiple_predictions - Boolean, whether to stop when r^2 is reduced by tolerance
     """
     sns.set_context("talk")
     random_state = 2652124
@@ -55,15 +110,27 @@ def fit_linear_regression(
     # init empty dicts and lists
     all_test_sc = []
     all_coef_alpha = []
-    all_perms = {'score': [], 'perm_score_mean': [], 'perm_score_std': [], 'p_value': [], 'alpha': []}
+    all_perms = {
+        "score": [],
+        "perm_score_mean": [],
+        "perm_score_std": [],
+        "p_value": [],
+        "alpha": [],
+    }
+
+    if multiple_predictions:
+        # remove 0 alpha due to convergence errors
+        alpha = [i for i in alpha if i != 0]
 
-    # find best alpha for Lasso model
-    for alpha_ind, this_alpha in enumerate(alpha):
-        print("fitting alpha", this_alpha)
 
-        # drop any nan rows
-        dropna_cols = cols + [target]
-        data = data.dropna(subset=dropna_cols)
+    # find best alpha for Lasso model
+    for alpha_ind, this_alpha in tqdm(enumerate(alpha), total=len(alpha)):
+        # permute columns if necessary
+        if len(permute_cols) > 0:
+            for col in permute_cols:
+                mu, sigma = 0, 1
+                noise = np.random.normal(mu, sigma, len(data))
+                data[col] = noise
 
         # make numpy array for inputs and target
         all_input = data[cols].reset_index(drop=True).values
@@ -80,25 +147,30 @@ def fit_linear_regression(
 
         # run permutation test
         score, permutation_scores, pvalue = permutation_test_score(
-            model, all_input, all_target, random_state=random_state, cv=5, n_permutations=500,
+            model,
+            all_input,
+            all_target,
+            random_state=random_state,
+            cv=5,
+            n_permutations=500,
         )
 
         # break if permutation score is less than linear regression value (max possible)
         # with a tolerance
         # or if p_value > 0.05
-        rounded_permutation_score = round(score, 2)
         if alpha_ind == 0:
-            max_val = rounded_permutation_score
-        if abs(rounded_permutation_score - max_val) > tol or (pvalue > 0.05):
-            break
+            max_val = score
+        if multiple_predictions:
+            if abs(score - max_val) > tol or (pvalue > 0.05):
+                break
 
         # if relatively equal to linear regression value, then continue
         # save permutation score and p_value to dictionary
-        all_perms['score'].append(score)
-        all_perms['perm_score_mean'].append(permutation_scores.mean())
-        all_perms['perm_score_std'].append(permutation_scores.std())
-        all_perms['p_value'].append(pvalue)
-        all_perms['alpha'].append(this_alpha)
+        all_perms["score"].append(score)
+        all_perms["perm_score_mean"].append(permutation_scores.mean())
+        all_perms["perm_score_std"].append(permutation_scores.std())
+        all_perms["p_value"].append(pvalue)
+        all_perms["alpha"].append(this_alpha)
 
         # run cross validate to get model coefficients
         cv_model = cross_validate(
@@ -120,10 +192,8 @@ def fit_linear_regression(
         )
 
         # Save test r^2 and test MSE to dataframe
-        range_test_scores = [round(i, 2) for i in cv_model["test_r2"]]
-        range_errors = [
-            round(i, 2) for i in cv_model["test_neg_mean_squared_error"]
-        ]
+        range_test_scores = [i for i in cv_model["test_r2"]]
+        range_errors = [i for i in cv_model["test_neg_mean_squared_error"]]
         test_sc = pd.DataFrame()
         test_sc[r"Test r$^2$"] = range_test_scores
         test_sc["Test MSE"] = range_errors
@@ -141,7 +211,8 @@ def fit_linear_regression(
     # Get test scores for all alpha
     all_test_sc = pd.concat(all_test_sc, axis=0).reset_index(drop=True)
     all_test_sc["Test MSE"] = -all_test_sc["Test MSE"]
-    all_test_sc.to_csv(save_path / "mse.csv")
+    save_path = save_path / Path(f"{target}")
+    save_path.mkdir(parents=True, exist_ok=True)
 
     # Get coeffs for all alpha
     all_coef_alpha = pd.concat(all_coef_alpha, axis=0).reset_index(drop=True)
@@ -150,61 +221,73 @@ def fit_linear_regression(
         var_name="Column",
         value_name="Coefficient Importance",
     ).reset_index(drop=True)
-    all_coef_alpha.to_csv(save_path / "coefficients.csv")
 
     # Get permutation scores and p values for all alpha
     all_perms = pd.DataFrame(all_perms).reset_index(drop=True)
-    all_perms.to_csv(save_path / "perm_scores.csv")
 
-    # Save coefficient plot for max alpha value
-    save_plots(all_coef_alpha, all_test_sc, all_perms, target, save_path)
+    # Save coefficient plot movie
+    if save:
+        all_test_sc.to_csv(save_path / "mse.csv")
+        all_coef_alpha.to_csv(save_path / "coefficients.csv")
+        all_perms.to_csv(save_path / "perm_scores.csv")
+        save_plots(all_coef_alpha, all_test_sc, all_perms, target, save_path)
 
     return all_coef_alpha, all_test_sc, all_perms
 
-def save_plots(all_coef_alpha, all_test_sc, all_perms, target, save_path):
 
-    # subset to max alpha
-    max_alpha = all_coef_alpha['alpha'].max()
-    all_coef_alpha = all_coef_alpha.loc[all_coef_alpha['alpha'] == max_alpha].reset_index(drop=True)
-    all_test_sc = all_test_sc.loc[all_test_sc['alpha'] == max_alpha].reset_index(drop=True)
-    all_perms = all_perms.loc[all_perms['alpha'] == max_alpha].reset_index(drop=True)
-
-    p_value = round(all_perms['p_value'].item(), 3)
-    test_r2_mean = round(all_test_sc['Test r$^2$'].mean(), 2)
-    test_r2_std = round(all_test_sc['Test r$^2$'].std()/2, 2)
-
-
-    g = sns.catplot(
-        data=all_coef_alpha,
-        x='Column',
-        y="Coefficient Importance",
-        kind="bar",
-        errorbar="sd",
-        aspect=1.5,
-        height=4,
-    )
-    g.fig.subplots_adjust(top=0.8) # adjust the Figure in rp
-    g.fig.suptitle(f'p-value {p_value}, test r^2 {test_r2_mean}+-{test_r2_std}')
-    label_list = [get_plot_labels_for_metric(col)[1] for col in all_coef_alpha['Column'].unique()]
-    g.set_xticklabels(label_list, rotation=90)
-    print(f'Saving coefficients_{target}_alpha_{max_alpha}.png')
-    g.savefig(save_path / f'coefficients_{target}_alpha_{max_alpha}.png')
+def save_plots(all_coef_alpha, all_test_sc, all_perms, target, save_path):
 
-def list_of_strings(arg):
-    return arg.split(",")
+    xlim = None
+    files = []
+    for alpha in all_coef_alpha["alpha"].unique():
+        this_coef_alpha = all_coef_alpha.loc[
+            all_coef_alpha["alpha"] == alpha
+        ].reset_index(drop=True)
+        this_test_sc = all_test_sc.loc[all_test_sc["alpha"] == alpha].reset_index(
+            drop=True
+        )
+        this_perms = all_perms.loc[all_perms["alpha"] == alpha].reset_index(drop=True)
+        
+        p_value = round(this_perms["p_value"].item(), 3)
+        test_r2_mean = round(this_test_sc["Test r$^2$"].mean(), 2)
+        test_r2_std = round(this_test_sc["Test r$^2$"].std() / 2, 2)
+
+        g = sns.catplot(
+            data=this_coef_alpha,
+            y="Column",
+            x="Coefficient Importance",
+            kind="bar",
+            errorbar="sd",
+            aspect=2,
+            height=10,
+        )
 
+        g.set(ylabel="")
 
-def list_of_floats(arg):
-    return list(map(float, arg.split(",")))
+        g.fig.subplots_adjust(top=0.9)  # adjust the Figure in rp
+        g.fig.suptitle(
+            f"Prediction of {get_plot_labels_for_metric(target)[1]}\nalpha={alpha:2f}, test r\u00B2={test_r2_mean}±{test_r2_std}, P={p_value}"
+        )
+        label_list = [
+            get_plot_labels_for_metric(col)[1]
+            for col in all_coef_alpha["Column"].unique()
+        ]
+        g.set_yticklabels(label_list)
+        print(f"Saving coefficients_{target}_alpha_{alpha:2f}.png")
+        this_path = str(save_path / Path(f"coefficients_{target}_alpha_{alpha:2f}.png"))
+        files.append(this_path)
 
+        if not xlim:
+            xlim = g.fig.axes[0].get_xlim()
+        g.set(xlim=xlim)
+        g.savefig(this_path, dpi=300)
 
-def str2bool(v):
-    if isinstance(v, bool):
-        return v
-    if v.lower() in ("yes", "true", "t", "y", "1"):
-        return True
-    elif v.lower() in ("no", "false", "False", "f", "n", "0"):
-        return False
+    # save movie of pngs
+    writer = imageio.get_writer(save_path / f"{target}_coefficients_over_time.mp4", fps=2)
+    for im in files:
+        writer.append_data(imageio.imread(im))
+        os.remove(im)
+    writer.close()
 
 
 if __name__ == "__main__":
@@ -222,7 +305,7 @@ def str2bool(v):
     parser.add_argument(
         "--cols",
         type=list_of_strings,
-        default=['volume_at_B', 'time_at_B', 'colony_time_at_B', 'SA_at_B'],
+        default=[],
         help="Supply a list of column names to use as independent variables in the linear regression analysis.",
     )
     parser.add_argument(
@@ -234,7 +317,7 @@ def str2bool(v):
     parser.add_argument(
         "--alpha_range",
         type=list_of_floats,
-        default=[1.3],
+        default=np.arange(0, 15, 0.1, dtype=float),
         help="Supply a list of alpha values to use in lasso regression",
     )
     parser.add_argument(
@@ -249,6 +332,12 @@ def str2bool(v):
         default=0.02,
         help="Tolerace for change in regression score to determine best alpha",
     )
+    parser.add_argument(
+        "--save",
+        type=bool,
+        default=False,
+        help="Save plots",
+    )
     args = parser.parse_args()
     main(
         cols=args.cols,
@@ -256,5 +345,6 @@ def str2bool(v):
         alpha_range=args.alpha_range,
         tolerance=args.tolerance,
         save_path=args.save_path,
-        cached_dataframe=args.cached_dataframe
+        cached_dataframe=args.cached_dataframe,
+        save_movie=args.save,
     )
diff --git a/nuc_morph_analysis/analyses/linear_regression/scripts/added_volume.sh b/nuc_morph_analysis/analyses/linear_regression/scripts/added_volume.sh
deleted file mode 100755
index 2b6da4d1..00000000
--- a/nuc_morph_analysis/analyses/linear_regression/scripts/added_volume.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-python linear_regression_workflow.py \
---cols 'volume_at_A','volume_at_B','time_at_A','time_at_B','colony_time_at_A','colony_time_at_B','SA_at_B' \
---alpha_range 0,0.1,0.5,1,1.3,1.5,2,2.5,5,10,11,12,13 \
---target 'delta_volume_BC' \
---save_path "../../figures/" \
\ No newline at end of file
diff --git a/nuc_morph_analysis/analyses/linear_regression/scripts/duration_BC.sh b/nuc_morph_analysis/analyses/linear_regression/scripts/duration_BC.sh
deleted file mode 100755
index ac7c53d7..00000000
--- a/nuc_morph_analysis/analyses/linear_regression/scripts/duration_BC.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-python linear_regression_workflow.py \
---cols 'volume_at_A','volume_at_B','time_at_A','time_at_B','colony_time_at_A','colony_time_at_B','SA_at_B' \
---alpha_range 0,0.1,0.5,1,1.3,1.5,2,2.5,5,10 \
---target 'duration_BC' \
---save_path "../../figures/" \
\ No newline at end of file
diff --git a/nuc_morph_analysis/analyses/linear_regression/scripts/ending_volume.sh b/nuc_morph_analysis/analyses/linear_regression/scripts/ending_volume.sh
deleted file mode 100755
index bc098992..00000000
--- a/nuc_morph_analysis/analyses/linear_regression/scripts/ending_volume.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-python linear_regression_workflow.py \
---cols 'volume_at_A','volume_at_B','time_at_A','time_at_B','colony_time_at_A','colony_time_at_B','SA_at_B' \
---alpha_range 0,0.1,0.5,1,1.3,1.5,2,2.5,5,10,11,12,13 \
---target 'volume_at_C' \
---save_path "../../figures/" \
\ No newline at end of file
diff --git a/nuc_morph_analysis/analyses/linear_regression/select_features.py b/nuc_morph_analysis/analyses/linear_regression/select_features.py
new file mode 100644
index 00000000..0d714f3e
--- /dev/null
+++ b/nuc_morph_analysis/analyses/linear_regression/select_features.py
@@ -0,0 +1,83 @@
+FEATURE_GROUPS = {
+    'start_intrinsic': [ #intrinic at start of growth
+        'volume_at_B', 
+        'height_at_B',
+        'SA_vol_ratio_at_B',
+        'SA_at_B',
+        'xy_aspect_at_B',
+        'sisters_volume_at_B',
+    ],
+    
+    'lifetime_intrinsic': [ # intrinsic lifetime
+        'duration_BC', 
+        'volume_at_C',
+        'delta_volume_BC',
+        'late_growth_rate_by_endpoints',
+        'tscale_linearityfit_volume',
+        'sisters_duration_BC',
+        'sisters_delta_volume_BC',
+    ],
+    
+    'start_extrinsic': [ # extrinsic at start of growth
+        'time_at_B', 
+        'colony_time_at_B',
+        'neighbor_avg_lrm_volume_90um_at_B', 
+        'neighbor_avg_lrm_height_90um_at_B',
+        'neighbor_avg_lrm_xy_aspect_90um_at_B',
+        'neighbor_avg_lrm_mesh_sa_90um_at_B',
+        'early_neighbor_avg_dxdt_48_volume_90um',   
+    ],
+    
+    'lifetime_extrinsic': [ # extrinsic lifetime
+        'normalized_sum_has_mitotic_neighbor',
+        'normalized_sum_has_dying_neighbor',
+        'mean_neighbor_avg_lrm_volume_90um', 
+        'mean_neighbor_avg_lrm_height_90um',
+        'mean_neighbor_avg_lrm_xy_aspect_90um',
+        'mean_neighbor_avg_lrm_mesh_sa_90um',
+        'mean_neighbor_avg_dxdt_48_volume_90um',
+        'mean_neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um',
+        ],
+}
+
+TARGET_CONTAINTING_FEATS = {
+    'duration_BC': [
+        'duration_BC',
+        'late_growth_rate_by_endpoints',
+        ],
+    'delta_volume_BC': [
+        'volume_at_C',
+        'delta_volume_BC',
+        'late_growth_rate_by_endpoints',
+        ],
+    None: [
+        '',
+    ]
+}
+
+def get_feature_list(feature_group_list, target):
+    """
+    Get feature list to include in linear model. 
+    Gets full features list and excludes ones that contain target information. 
+    
+    Parameters
+    ----------
+    feature_group_list: list
+        List of feature groups to include in the feature list
+    target: str
+        Target variable to predict
+        
+    Returns
+    -------
+    features: list
+        List of features to include in the linear model
+    """
+    features = []
+    for group in feature_group_list:
+        features = features + FEATURE_GROUPS[group]
+    
+    if target is not None: 
+        features = [feature for feature in features if feature not in TARGET_CONTAINTING_FEATS[target]]
+    
+    return features
+
diff --git a/nuc_morph_analysis/analyses/linear_regression/supplemental_lrm_figure_workflow.py b/nuc_morph_analysis/analyses/linear_regression/supplemental_lrm_figure_workflow.py
new file mode 100644
index 00000000..c1806c82
--- /dev/null
+++ b/nuc_morph_analysis/analyses/linear_regression/supplemental_lrm_figure_workflow.py
@@ -0,0 +1,71 @@
+#%%
+import numpy as np
+from nuc_morph_analysis.lib.preprocessing import global_dataset_filtering, filter_data
+from nuc_morph_analysis.analyses.linear_regression.linear_regression import fit_linear_regression
+from nuc_morph_analysis.analyses.linear_regression.select_features import get_feature_list
+from nuc_morph_analysis.analyses.linear_regression.analysis_plots import (run_regression_workflow,
+                                                                          plot_feature_cluster_correlations, 
+                                                                          plot_heatmap, 
+                                                                          plot_feature_contribution)
+
+#%%
+df_all = global_dataset_filtering.load_dataset_with_features()
+df_full = filter_data.all_timepoints_full_tracks(df_all)
+df_track_level_features = filter_data.track_level_features(df_full)
+
+#%%
+CONFIG = {
+    'all_features': ['start_intrinsic', 'lifetime_intrinsic', 'start_extrinsic', 'lifetime_extrinsic'],
+    'start_intrinsic': ['start_intrinsic'],
+    'lifetime_intrinsic': ['lifetime_intrinsic'],
+    'start_extrinsic': ['start_extrinsic'],
+    'lifetime_extrinsic': ['lifetime_extrinsic'],
+    'intrinsic': ['start_intrinsic', 'lifetime_intrinsic'],
+    'extrinsic': ['start_extrinsic', 'lifetime_extrinsic'],
+}
+FIGDIR='linear_regression/figures/'
+TARGETS = ['duration_BC', 'delta_volume_BC']
+
+#%% Preprocess dataframe to ensure same N for all analysis (full tracks with lineage features)
+dropna_cols = get_feature_list(CONFIG['all_features'], None)
+data = df_track_level_features.dropna(subset=dropna_cols)
+print(f"Number of tracks: {len(data)}")
+
+#%% Create maxtrix of r squared values
+df = run_regression_workflow(TARGETS, CONFIG, data, alpha=0)
+plot_heatmap(df, FIGDIR, 'YlOrRd')
+
+#%%
+RECOMPUTE_ALPHA = True
+TOLERANCE = 0.05
+computed_alpha = {'duration_BC': 1.4,
+                  'delta_volume_BC': 10.2}
+
+#%% Recompute maximum alpha when tolerance is reached
+if RECOMPUTE_ALPHA:
+    for target in ['duration_BC', 'delta_volume_BC']:
+        all_coef_alpha, all_test_sc, all_perms = fit_linear_regression(
+            data, 
+            cols=get_feature_list(CONFIG['all_features'], target), 
+            target=target, 
+            alpha=np.arange(0, 15, 0.2, dtype=float),
+            tol=TOLERANCE, 
+            save_path="figures/", 
+            save=True)
+
+        max_alpha = all_coef_alpha.alpha.max()
+        computed_alpha[target] = max_alpha
+        print(f"{target}: {max_alpha}")
+
+#%% Plot feature importance for max alpha when tolerance is reached
+for target, fig_height in zip(['duration_BC', 'delta_volume_BC'], [6, 2]): 
+    df_alpha, df_test, df_coeff = fit_linear_regression(data, 
+                                                        cols=get_feature_list(CONFIG['all_features'], target), 
+                                                        target=target, alpha=[computed_alpha[target]],
+                                                        tol=TOLERANCE, save_path=FIGDIR, save=False)
+    plot_feature_contribution(df_alpha, df_test, df_coeff, target, computed_alpha[target], fig_height, FIGDIR)
+
+#%% Plot feature correlations using lineage full tracks
+plot_feature_cluster_correlations(data, get_feature_list(CONFIG['all_features'], None), FIGDIR)
+
+# %%
diff --git a/nuc_morph_analysis/analyses/linear_regression/utils.py b/nuc_morph_analysis/analyses/linear_regression/utils.py
new file mode 100644
index 00000000..423cf66b
--- /dev/null
+++ b/nuc_morph_analysis/analyses/linear_regression/utils.py
@@ -0,0 +1,15 @@
+def list_of_strings(arg):
+    return arg.split(",")
+
+
+def list_of_floats(arg):
+    return list(map(float, arg.split(",")))
+
+
+def str2bool(v):
+    if isinstance(v, bool):
+        return v
+    if v.lower() in ("yes", "true", "t", "y", "1"):
+        return True
+    elif v.lower() in ("no", "false", "False", "f", "n", "0"):
+        return False
diff --git a/nuc_morph_analysis/lib/preprocessing/add_features.py b/nuc_morph_analysis/lib/preprocessing/add_features.py
index 0700eae0..440f1ab8 100644
--- a/nuc_morph_analysis/lib/preprocessing/add_features.py
+++ b/nuc_morph_analysis/lib/preprocessing/add_features.py
@@ -1,4 +1,5 @@
 from nuc_morph_analysis.analyses.lineage.get_features import lineage_trees
+from nuc_morph_analysis.lib.visualization.plotting_tools import get_plot_labels_for_metric
 import numpy as np
 
 FRAME_COL = {"Ff": "A", "frame_transition": "B", "Fb": "C"}
@@ -140,6 +141,68 @@ def add_feature_at(df, frame_column, feature, feature_column, multiplier=1):
         )
     return df
 
+def add_mean_feature_over_trajectory(df, feature_list, multiplier_list):
+    """
+    Add the mean of a given feature over the growth trajectory 
+    from transition to frame breakdown. 
+    
+    Parameters
+    ----------
+    df : DataFrame
+        The dataframe
+    feature_list : list
+        List of column names
+    multiplier_list : list
+        List of scale to multiply the mean by
+        
+    Returns
+    -------
+    df : DataFrame
+        The dataframe with the added mean feature columns
+    """
+    for feature, multiplier in zip(feature_list, multiplier_list):
+        for tid, dft in df.groupby("track_id"):
+            start = dft.frame_transition.values[0]
+            stop = dft.Fb.values[0]
+            df_mean = dft[(dft['index_sequence'] >= start) & (dft['index_sequence'] <= stop)]
+            mean = df_mean[feature].mean() * multiplier
+            df.loc[df.track_id == tid, f"mean_{feature}"] = mean
+    return df
+
+
+def get_early_transient_gr_of_neighborhood(df, scale, time_shift=24, window_length=6):
+    """
+    Get the transient growth rate of the local neighborhood 2 hours into the growth trajectory. 
+    
+    This time shift of two hours into the growth trajectory is necessary because the metric
+    is calculated as the average of a 4 hour rolling window. The middle of a four hour window
+    does not occur until two hours into the timelapse. To calculate this feature equivalently 
+    for each trajectory, two hours was used for all tracks to get a metric for the transient 
+    growth rate of the neighborhood early in the growth trajectory. The early transient growth
+    rate is averaged over 30 minutes as defined by the window_length. 
+    
+    Parameters
+    ----------
+    df : DataFrame
+        The dataframe
+    time_shift : int
+        The time shift in frames to calculate the transient growth rate in frames
+    window_length : int
+        The length of the time window in frames to average over
+        
+    Returns
+    -------
+    df : DataFrame
+        The dataframe with the added transient growth rate feature columns
+    """
+    for tid, dft in df.groupby("track_id"):
+        t_calculate = dft.index_sequence.min() + time_shift
+        time_window_mask = dft.index_sequence.between(t_calculate, t_calculate + window_length)
+        transient_gr_90um = dft.loc[time_window_mask, "neighbor_avg_dxdt_48_volume_90um"].mean()
+        df.loc[df.track_id == tid, "early_neighbor_avg_dxdt_48_volume_90um"] = transient_gr_90um * scale
+        
+    return df
+
 
 def add_volume_at(df, pixel_size, frame_column):
     """
@@ -390,6 +453,62 @@ def add_non_interphase_size_shape_flag(df):
     ].any(axis=1)
     return df
 
+def get_sister(df, pid, current_tid):
+    """
+    Gets the track_id of the sibling
+
+    Parameters
+    ----------
+    df: Dataframe
+        The dataset dataframe
+    track_id: int
+        The track_id of the cell
+
+    Returns
+    -------
+    sister_id: List
+        List containing the track_id of the sibling cell
+    """
+    df_sisters = df.loc[df.parent_id == pid]
+    tids = df_sisters.track_id.unique()
+    sister_id = [tid for tid in tids if tid != current_tid]
+    return sister_id
+
+def add_lineage_features(df, feature_list):
+    """
+    If the full track has a full track sister or mother, add the given relative's feature as a single track feature column in the dataframe. 
+    
+    Paramaters
+    ----------
+    df: DataFrame
+        The dataframe
+    feature_list: list
+        List of column names
+        
+    Returns
+    -------
+    df: DataFrame
+        The dataframe with new columns (ie mothers_vol_at_B, sisters_duration)
+    """
+    
+    for feature in feature_list:
+        df[f"mothers_{feature}"] = np.nan
+        df[f"sisters_{feature}"] = np.nan
+
+    df_lineage = df[df['colony'].isin(['small', 'medium'])]
+
+    for tid, dft in df_lineage.groupby("track_id"):
+        parent_id = dft.parent_id.values[0]
+        if parent_id != -1 and parent_id in df_lineage.track_id.unique():
+            for feature in feature_list:
+                df.loc[df.track_id == tid, f"mothers_{feature}"] = df_lineage.loc[df_lineage.track_id == parent_id, feature].values[0]
+        if parent_id != -1:        
+            sister_id = get_sister(df_lineage, parent_id, tid)
+            if len(sister_id) > 0:
+                for feature in feature_list:
+                    df.loc[df.track_id == tid, f"sisters_{feature}"] = df_lineage.loc[df_lineage.track_id == sister_id[0], feature].values[0]
+
+    return df
 
 def sum_events_along_full_track(df0, feature_list, index_columns=['track_id','index_sequence','Fb','frame_transition']):
     """
@@ -468,6 +587,26 @@ def sum_mitotic_events_along_full_track(df0, feature_list=[]):
 
     return sum_events_along_full_track(df0, feature_list)
 
+def normalize_sum_events(df_full, event_cols=['sum_has_mitotic_neighbor', 'sum_has_dying_neighbor']):
+    """
+    Normalize sum of mitotic and death events by growth duration 
+    
+    Parameters
+    ----------
+    df_full: DataFrame
+        The dataframe of full tracks
+    event_cols: list
+        ie. 'sum_has_mitotic_neighbor', 'sum_has_dying_neighbor'
+        
+    Returns
+    -------
+    df_full: DataFrame
+        The dataframe with the normalized sum of events columns
+    """
+    for col in event_cols:
+        df_full[f"normalized_{col}"] = df_full[col] / df_full['duration_BC']
+    return df_full
+
 def add_perimeter_ratio(df): 
     """
     compute ratio of the nucleus perimeter to the pseudo cell perimeter (2d)
@@ -486,4 +625,61 @@ def add_perimeter_ratio(df):
     
     """
     df['2d_perimeter_nuc_cell_ratio'] = df['2d_perimeter_nucleus'] / df['2d_perimeter_pseudo_cell']
-    return df
\ No newline at end of file
+    return df
+
+def add_features_at_transition(df, 
+                               feature_list=['xy_aspect', 
+                                            'SA_vol_ratio', 
+                                            'neighbor_avg_lrm_volume_90um', 
+                                            'neighbor_avg_lrm_height_90um',
+                                            'neighbor_avg_lrm_xy_aspect_90um',
+                                            'neighbor_avg_lrm_mesh_sa_90um',
+                                            'neighbor_avg_dxdt_48_volume_90um',
+                                            'neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um']
+                               ):
+    """
+    Add feature measurements at transition that are used in the linear regression analysis.
+    Features should be pre-calculated and not need to be scaled. 
+    
+    Parameters
+    ----------
+    df_full : DataFrame
+        The dataframe containing full trajectories
+    feature_list : list
+        List of column names
+    
+    Returns
+    -------
+    df_full : DataFrame
+        The dataframe with the added feature columns 
+    """
+    for feature in feature_list:
+        df = add_feature_at(df, "frame_transition", feature, feature)
+    return df
+
+def add_mean_features(df, 
+                      feature_list=['neighbor_avg_dxdt_48_volume_90um',
+                                    'neighbor_avg_lrm_volume_90um', 
+                                    'neighbor_avg_lrm_height_90um',
+                                    'neighbor_avg_lrm_xy_aspect_90um',
+                                    'neighbor_avg_lrm_mesh_sa_90um',
+                                    'neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um']
+                      ):
+    """
+    Add mean feature measurements over the growth trajectory that are used in the linear regression analysis.
+    
+    Parameters
+    ----------
+    df : DataFrame
+        The dataframe containing full trajectories
+    feature_list : list
+        List of column names
+    
+    Returns
+    -------
+    df : DataFrame
+        The dataframe with the added mean feature columns
+    """
+    multiplier_list = [get_plot_labels_for_metric(x)[0] for x in feature_list]
+    df = add_mean_feature_over_trajectory(df, feature_list, multiplier_list)
+    return df
diff --git a/nuc_morph_analysis/lib/preprocessing/add_neighborhood_avg_features_lrm.py b/nuc_morph_analysis/lib/preprocessing/add_neighborhood_avg_features_lrm.py
new file mode 100644
index 00000000..36525e5f
--- /dev/null
+++ b/nuc_morph_analysis/lib/preprocessing/add_neighborhood_avg_features_lrm.py
@@ -0,0 +1,168 @@
+# %%
+import pandas as pd
+from tqdm import tqdm
+import numpy as np
+from multiprocessing import Pool, cpu_count
+from nuc_morph_analysis.lib.preprocessing.load_data import get_dataset_pixel_size
+from nuc_morph_analysis.lib.preprocessing import filter_data
+from nuc_morph_analysis.lib.preprocessing.filter_data import all_timepoints_minimal_filtering
+
+LOCAL_RADIUS_LIST = [90, -1]
+LOCAL_RADIUS_STR_LIST = ["90um", "whole_colony"]
+NEIGHBOR_FEATURE_LIST = ["volume"]
+NEIGHBOR_PREFIX = "neighbor_avg_lrm_"
+
+def run_in_parallel(args):
+    return get_neighbor_avg_at_t(*args)
+
+
+def get_neighbor_avg_at_t(dft, local_radius_list, columns, min_neighbor_thresh=5):
+    """
+    Compute the average value of the columns for each CellId at a given timepoint within a given colony
+    by finding its neighbors within a given radius=local_radius
+
+    Parameters
+    ----------
+    dft : pd.DataFrame
+        dataframe that minimally has the following columns:
+        ['index_sequence','track_id','centroid_x','centroid_y']
+        with index = "CellId"
+    local_radius_list : list
+        list of integers that represent the radius in microns to use for finding neighbors
+        -1 signifies the whole colony
+    columns : list
+        list of column names to compute the average value for
+    min_neighbor_thresh : int
+        minimum number of neighbors required to compute the average value
+
+    Returns
+    -------
+    dft : pd.DataFrame
+        dataframe with the average value of the columns for each CellId at a given timepoint within a given colony
+    """
+
+    # now get the centroid values for all CellIds at that index sequence
+    centroid_xy = dft[["centroid_x", "centroid_y"]].values
+    # now compute pair-wise distances between all centroid_xy values
+    dists = np.linalg.norm(centroid_xy[:, None] - centroid_xy[None], axis=2)
+
+    # initialize the new columns to be added to the dataframe
+    # to avoid the PerformanceWarning: DataFrame is highly fragmented occuring in the last line of this function
+    data = {}
+    for li, local_radius in enumerate(local_radius_list):
+        local_radius_str = LOCAL_RADIUS_STR_LIST[li]
+        for column in columns:
+            data[f"{NEIGHBOR_PREFIX}{column}_{local_radius_str}"] = np.nan
+
+    dftnew = pd.DataFrame(data, index=dft.index)
+
+    for li, local_radius in enumerate(local_radius_list):
+        local_radius_str = LOCAL_RADIUS_STR_LIST[li]
+        # now for each CellId find the neighbors within a circle with radius = radius
+        pix_size = get_dataset_pixel_size(dft.colony.values[0])
+        radius = local_radius / pix_size  # pixels
+        if local_radius < 0:
+            neighbors = np.ones(dists.shape, dtype=bool)
+        else:
+            neighbors = dists < radius
+
+        # don't allow "self" to be a neighbor
+        np.fill_diagonal(neighbors, False)
+
+        for column in columns:
+            col_vals1d = dft[column].values
+            col_vals2d = np.tile(col_vals1d, (col_vals1d.size, 1)).astype(float)
+            # now to make the diagonal values nan so that the cell of interest is not included in the average, but only the neighbors
+            col_vals2d[~neighbors] = np.nan
+
+            # some rows could be all nan
+            # so we need to track the indices of the rows that are not all nan
+            # and only use those indices to update the dataframe
+            # this is important for avoiding an annoyint RuntimeWarning: Mean of empty slice
+            non_nan_neighbors_per_row = np.sum(~np.isnan(col_vals2d), axis=1)
+
+            non_nan_rows = non_nan_neighbors_per_row >= min_neighbor_thresh
+            col_vals2d = col_vals2d[non_nan_rows]
+
+            # now compute the average of the column values for each row (average of the neighbors)
+            # Compute the average value of the column for each neighborhood
+            col_vals_avg = np.nanmean(col_vals2d, axis=1)
+
+            # Add the average values to the dataframe
+            # dft.loc[non_nan_rows,f'{prefix}{column}_{local_radius_str}'] = col_vals_avg
+            dftnew.loc[
+                dft.index.values[non_nan_rows], f"{NEIGHBOR_PREFIX}{column}_{local_radius_str}"
+            ] = col_vals_avg
+    dft = dft.join(dftnew)
+    return dft
+
+
+def run_script(
+    df,
+    num_workers=1,
+    feature_list=NEIGHBOR_FEATURE_LIST,
+    local_radius_list=LOCAL_RADIUS_LIST,
+    exclude_outliers=True,
+):
+    """
+    Determine average values of features within neighborhoods of defined radius around each cell
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        dataframe with index=CellId, and the following columns minimally:
+        ['index_sequence','track_id','centroid_x','centroid_y'] + feature_list + [x for x in df.columns if 'dxdt' in x]
+    num_workers : int, optional
+        number of workers to use for parallel processing. The default is 1.
+    feature_list : list, optional
+        list of column names that represent the features to proces. The default is NEIGHBOR_FEATURE_LIST
+    local_radius_list : list, optional
+        list of integers that represent the radius in microns to use for finding neighbors. The default is LOCAL_RADIUS_LIST.
+        -1 signifies the whole colony
+    exclude_outliers : bool, optional
+        whether to exclude outliers. The default is False.
+
+    Returns
+    -------
+    dforig : pd.DataFrame
+        original dataframe with the newly computed columns added
+        index of dataframe = CellId
+    """
+
+    dforig = df.copy()
+    original_columns = dforig.columns.tolist()
+    if exclude_outliers:
+        # to ensure that outlier data points are not used for the neighborhood avgs, filter out time point outliers here
+        df = all_timepoints_minimal_filtering(df)
+
+        # also be sure to filter out non-interphase cells from neighborhood
+        # df = filter_data.filter_out_non_interphase_size_shape_flag(df)
+        # df = filter_data.filter_out_cells_entering_or_exiting_mitosis(df)
+
+    for colony in df.colony.unique():
+        dfi = df[df["colony"] == colony]
+        pass_cols = ["index_sequence", "colony", "track_id", "centroid_x", "centroid_y"]
+
+        columns = feature_list + [x for x in dfi.columns if "dxdt" in x]
+
+        # first find the unique index_sequence values
+        index_sequences = dfi["index_sequence"].unique()
+        dfin = dfi[
+            pass_cols + columns
+        ]  # reduce the size of the dataframe being passed in by only including necessary columns
+
+        # run in parallel
+        args_list = [
+            (dfin[dfin["index_sequence"] == t], local_radius_list, columns) for t in index_sequences
+        ]
+        num_workers = np.min([num_workers, cpu_count()])
+        if num_workers == 1:
+            results = list(map(run_in_parallel, args_list))
+        else:
+            results = list(Pool(num_workers).imap_unordered(run_in_parallel, args_list))
+        dft = pd.concat(results)
+        new_cols = [x for x in dft.columns if x not in original_columns]
+        dforig.loc[dft.index, new_cols] = dft[new_cols]
+    return dforig
+
+# %%
diff --git a/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py b/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py
index 30deef53..30eb05cf 100644
--- a/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py
+++ b/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py
@@ -9,6 +9,7 @@
     is_tp_outlier,
     add_features,
     add_neighborhood_avg_features,
+    add_neighborhood_avg_features_lrm,
     compute_change_over_time,
 )
 from nuc_morph_analysis.analyses.volume import add_growth_features
@@ -17,6 +18,7 @@
     add_fov_touch_timepoint_for_colonies,
 )
 from nuc_morph_analysis.analyses.height.add_colony_time import add_colony_time_all_datasets
+from nuc_morph_analysis.lib.visualization.plotting_tools import get_plot_labels_for_metric
 from nuc_morph_analysis.lib.preprocessing import labeling_neighbors_helper
 
 def load_dataset_with_features(
@@ -198,6 +200,9 @@ def process_all_tracks(df, dataset, remove_growth_outliers, num_workers):
     df = add_features.add_non_interphase_size_shape_flag(df)
     df = add_change_over_time(df)
     df = add_neighborhood_avg_features.run_script(df, num_workers=num_workers)
+    df = add_neighborhood_avg_features_lrm.run_script(df, num_workers=num_workers, 
+                                                feature_list=["volume", "height", "xy_aspect", "mesh_sa", "2d_area_nuc_cell_ratio"],
+                                                exclude_outliers=True)
 
     if dataset == "all_baseline":
         df = add_colony_time_all_datasets(df)
@@ -245,7 +250,7 @@ def process_full_tracks(df_all, thresh, pix_size, interval):
         df_full = add_features.add_location_at(df_full, frame, "y")
         df_full = add_features.add_time_at(df_full, frame, interval)
         df_full = add_features.add_colony_time_at(df_full, frame, interval)
-
+    
     df_full = add_features.add_duration_in_frames(df_full, "Ff", "frame_transition")
     df_full = add_features.add_duration_in_frames(df_full, "frame_transition", "Fb")
     df_full = add_features.add_duration_in_frames(df_full, "Ff", "Fb")
@@ -256,8 +261,15 @@ def process_full_tracks(df_all, thresh, pix_size, interval):
     df_full = add_growth_features.add_early_growth_rate(df_full, interval)
     df_full = add_growth_features.add_late_growth_rate_by_endpoints(df_full)
     df_full = add_growth_features.fit_tracks_to_time_powerlaw(df_full, "volume", interval)
-
+    
+    # For LRM
+    df_full = add_features.add_lineage_features(df_full, feature_list=['volume_at_B', 'duration_BC', 'volume_at_C', 'delta_volume_BC'])
+    df_full = add_features.add_feature_at(df_full, "frame_transition", 'height', 'height_percentile', pix_size) 
+    df_full = add_features.add_features_at_transition(df_full)
+    df_full = add_features.get_early_transient_gr_of_neighborhood(df_full, scale=get_plot_labels_for_metric('neighbor_avg_dxdt_48_volume_90um')[0])
     df_full = add_features.sum_mitotic_events_along_full_track(df_full)
+    df_full = add_features.normalize_sum_events(df_full)
+    df_full = add_features.add_mean_features(df_full)
 
     # Add flag for use after merging back to main manifest
     df_full = add_features.add_full_track_flag(df_full)
diff --git a/nuc_morph_analysis/lib/visualization/label_tables.py b/nuc_morph_analysis/lib/visualization/label_tables.py
index df39dec6..108b9560 100644
--- a/nuc_morph_analysis/lib/visualization/label_tables.py
+++ b/nuc_morph_analysis/lib/visualization/label_tables.py
@@ -141,100 +141,126 @@ def get_scale_factor_table(dataset="all_baseline"):
 LABEL_TABLE = {
     # Times and durations
     ("sync_time_Ff", "index_sequence"): "Time",
-    "normalized_time": "Normalized Interphase Time",
-    "colony_time": "Aligned Colony Time",
+    "normalized_time": "Normalized interphase time",
+    "colony_time": "Aligned colony time",
     ("Ff", "frame_formation"): "Formation",
-    ("frame_inflection", "frame_transition"): "Transtion",
+    ("frame_inflection", "frame_transition"): "Transition",
     ("Fb", "frame_breakdown"): "Breakdown",
-    "time_at_B": "Movie Time at Transition",
-    "time_at_C": "Movie Time at Breakdown",
-    "colony_time_at_B": "Aligned Colony Time at Transition",
-    "colony_time_at_C": "Aligned Colony Time at Breakdown",
-    "duration_AB": "Rapid Expansion Duration",
-    ("duration_BC", "duration_BC_hr"): "Growth Duration",
+    "time_at_A": "Time at formation",
+    "time_at_B": "Starting movie time",
+    "time_at_C": "Movie time at breakdown",
+    "colony_time_at_A": "Aligned colony time at formation",
+    "colony_time_at_B": "Starting aligned colony time",
+    "colony_time_at_C": "Ending aligned colony time",
+    "duration_AB": "Rapid expansion duration",
+    ("duration_BC", "duration_BC_hr"): "Growth duration",
     # Volume
     "volume": "Volume",
-    "volume_at_A": "Volume at Formation",
-    "volume_at_B": "Starting Volume",
-    "volume_at_C": "Ending Volume",
-    "Volume_C": "Volume at Breakdown",
-    "volume_fold_change_BC": "Volume Fold-Change",
-    "volume_fold_change_fromB": "Volume Fold-Change relative to Starting Volume",
-    "delta_volume_BC": "Added Volume",
-    "difference_volume_at_B": "Difference in Starting Volume",
-    "difference_half_vol_at_C_and_B": "1/2 Mother Ending Volume - Daughter Starting Volume",
-    "avg_sister_volume_fold_change_BC": "Sisters Average Volume Fold-Change",
-    "avg_sister_volume_at_B": "Sisters Average Starting Volume",
+    "volume_at_A": "Volume at formation",
+    "volume_at_B": "Starting volume",
+    "volume_at_C": "Ending volume",
+    "Volume_C": "Volume at breakdown",
+    "volume_fold_change_BC": "Volume fold-change",
+    "volume_fold_change_fromB": "Volume fold-change relative to starting volume",
+    "delta_volume_BC": "Added volume",
+    "difference_volume_at_B": "Difference in starting volume",
+    "difference_half_vol_at_C_and_B": "1/2 mother ending volume - daughter starting volume",
+    "avg_sister_volume_fold_change_BC": "Sisters average volume fold-change",
+    "avg_sister_volume_at_B": "Sisters average starting volume",
     "volume_sub": "Change in volume",
     # Growth rates
-    "exp_growth_coeff_BC": "Exponetial Growth Coeff. B to C",
-    "linear_growth_rate_BC": "Late Growth Rate",
-    "tscale_linearityfit_volume": "Fitted Time Scaling Factor (\u03B1)",
-    "RMSE_linearityfit_volume": "Root Mean Squared Error",
-    "late_growth_rate_by_endpoints": "Growth Rate",
+    "exp_growth_coeff_BC": "Exponential growth coeff. B to C",
+    "linear_growth_rate_BC": "Late growth rate",
+    "tscale_linearityfit_volume": "Fitted time scaling factor (\u03B1)",
+    "RMSE_linearityfit_volume": "Root mean squared error",
+    "late_growth_rate_by_endpoints": "Growth rate",
     "dxdt_t2-dxdt_t1": "Late average transient growth rate - early average transient growth rate",
     # Height
     "height": "Height",
-    "avg_height": "Average Height",
-    "height_fold_change_BC": "Growth Height Fold-Change",
-    "height_at_B": "Starting Height",
-    "height_at_C": "Ending Height",
+    "avg_height": "Average height",
+    "height_fold_change_BC": "Growth height fold-change",
+    "height_at_B": "Starting height",
+    "height_at_C": "Ending height",
     # Surface Area
-    "mesh_sa": "Surface Area",
-    "SA_at_A": "Surface Area at A",
-    "SA_at_B": "Surface Area at B",
-    "SA_at_C": "Surface Area at C",
-    "delta_SA_BC": "\u0394Surface Area B to C",
-    "SA_fold_change_BC": "Surface Area Fold-Change B to C",
-    "SA_fold_change_fromB": "Surface Area Fold-Change",
-    "SA_vol_ratio": "SA/Volume",
-    "tscale_linearityfit_SA": "Fitted Surface Area Time Scaling (\u03B1)",
-    "RMSE_linearityfit_SA": "Root Mean Squared Error",
+    "mesh_sa": "Surface area",
+    "SA_at_A": "Surface area at A",
+    "SA_at_B": "Starting surface area",
+    "SA_at_C": "Ending surface area",
+    "delta_SA_BC": "\u0394Surface area B to C",
+    "SA_fold_change_BC": "Surface area fold-change B to C",
+    "SA_fold_change_fromB": "Surface area fold-change",
+    "SA_vol_ratio": "SA/volume",
+    "tscale_linearityfit_SA": "Fitted surface area time scaling (\u03B1)",
+    "RMSE_linearityfit_SA": "Root mean squared error",
     # Dimensions beyond height
     "length": "XY short axis width",
     "width": "XY long axis length",
     # Aspect Ratio
-    "xy_aspect": "XY Aspect Ratio",
-    "xz_aspect": "XZ Aspect Ratio",
-    "zy_aspect": "YZ Aspect Ratio",
-    "xz_aspect_fold_change_BC": "XZ Aspect Ratio Fold-Change B to C",
-    "avg_xz_aspect_ratio": "Average XZ Aspect Ratio",
-    "xz_aspect_at_B": "XZ Aspect Ratio at B",
+    "xy_aspect": "Xy aspect ratio",
+    "xz_aspect": "Xz aspect ratio",
+    "zy_aspect": "Yz aspect ratio",
+    "xz_aspect_fold_change_BC": "Xz aspect ratio fold-change B to C",
+    "avg_xz_aspect_ratio": "Average xz aspect ratio",
+    "xz_aspect_at_B": "Starting xz aspect ratio",
     # Colony Position
     "distance": "Distance",
-    "distance_from_centroid": "Distance From Centroid",
-    "normalized_distance_from_centroid": "Normalized Distance From Centroid",
-    "max_distance_from_centroid": "Max Distance From Centroid",
-    "colony_depth": "Colony Depth",
-    "normalized_colony_depth": "Normalized Colony Depth",
-    "max_colony_depth": "Max Colony Depth",
-    "avg_colony_depth": "Average Colony Depth",
+    "distance_from_centroid": "Distance from centroid",
+    "normalized_distance_from_centroid": "Normalized distance from centroid",
+    "max_distance_from_centroid": "Max distance from centroid",
+    "colony_depth": "Colony depth",
+    "normalized_colony_depth": "Normalized colony depth",
+    "max_colony_depth": "Max colony depth",
+    "avg_colony_depth": "Average colony depth",
     # Density
-    "colony_non_circularity": "Colony Non-circularity",
-    "colony_non_circularity_scaled": "Scaled Colony Non-circularity",
-    "avg_early_density": "Early Density",
-    "avg_late_density": "Late Density",
+    "colony_non_circularity": "Colony non-circularity",
+    "colony_non_circularity_scaled": "Scaled colony non-circularity",
+    "avg_early_density": "Early density",
+    "avg_late_density": "Late density",
     "density": "Density",
-    "avg_density": "Average Density",
+    "avg_density": "Average density",
     # Lineage
     "parent_id": "Parent ID",
     "family_id": "Family ID",
+    "sisters_volume_at_B": "Sisters starting volume",
+    "sisters_duration_BC": "Sisters growth duration",
+    "sisters_delta_volume_BC": "Sisters added volume",
     # Flags
-    "is_outlier": "Outlier Flag",
-    "is_tp_outlier": "Single Timepoint Outlier",
-    "is_outlier_track": "Outlier Track Flag",
-    "is_growth_outlier": "Growth Feature Outlier Flag",
-    "fov_edge": "FOV-edge Flag",
-    "termination": "Track Termination",
-    "is_full_track": "Full Interphase Track Flag",
+    "is_outlier": "Outlier flag",
+    "is_tp_outlier": "Single timepoint outlier",
+    "is_outlier_track": "Outlier track flag",
+    "is_growth_outlier": "Growth feature outlier flag",
+    "fov_edge": "FOV-edge flag",
+    "termination": "Track termination",
+    "is_full_track": "Full interphase track flag",
     # colony segmentations
     "colony_area": "area of colony (brightfield)",
     "nucleus_colony_area_ratio": "ratio of nuclear area to colony area",
     "seg_twoD_zMIP_area": "total projected nuclear area",
+    # LRM feats
+    "height_at_B": "Starting height",
+    "density_at_B": "Starting density",
+    "xy_aspect_at_B": "Starting XY aspect ratio",
+    "SA_vol_ratio_at_B": "Starting surface area/volume ratio",
+    "early_neighbor_avg_dxdt_48_volume_90um": "Neighborhood avg. ~starting transient growth rate",
+    'neighbor_avg_lrm_volume_90um_at_B': "Neighborhood avg. starting volume", 
+    'neighbor_avg_lrm_height_90um_at_B': "Neighborhood avg. starting height",
+    'neighbor_avg_lrm_density_90um_at_B': "Neighborhood avg. starting density",
+    'neighbor_avg_lrm_xy_aspect_90um_at_B': "Neighborhood avg. starting XY aspect ratio",
+    'neighbor_avg_lrm_mesh_sa_90um_at_B': "Neighborhood avg. starting surface area",
+    "mean_neighbor_avg_dxdt_48_volume_90um": "Neighborhood avg. mean transient growth rate",
+    'mean_neighbor_avg_lrm_volume_90um': "Neighborhood avg. mean volume", 
+    'mean_neighbor_avg_lrm_height_90um': "Neighborhood avg. mean height",
+    'mean_neighbor_avg_lrm_density_90um': "Neighborhood avg. mean density",
+    'mean_neighbor_avg_lrm_xy_aspect_90um': "Neighborhood avg. mean XY aspect ratio",
+    'mean_neighbor_avg_lrm_mesh_sa_90um': "Neighborhood avg. mean surface area",
+    'mean_neighbor_avg_lrm_2d_area_nuc_cell_ratio_90um': "Neighborhood avg. mean density",
+    
     # mitotic and apoptotic neighbor columns
     "number_of_frame_of_breakdown_neighbors": "# of neighboring cells undergoing breakdown",
     "number_of_frame_of_formation_neighbors": "# of neighboring cells undergoing formation",
     "number_of_frame_of_death_neighbors": "# of neighboring cells undergoing death",
+    "normalized_sum_has_mitotic_neighbor": "Frequency of mitotic adjacent neighbors",
+    "normalized_sum_has_dying_neighbor": "Frequency of dying adjacent neighbors",  
     # 2D area features
     "2d_area_nuc_cell_ratio": "Nucleus area/(Pseudo)cell area",
     "2d_area_nucleus": "Nuclear area",
@@ -246,7 +272,7 @@ def get_scale_factor_table(dataset="all_baseline"):
     "2d_intensity_mean_edge" : "Average distance to (pseudo)cell edge",
     "2d_intensity_max_edge" : "Max distance to (pseudo)cell edge",
 }
-# now add the dxdt columns
+
 
 
 def convert_to_hr(bin_interval, dataset="all_baseline"):
diff --git a/run_all_manuscript_workflows.py b/run_all_manuscript_workflows.py
index 8694c539..7abf1180 100644
--- a/run_all_manuscript_workflows.py
+++ b/run_all_manuscript_workflows.py
@@ -58,6 +58,9 @@ def figureS1_segmentation_model_validation():
         
     def supplemental_figure_cell_health():
         import nuc_morph_analysis.analyses.cell_health.cell_health_workflow
+        
+    def supplemental_figure_linear_regression_model():
+        import nuc_morph_analysis.analyses.linear_regression.supplemental_lrm_figure_workflow
 
 
 ALL_WORKFLOWS = get_jobs(Workflows)