AllenCell · jcass11 · Nov 19, 2024 · Sep 18, 2024 · Sep 18, 2024 · Sep 18, 2024
diff --git a/nuc_morph_analysis/analyses/volume/add_growth_features.py b/nuc_morph_analysis/analyses/volume/add_growth_features.py
@@ -2,7 +2,7 @@
 import matplotlib.pyplot as plt
 import seaborn as sb
 from scipy.optimize import curve_fit
-from nuc_morph_analysis.lib.preprocessing.curve_fitting import powerfunc
+from nuc_morph_analysis.lib.preprocessing.curve_fitting import powerfunc, line, exponential
 from nuc_morph_analysis.lib.visualization.notebook_tools import save_and_show_plot
 from nuc_morph_analysis.lib.visualization.plotting_tools import get_plot_labels_for_metric
 
@@ -167,8 +167,131 @@ def fit_tracks_to_time_powerlaw(
     return df
 
 
+def fit_tracks_to_model(
+    df,
+    interval,
+    model="power",
+    plot=False,
+):
+    """
+    This function fits the volume of each track to either a power law, exponential or
+    linear model and adds the fit parameters and RMSE of the fit to the input dataframe.
+
+    Parameters
+    ----------
+    df : DataFrame
+        full tracks dataframe
+    interval : float
+        The time interval in minutes
+    model : str, optional
+        The model to fit to. The default is "power". Other options are "exponential" and "linear".
+    plot : bool, optional
+        If True, a plot of the volume vs time for each track and its exponential fit is displayed. The default is False.
+
+    Returns
+    -------
+    df : Dataframe
+        The input dataframe with an additional columns with linearity fit parameters added
+    """
+
+    # get parameters based on fitting volume
+    atB_0 = 550
+    t_scale0 = np.nan
+    model_name = model
+    if model == "power":
+        rate_0 = 35
+        tscale_0 = 1
+        model_name = "linearity"
+        modelfunc = powerfunc
+    elif model == "exponential":
+        rate_0 = 0.05
+        modelfunc = exponential
+    elif model == "linear":
+        rate_0 = 50
+        modelfunc = line
+    else:
+        raise ValueError(
+            "Function currently only fits volumes to power law, exponential or linear model."
+        )
+    trackdir = f"volume/figures/linearity/volume/tracks/"
+    yscale, ylabel, yunits, _ = get_plot_labels_for_metric("volume")
+
+    features = [
+        f"tscale_{model_name}fit_volume",
+        f"atB_{model_name}fit_volume",
+        f"rate_{model_name}fit_volume",
+        f"RMSE_{model_name}fit_volume",
+    ]
+    for feature in features:
+        df[feature] = np.nan
+
+    fail_count = 0
+    for track, df_track in df.groupby("track_id"):
+
+        try:
+            # trim tracks to transition to breakdown
+            transition = df_track["frame_transition"].min()
+            fb = df_track["Fb"].values.min()
+            df_track = df_track.sort_values("index_sequence")
+            df_track_trim = df_track[
+                (df_track.index_sequence > transition) & (df_track.index_sequence <= fb)
+            ]
+
+            # get trimmed track times and volumes
+            x = df_track_trim["index_sequence"].values * interval / 60
+            x -= x[0]
+            y = df_track_trim["volume"].values * yscale
+
+            # fit trimmed track to model with initial guesses
+            if model == "power":
+                popt, _ = curve_fit(modelfunc, x, y, [rate_0, atB_0, tscale_0])
+                tscale = popt[2]
+            else:
+                popt, _ = curve_fit(modelfunc, x, y, [rate_0, atB_0])
+                tscale = np.nan
+
+            # get fits parameters, residuals and mse
+            rate = popt[0]
+            atB = popt[1]
+            z = modelfunc(x, *popt)
+            res = z - y
+            rmse = np.sqrt(np.nanmean(res**2))
+
+            # plot real track and fitted model
+            if plot is True and model == "power":
+                plt.plot(x, y, label=f"Track ID: {track}", c="grey", alpha=0.5)
+                plt.plot(
+                    x,
+                    z,
+                    c="red",
+                    alpha=0.5,
+                    label=f"tscale {np.round(tscale,2)}, atB {np.round(atB)}, "
+                    f"r {np.round(rate)}, RMSE {np.round(rmse)}",
+                )
+                plt.legend()
+                plt.ylabel(f"{ylabel} {yunits}")
+                plt.xlabel("Time (hr)")
+                plt.tight_layout()
+                save_and_show_plot(f"{trackdir}/track{track}", quiet=True)
+                plt.close()
+
+            # add fit parameters and error to manifest
+            df.loc[df_track.index, f"tscale_{model_name}fit_volume"] = tscale
+            df.loc[df_track.index, f"atB_{model_name}fit_volume"] = atB
+            df.loc[df_track.index, f"rate_{model_name}fit_volume"] = rate
+            df.loc[df_track.index, f"RMSE_{model_name}fit_volume"] = rmse
+
+        except Exception:
+            fail_count += 1
+
+    if fail_count > 0:
+        print(f"Failed volume {model} model fit count: {fail_count}")
+
+    return df
+
+
 def plot_fit_parameter_distribution(
-    df_all, figdir, feature, by_colony_flag=False, density_flag=True, nbins=20
+    df_all, figdir, feature, by_colony_flag=False, density_flag=True, allmodels_flag=False, nbins=20
 ):
     """
     This function plots the mean volume fold change for
@@ -185,6 +308,8 @@ def plot_fit_parameter_distribution(
         Flag for whether to separate data out by colony
     density_flag: bool
         Flag for whether to use density instead of histogram
+    allmodels_flag: bool
+        Flag for whether to plot all three volume model errors (power law, exponential, linear)
     nbins: int
         Number of bins for histogram
     """
@@ -212,33 +337,56 @@ def plot_fit_parameter_distribution(
 
     _, ax = plt.subplots(1, 1, figsize=(5, 4), dpi=300)
 
-    for color, colony, label, ind in zip(colors, colonies, labels, [0, 2, 4, 6]):
+    if not allmodels_flag:
+
+        for color, colony, label, ind in zip(colors, colonies, labels, [0, 2, 4, 6]):
+
+            if by_colony_flag and colony != "all_baseline":
+                df = df_all[df_all["colony"] == colony]
+            else:
+                df = df_all
+
+            if density_flag:
+                # get kde and find feature value giving max to add to legend
+                sb.kdeplot(df[feature], color=color, ax=ax)
+                x = ax.lines[ind].get_xdata()  # Get the x data of the distribution
+                y = ax.lines[ind].get_ydata()  # Get the y data of the distribution
+                maxid = np.argmax(y)  # The id of the peak (maximum of y data)
+                label += f" ({np.round(x[maxid],2)})"
+                # replot kde now with complete label
+                sb.kdeplot(df[feature], color=color, label=label, ax=ax)
+
+            else:
+                plt.hist(df[feature], histtype="step", bins=nbins, color=color, label=label)
+
+    else:
+
+        for color, model, model_name, ind in zip(
+                ["k", "r", "b"],
+                ["linearity", "linear", "exponential"],
+                ["Power law", "Linear", "Exponential"],
+                [0, 2, 4]):
 
-        if by_colony_flag and colony != "all_baseline":
-            df = df_all[df_all["colony"] == colony]
-        else:
             df = df_all
 
-        if density_flag:
             # get kde and find feature value giving max to add to legend
-            sb.kdeplot(df[feature], color=color, ax=ax)
+            sb.kdeplot(df[f"RMSE_{model}fit_volume"], color=color, ax=ax)
             x = ax.lines[ind].get_xdata()  # Get the x data of the distribution
             y = ax.lines[ind].get_ydata()  # Get the y data of the distribution
             maxid = np.argmax(y)  # The id of the peak (maximum of y data)
-            label += f" ({np.round(x[maxid],2)})"
+            label = f"{model_name} ({np.round(x[maxid],2)})"
             # replot kde now with complete label
-            sb.kdeplot(df[feature], color=color, label=label, ax=ax)
+            sb.kdeplot(df[f"RMSE_{model}fit_volume"], color=color, label=label, ax=ax)
 
-        else:
-            plt.hist(df[feature], histtype="step", bins=nbins, color=color, label=label)
 
     if "tscale" in feature:
         plt.axvline(1, color="k", linestyle=":")
     if "RMSE" in feature:
-        plt.axvline(5.49, color="k", linestyle=":", label=f"Error (5.49 {xunits[1:-1]})")
+        plt.axvline(5.54, color="k", linestyle=":", label=f"Segmnetation error (5.54 {xunits[1:-1]})")
 
     plt.xlabel(f"{xlabel} {xunits}")
     plt.ylabel(ylabel)
+    plt.xlim(0, 50)
     plt.legend(prop={"size": 12})
     plt.tight_layout()
     save_and_show_plot(figlabel)

diff --git a/nuc_morph_analysis/analyses/volume/local_growth_workflow.py b/nuc_morph_analysis/analyses/volume/local_growth_workflow.py
@@ -87,12 +87,17 @@
 # both pooled for all baseline colonies and separated by colony
 for feature in ["tscale_linearityfit_volume", "RMSE_linearityfit_volume"]:
     for by_colony_flag in [True, False]:
+        if "RMSE" in feature and not by_colony_flag:
+            allmodels_flag=True
+        else:
+            allmodels_flag=False
         plot_fit_parameter_distribution(
             df_track_level_features,
             figdir,
             feature,
             by_colony_flag=by_colony_flag,
             density_flag=True,
+            allmodels_flag=allmodels_flag,
         )
 
 # %% plot relationships of alpha to fold change and colony time for all data pooled

diff --git a/nuc_morph_analysis/lib/preprocessing/curve_fitting.py b/nuc_morph_analysis/lib/preprocessing/curve_fitting.py
@@ -23,6 +23,28 @@ def line(x, a, b):
     return a * x + b
 
 
+def exponential(x, a, b):
+    """
+    This function gives the y values of an exponetial
+    of the form y = b * e ^ (x * a)
+
+    Parameters
+    ----------
+    x: Number or ndarray
+        The x values
+    a: Number
+        Scaling of exponent with time
+    b: Number
+        y-intercept of the line
+
+    Returns
+    -------
+    Number or ndarray
+        y = b * e ^ (x * a)
+    """
+    return b * np.exp(x * a)
+
+
 def powerfunc(x, a, b, c):
     """
     This function gives the y values of power law

diff --git a/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py b/nuc_morph_analysis/lib/preprocessing/global_dataset_filtering.py
@@ -254,6 +254,8 @@ def process_full_tracks(df_all, thresh, pix_size, interval):
     df_full = add_growth_features.add_early_growth_rate(df_full, interval)
     df_full = add_growth_features.add_late_growth_rate_by_endpoints(df_full)
     df_full = add_growth_features.fit_tracks_to_time_powerlaw(df_full, "volume", interval)
+    df_full = add_growth_features.fit_tracks_to_model(df_full, interval, "exponential")
+    df_full = add_growth_features.fit_tracks_to_model(df_full, interval, "linear")
 
     df_full = add_features.sum_mitotic_events_along_full_track(df_full)