diff --git a/docs/PREPROCESSING.md b/docs/PREPROCESSING.md
index 5b50e0f..96c3254 100644
--- a/docs/PREPROCESSING.md
+++ b/docs/PREPROCESSING.md
@@ -52,7 +52,7 @@ src
 
 # Polymorphic structures: Generate SDFs
 
-Use the segmentation data for polymorphic structures as input to the SDF generation step. 
+Use the segmentation data for polymorphic structures as input to the SDF generation step.
 
 ```
 src
@@ -65,8 +65,8 @@ src
          └── pc_sdfs.py <- Sample point clouds from scaled meshes
 ```
 
-The scale factors can be computed using the `get_max_bounding_box` script. Alternatively, the pre-computed scale factors can be downloaded along with the rest of the preprocessed data. The following scale factors are available for download 
+The scale factors can be computed using the `get_max_bounding_box` script. Alternatively, the pre-computed scale factors can be downloaded along with the rest of the preprocessed data. The following scale factors are available for download:
 1. [WTC-11 hIPSc single cell image dataset v1 nucleolus (NPM1)](https://open.quiltdata.com/b/allencell/tree/aics/morphology_appropriate_representation_learning/preprocessed_data/npm1/scale_factor.npz)
 2. [WTC-11 hIPSc single cell image dataset v1 nucleolus (NPM1) 64 resolution](https://open.quiltdata.com/b/allencell/tree/aics/morphology_appropriate_representation_learning/preprocessed_data/npm1_64_res/scale_factor.npz)
-3. [WTC-11 hIPSc single cell image dataset v1 polymorphic structures](https://open.quiltdata.com/b/allencell/tree/aics/morphology_appropriate_representation_learning/preprocessed_data/other_polymorphic/scale_factor.npz)
\ No newline at end of file
+3. [WTC-11 hIPSc single cell image dataset v1 polymorphic structures](https://open.quiltdata.com/b/allencell/tree/aics/morphology_appropriate_representation_learning/preprocessed_data/other_polymorphic/scale_factor.npz)
diff --git a/docs/USAGE.md b/docs/USAGE.md
index 20ec5a6..dd0b575 100644
--- a/docs/USAGE.md
+++ b/docs/USAGE.md
@@ -226,4 +226,3 @@ To run a baseline LDA on the DMSO subset of the drug perturbation dataset, run
 ```
 python src/br/analysis/run_drugdata_LDA.py --save_path "./outputs_npm1_perturb/" --embeddings_path "./morphology_appropriate_representation_learning/model_embeddings/npm1_perturb/" --dataset_name "npm1_perturb" --raw_path "./NPM1_single_cell_drug_perturbations/" --baseline True
 ```
-
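Note: the scale-factor archives linked in `docs/PREPROCESSING.md` above are plain NumPy `.npz` archives. A minimal loading sketch follows; the local filename mirrors the download, but the key layout inside the archive is an assumption, so the code lists `data.files` rather than hard-coding a key.

```python
# Sketch: inspect a downloaded scale_factor.npz archive.
# The key names inside the archive are assumptions -- list data.files
# instead of guessing a key.
import numpy as np

data = np.load("scale_factor.npz")
print(data.files)            # names of the arrays stored in the archive
scale = data[data.files[0]]  # take the first stored array as the scale factor(s)
print("scale factor(s):", scale)
```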
diff --git a/src/br/analysis/analysis_utils.py b/src/br/analysis/analysis_utils.py
index b28bd74..ab79be2 100644
--- a/src/br/analysis/analysis_utils.py
+++ b/src/br/analysis/analysis_utils.py
@@ -634,6 +634,56 @@ def archetypes_polymorphic(this_save_path, archetypes_df, all_ret, all_features)
     arch_dict.to_csv(this_save_path / "archetypes.csv")
 
 
+def make_pca_score_plot(this_save_path, all_ret, stratify_key, dataset_name):
+    cols = [i for i in all_ret.columns if "mu" in i]
+    feats = all_ret[cols].values
+
+    pca = PCA()
+    pca_feats = pca.fit_transform(feats)
+    pca_feats = pd.DataFrame(pca_feats, columns=[f"PC_{i+1}" for i in range(len(cols))])
+    pca_feats = pd.concat([pca_feats, all_ret], axis=1)
+
+    n_colors = len(all_ret[stratify_key].unique())
+    colors = sns.color_palette("Paired", n_colors)
+
+    hue_order = None
+    if dataset_name == "pcna":
+        hue_order = [
+            "G1",
+            "earlyS",
+            "earlyS-midS",
+            "midS",
+            "midS-lateS",
+            "lateS",
+            "lateS-G2",
+            "G2",
+        ]
+        colors = (
+            sns.color_palette("Greens", 2)
+            + sns.color_palette("Reds", 4)
+            + sns.color_palette("Purples", 2)
+        )
+    elif dataset_name == "other_polymorphic":
+        hue_order = ["1.0", "2.0", "3.0", "4.0", ">=5"]
+
+    fig, ax = plt.subplots(1, 1, figsize=(5, 5))
+    g = sns.scatterplot(
+        ax=ax,
+        data=pca_feats,
+        x="PC_1",
+        y="PC_2",
+        hue=stratify_key,
+        hue_order=hue_order,
+        palette=colors,
+    )
+    g.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1)
+
+    fig.savefig(this_save_path / f"pcs_{dataset_name}.png", bbox_inches="tight", dpi=300)
+    fig.savefig(this_save_path / f"pcs_{dataset_name}.pdf", bbox_inches="tight", dpi=300)
+
+    pca_feats.to_csv(this_save_path / f"pcs_{dataset_name}.csv")
+
+
 def make_pacmap(this_save_path, all_ret, feats_archs):
     cols = [i for i in all_ret.columns if "mu" in i]
 
@@ -642,8 +692,36 @@ def make_pacmap(this_save_path, all_ret, feats_archs):
     X_transformed = embedding.fit_transform(feats, init="pca")
     archs_transform = embedding.transform(feats_archs, init="pca", basis=feats)
 
+    gene_to_struct = {
+        "NUP153": "Nuclear Pores",
+        "FBL": "Nucleoli (DFC)",
+        "LAMP1": "Lysosomes",
+        "ST6GAL1": "Golgi",
+        "SON": "Nuclear Speckles",
+        "HIST1H2BJ": "Histones",
+        "SMC1A": "Cohesins",
+        "CETN2": "Centrioles",
+        "SLC25A17": "Peroxisomes",
+        "RAB5A": "Endosomes",
+        "NPM1": "Nucleoli (GC)",
+    }
+    all_ret["structure_name"] = all_ret["structure_name"].apply(lambda x: gene_to_struct[x])
+
     labels = all_ret["structure_name"].values
-    colors = sns.color_palette("Paired", len(np.unique(labels)))
+    if "Golgi" in labels:
+        colors = sns.color_palette("Set2", len(np.unique(labels)))
+    else:
+        colors = sns.color_palette("Paired", len(np.unique(labels)))
+
+    tmp = pd.DataFrame(
+        X_transformed, columns=[f"PACMAP_{i}" for i in range(X_transformed.shape[1])]
+    )
+    tmp2 = pd.DataFrame(
+        archs_transform, columns=[f"Archetype_{i}" for i in range(archs_transform.shape[1])]
+    )
+    tmp = pd.concat([tmp, tmp2, all_ret[["structure_name"]]], axis=1)
+    tmp.to_csv(this_save_path / "pacmap_archetypes.csv")
 
     cdict = {i: colors[j] for j, i in enumerate(np.unique(labels))}
 
@@ -659,7 +737,10 @@ def make_pacmap(this_save_path, all_ret, feats_archs):
             alpha=0.6,
         )
     ax.legend()
-    lgnd = plt.legend(loc="upper right", numpoints=1, fontsize=10)
+    if "Golgi" in labels:
+        lgnd = plt.legend(loc="upper left", numpoints=1, fontsize=10)
+    else:
+        lgnd = plt.legend(loc="upper right", numpoints=1, fontsize=10)
 
     # change the marker size manually for both lines
     for handle in lgnd.legend_handles:
diff --git a/src/br/analysis/run_analysis.py b/src/br/analysis/run_analysis.py
index 91d98b9..483f724 100644
--- a/src/br/analysis/run_analysis.py
+++ b/src/br/analysis/run_analysis.py
@@ -12,6 +12,7 @@
     latent_walk_polymorphic,
     latent_walk_save_recons,
     make_pacmap,
+    make_pca_score_plot,
     pseudo_time_analysis,
     setup_gpu,
     str2bool,
@@ -48,6 +49,8 @@ def main(args):
         this_save_path = Path(args.save_path) / Path("latent_walks")
         this_save_path.mkdir(parents=True, exist_ok=True)
 
+        make_pca_score_plot(this_save_path, all_ret, stratify_key, args.dataset_name)
+
         if args.sdf:
             latent_walk_polymorphic(stratify_key, all_ret, this_save_path, latent_dim)
         else:
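Note: the new `make_pca_score_plot` helper wired into `run_analysis.py` above boils down to a PCA score plot over the latent `mu` columns. A self-contained sketch with synthetic data follows; the `mu_*` column naming matches the convention in the diff, while the `cell_stage` stratify key is a hypothetical stand-in.

```python
# Standalone sketch of the PCA score plot added in make_pca_score_plot:
# project latent "mu" columns onto principal components and color PC_1/PC_2
# by a stratification key. Synthetic data; "cell_stage" is hypothetical.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.normal(size=(200, 8)), columns=[f"mu_{i}" for i in range(8)])
df["cell_stage"] = rng.choice(["G1", "G2"], size=len(df))

cols = [c for c in df.columns if "mu" in c]
pcs = pd.DataFrame(
    PCA().fit_transform(df[cols].values),
    columns=[f"PC_{i+1}" for i in range(len(cols))],
)
pcs["cell_stage"] = df["cell_stage"].values

ax = sns.scatterplot(data=pcs, x="PC_1", y="PC_2", hue="cell_stage")
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
plt.savefig("pcs_example.png", bbox_inches="tight", dpi=300)
```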
diff --git a/src/br/analysis/run_classification.py b/src/br/analysis/run_classification.py
index 4a21767..03aafcd 100644
--- a/src/br/analysis/run_classification.py
+++ b/src/br/analysis/run_classification.py
@@ -2,16 +2,18 @@
 import os
 import sys
 from pathlib import Path
-import pandas as pd
+
+import matplotlib.pyplot as plt
 import numpy as np
-from br.models.compute_features import get_embeddings
-from br.models.utils import get_all_configs_per_dataset
+import pandas as pd
+import seaborn as sns
 from skimage import measure as skmeasure
 from skimage import morphology as skmorpho
 from tqdm import tqdm
+
 from br.features.classification import get_classification_df
-import matplotlib.pyplot as plt
-import seaborn as sns
+from br.models.compute_features import get_embeddings
+from br.models.utils import get_all_configs_per_dataset
 
 
 def get_surface_area(input_img):
@@ -20,9 +22,9 @@ def get_surface_area(input_img):
     input_img[:, :, [0, -1]] = 0
     input_img[:, [0, -1], :] = 0
     input_img[[0, -1], :, :] = 0
-    input_img_surface = np.logical_xor(
-        input_img, skmorpho.binary_erosion(input_img)
-    ).astype(np.uint8)
+    input_img_surface = np.logical_xor(input_img, skmorpho.binary_erosion(input_img)).astype(
+        np.uint8
+    )
     # Loop through the boundary voxels to calculate the number of
     # boundary faces. Using 6-neighborhod.
     pxl_z, pxl_y, pxl_x = np.nonzero(input_img_surface)
@@ -113,20 +115,14 @@ def main(args):
         feats["CellId"] = row["CellId"]
         all_feats.append(pd.DataFrame(feats, index=[0]))
     all_feats = pd.concat(all_feats, axis=0)
-    all_feats = all_feats.merge(
-        orig_df[["CellId", "vol_bins", "vol_bins_inds"]], on="CellId"
-    )
+    all_feats = all_feats.merge(orig_df[["CellId", "vol_bins", "vol_bins_inds"]], on="CellId")
     all_feats["mean_volume"] = all_feats["shape_volume"] / all_feats["connectivity_cc"]
     all_feats["mean_surface_area"] = (
         all_feats["roundness_surface_area"] / all_feats["connectivity_cc"]
     )
-    all_feats = all_feats.merge(
-        orig_df[["CellId", "STR_connectivity_cc_thresh"]], on="CellId"
-    )
-    all_feats = all_feats.loc[all_feats["CellId"] != 724520].reset_index(
-        drop=True
-    )  # nan row
+    all_feats = all_feats.merge(orig_df[["CellId", "STR_connectivity_cc_thresh"]], on="CellId")
+    all_feats = all_feats.loc[all_feats["CellId"] != 724520].reset_index(drop=True)  # nan row
     all_ret = all_ret.loc[all_ret["CellId"] != 724520].reset_index(drop=True)  # nan row
 
     assert not all_feats["mean_surface_area"].isna().any()
@@ -134,7 +130,6 @@ def main(args):
         orig_df[["CellId", "vol_bins", "vol_bins_inds"]],
         on="CellId",
     )
-    from br.features.classification import get_classification_df
 
     all_baseline = []
     all_feats["model"] = "baseline"
@@ -175,9 +170,7 @@ def main(args):
         "(676.015, 818.646)",
         "(818.646, 961.277)",
     ]
-    g = sns.boxplot(
-        ax=ax, data=plot, x="vol_bin", y="top_1_acc", hue="features", order=x_order
-    )
+    g = sns.boxplot(ax=ax, data=plot, x="vol_bin", y="top_1_acc", hue="features", order=x_order)
     plt.xticks(rotation=30)
     ax.set_xticklabels(
         ["0", "1", "2", "3", "4"], rotation=0
    )
@@ -191,6 +184,7 @@ def main(args):
         bbox_inches="tight",
         dpi=300,
     )
+    plot.to_csv(args.save_path + "classification_number_pieces.csv")
 
     # fig.savefig("classification_number_pieces_nogrouping.png", bbox_inches="tight", dpi=300)
@@ -198,18 +192,14 @@
     parser = argparse.ArgumentParser(
         description="Script for computing perturbation detection metrics"
     )
-    parser.add_argument(
-        "--save_path", type=str, required=True, help="Path to save the results."
-    )
+    parser.add_argument("--save_path", type=str, required=True, help="Path to save the results.")
     parser.add_argument(
         "--embeddings_path",
         type=str,
         required=True,
         help="Path to the saved embeddings.",
     )
-    parser.add_argument(
-        "--dataset_name", type=str, required=True, help="Name of the dataset."
-    )
+    parser.add_argument("--dataset_name", type=str, required=True, help="Name of the dataset.")
 
     args = parser.parse_args()
 
     # Validate that required paths are provided
@@ -221,6 +211,6 @@
 
     """
     Example run:
-    
+
     python src/br/analysis/run_classification.py --save_path "./outputs_npm1/" --embeddings_path "./morphology_appropriate_representation_learning/model_embeddings/npm1/" --dataset_name "npm1"
     """
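Note: the `get_surface_area` reformatting above leaves the 6-neighborhood face-counting logic untouched. For intuition, here is a toy sketch of the same idea (a simplified equivalent, not the script's exact erosion-based implementation): a voxel face counts toward the surface when the neighbor across that face is background.

```python
# Toy illustration of 6-neighborhood surface area: count exposed voxel faces.
# Simplified equivalent of get_surface_area's idea, not its exact code.
import numpy as np

img = np.zeros((5, 5, 5), dtype=np.uint8)
img[1:4, 1:4, 1:4] = 1  # a 3x3x3 solid cube; the border is zero, as in the script

faces = 0
for axis in range(3):
    for shift in (1, -1):
        neighbor = np.roll(img, shift, axis=axis)  # safe: zeroed border wraps zeros
        faces += np.logical_and(img == 1, neighbor == 0).sum()

print(faces)  # 6 cube faces x 9 exposed voxels each = 54
```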
diff --git a/src/br/analysis/run_drugdata_LDA.py b/src/br/analysis/run_drugdata_LDA.py
index 8e10a91..175a80c 100644
--- a/src/br/analysis/run_drugdata_LDA.py
+++ b/src/br/analysis/run_drugdata_LDA.py
@@ -2,18 +2,21 @@
 import os
 import sys
 from pathlib import Path
-from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
-from sklearn.decomposition import PCA
-from tqdm import tqdm
-import numpy as np
-from skimage.io import imread
+
+import matplotlib as mpl
 import matplotlib.pyplot as plt
+import numpy as np
 import pandas as pd
-from br.models.compute_features import get_embeddings
+import seaborn as sns
+from skimage import measure
+from skimage.io import imread
+from sklearn.decomposition import PCA
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
+from tqdm import tqdm
+
 from br.analysis.analysis_utils import str2bool
+from br.models.compute_features import get_embeddings
 from br.models.utils import get_all_configs_per_dataset
-from skimage import measure
-import seaborn as sns
@@ -39,9 +42,7 @@ def pad_to_size(array, target_shape, padding_value=0):
 
     padding = [(padding_before[i], padding_after[i]) for i in range(len(target_shape))]
 
-    padded_array = np.pad(
-        array, pad_width=padding, mode="constant", constant_values=padding_value
-    )
+    padded_array = np.pad(array, pad_width=padding, mode="constant", constant_values=padding_value)
 
     return padded_array
@@ -53,9 +54,8 @@ def get_image(cell_id, raw_df):
 
 
 def sort_by_second_element_with_index(data):
-    """
-    Sorts a list of lists based on the second element of each sublist
-    and returns a list of original indices in the sorted order.
+    """Sorts a list of lists based on the second element of each sublist and returns a list of
+    original indices in the sorted order.
 
     Args:
       data: A list of lists.
@@ -92,10 +92,8 @@ def merge_contours(contours, distance_threshold):
 
             # Calculate distances between all pairs of points
             distances = np.sqrt(
-                (
-                    (contour1[:, 0, None] - contour2[:, 0]) ** 2
-                    + (contour1[:, 1, None] - contour2[:, 1]) ** 2
-                )
+                (contour1[:, 0, None] - contour2[:, 0]) ** 2
+                + (contour1[:, 1, None] - contour2[:, 1]) ** 2
             )
 
             # Find the minimum distance
@@ -136,12 +134,8 @@ def main(args):
         args.embeddings_path,
     )
     raw_df = pd.read_csv(Path(args.raw_path) / "manifest.csv")
-    raw_df["crop_raw"] = raw_df["crop_raw"].apply(
-        lambda x: Path(args.raw_path) / Path(x)
-    )
-    raw_df["crop_seg_nuc"] = raw_df["crop_seg_nuc"].apply(
-        lambda x: Path(args.raw_path) / Path(x)
-    )
+    raw_df["crop_raw"] = raw_df["crop_raw"].apply(lambda x: Path(args.raw_path) / Path(x))
+    raw_df["crop_seg_nuc"] = raw_df["crop_seg_nuc"].apply(lambda x: Path(args.raw_path) / Path(x))
 
     map_ = {
         "Actinomyocin D 0.5ug per mL": "Actinomyocin D",
@@ -163,7 +157,7 @@ def main(args):
         "Brefeldin 5uM": "Brefeldin",
     }
     all_ret["condition"] = all_ret["condition"].replace(map_)
-    all_ret = all_ret.merge(raw_df[['CellId', 'plate_id']], on='CellId')
+    all_ret = all_ret.merge(raw_df[["CellId", "plate_id"]], on="CellId")
 
     cols = [i for i in all_ret.columns if "mu" in i]
     hits = [
@@ -196,11 +190,11 @@ def main(args):
     sns.set_context("talk")
 
     if args.baseline:
-        all_ret = all_ret.loc[all_ret['condition'] == 'DMSO (control)']
-        hits = [[215, 214, 231, 213, 232, 230, 233, 216]] # random sample of plates
+        all_ret = all_ret.loc[all_ret["condition"] == "DMSO (control)"]
+        hits = [[215, 214, 231, 213, 232, 230, 233, 216]]  # random sample of plates
         merge_thresh = [11]
-        scale_lows = [i*2 for i in scale_lows]
-        scale_highs = [i*3 for i in scale_highs]
+        scale_lows = [i * 2 for i in scale_lows]
+        scale_highs = [i * 3 for i in scale_highs]
 
     for j, hit in enumerate(hits):
         print("Analysis for", hit)
@@ -210,8 +204,8 @@ def main(args):
             tmp1 = all_ret.loc[all_ret["condition"] == "DMSO (control)"]
             tmp2 = all_ret.loc[all_ret["condition"] == hit]
         else:
-            tmp1 = all_ret.loc[all_ret['plate_id'].isin(hit[:4])]
-            tmp2 = all_ret.loc[all_ret['plate_id'].isin(hit[4:])]
+            tmp1 = all_ret.loc[all_ret["plate_id"].isin(hit[:4])]
+            tmp2 = all_ret.loc[all_ret["plate_id"].isin(hit[4:])]
         tmp1["class"] = 0
         tmp2["class"] = 1
         tmp = pd.concat([tmp1, tmp2], axis=0).reset_index(drop=True)
@@ -222,6 +216,8 @@ def main(args):
         preds = clf.fit_transform(X, y)
         lda_direction = clf.coef_[0]
         lda_line = np.array([-lda_direction * scale_low, lda_direction * scale_high])
+
+        mpl.rcParams["font.size"] = 17
         fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
         colors = plt.cm.Set2(np.linspace(0, 1, 8))
         # PCA Projection plot
@@ -296,6 +292,8 @@ def main(args):
         count = 0
         max_x, max_y = 0, 0
         seen = set()
+        tmp["LDA_line"] = np.nan
+
         for w in tqdm(walk, desc="Traversing PC-LDA line"):
             dist = np.linalg.norm(X - w, axis=1)
             dist_argsort = np.argsort(dist)
@@ -318,7 +316,7 @@ def main(args):
         classes = []
         from scipy import ndimage
 
-        for w in tqdm(walk, desc="Traversing PC-LDA line"):
+        for j_index, w in enumerate(tqdm(walk, desc="Traversing PC-LDA line")):
             dist = np.linalg.norm(X - w, axis=1)
             dist_argsort = np.argsort(dist)
             examples = []
@@ -337,6 +335,7 @@ def main(args):
 
                 this_class = tmp.iloc[idx]["class"]
                 classes.append(this_class)
+                tmp.loc[tmp["CellId"] == this_id, "LDA_line"] = j_index
 
                 ax1.scatter(
                     tmp.iloc[idx]["pc_0"],
@@ -362,6 +361,7 @@ def main(args):
             count += 1
         movie = np.hstack(movie)
         movie2 = np.hstack(movie2)
+        tmp.to_csv(save_path + f"LDA_{hit}.csv")
 
         contours = measure.find_contours(movie2, 0.5)
@@ -390,20 +390,18 @@ def main(args):
     parser = argparse.ArgumentParser(
         description="Script for computing perturbation detection metrics"
     )
-    parser.add_argument(
-        "--save_path", type=str, required=True, help="Path to save the results."
-    )
+    parser.add_argument("--save_path", type=str, required=True, help="Path to save the results.")
     parser.add_argument(
         "--embeddings_path",
         type=str,
         required=True,
         help="Path to the saved embeddings.",
     )
+    parser.add_argument("--dataset_name", type=str, required=True, help="Name of the dataset.")
+    parser.add_argument("--raw_path", type=str, required=True, help="Path to raw data")
     parser.add_argument(
-        "--dataset_name", type=str, required=True, help="Name of the dataset."
+        "--baseline", type=str2bool, default=False, help="Perform LDA baseline only"
     )
-    parser.add_argument("--raw_path", type=str, required=True, help="Path to raw data")
-    parser.add_argument("--baseline", type=str2bool, default=False, help="Perform LDA baseline only")
 
     args = parser.parse_args()
 
     # Validate that required paths are provided
@@ -419,6 +417,6 @@
     For all drugs:
     python src/br/analysis/run_drugdata_LDA.py --save_path "./outputs_npm1_perturb/" --embeddings_path "./morphology_appropriate_representation_learning/model_embeddings/npm1_perturb/" --dataset_name "npm1_perturb" --raw_path "./NPM1_single_cell_drug_perturbations/"
 
-    For baseline (DMSO subset 1 -> DMSO subset 2): 
+    For baseline (DMSO subset 1 -> DMSO subset 2):
     python src/br/analysis/run_drugdata_LDA.py --save_path "./outputs_npm1_perturb/" --embeddings_path "./morphology_appropriate_representation_learning/model_embeddings/npm1_perturb/" --dataset_name "npm1_perturb" --raw_path "./NPM1_single_cell_drug_perturbations/" --baseline True
     """
diff --git a/src/br/features/classification.py b/src/br/features/classification.py
index ad63f17..992a342 100644
--- a/src/br/features/classification.py
+++ b/src/br/features/classification.py
@@ -91,8 +91,6 @@ def get_classification(this_mo, target_col, cols=None):
 
     if cols is None:
         cols = [i for i in this_mo.columns if "mu" in i]
-
-
     clf = proba_logreg(
         random_state=20,
         class_weight=class_weight,
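Closing note: the core pattern in `run_drugdata_LDA.py` is a walk along the LDA discriminant direction (`clf.coef_[0]`), picking the nearest real cell at each step, which is what the `LDA_line` bookkeeping added above records. A minimal sketch follows, with synthetic two-class data standing in for the real embeddings.

```python
# Minimal sketch of the PC-LDA walk pattern from run_drugdata_LDA.py:
# fit an LDA between two conditions, then step along the discriminant
# direction and grab the nearest sample at each step. Synthetic data.
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

rng = np.random.default_rng(0)
X = np.vstack([rng.normal(0.0, 1.0, (100, 8)), rng.normal(1.5, 1.0, (100, 8))])
y = np.array([0] * 100 + [1] * 100)

clf = LDA(n_components=1)
clf.fit(X, y)
direction = clf.coef_[0] / np.linalg.norm(clf.coef_[0])

for step in np.linspace(-2, 2, 9):  # walk the discriminant axis
    point = step * direction
    nearest = int(np.argmin(np.linalg.norm(X - point, axis=1)))
    print(f"step {step:+.1f} -> nearest sample {nearest} (class {y[nearest]})")
```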