Merged
Commits (67)
7c42408
update readdy module to match cytosim as much as possible
Jun 25, 2024
0b17f02
script to copy readdy outputs
Jun 26, 2024
aa344d5
visualize individual trajectories, leverage readdy post processing co…
Jun 27, 2024
de1b0a5
visualize combined
Jun 27, 2024
037f3bf
add readdy baseline individual viz
Jul 1, 2024
fa06164
WIP tomography viz
Jul 1, 2024
9bd0675
tomography visualization
Jul 1, 2024
5cea593
Merge branch 'main' into update-viz
Jul 1, 2024
4bd4c60
calculate compression metrics on tomography data for viz
Jul 2, 2024
27050ce
WIP debugging tomography visualization and analysis
Jul 3, 2024
97b09e3
dim reduction viz, tomography viz in separate files
Jul 8, 2024
1ec3599
WIP adding options to pca viz
Jul 9, 2024
503c5b6
Update name to series_name in workflow docstrings
jessicasyu Jul 1, 2024
1866f55
Move copy readdy outputs into workflow notebooks
jessicasyu Jul 1, 2024
73c4527
Lint readdy processing notebooks
jessicasyu Jul 1, 2024
5164acf
Update docstrings in readdy data structures module
jessicasyu Jul 2, 2024
b599032
Update docstrings in readdy loader module
jessicasyu Jul 2, 2024
1366b98
Update docstrings and type hinting in readdy post processor module
jessicasyu Jul 2, 2024
5190623
Fix readdy no compression simulation series name
jessicasyu Jul 2, 2024
f6e7376
Update readdy simulation workflows
jessicasyu Jul 2, 2024
548f3aa
Fix autodoc for readdy simulation module
jessicasyu Jul 2, 2024
5497614
Fix floating point error when calculating control points
jessicasyu Jul 3, 2024
9ca451b
Fix readdy series name in dim reduction analysis
jessicasyu Jul 3, 2024
36065d0
Add compression metric descriptions and bounds to enum
jessicasyu Jul 3, 2024
6f7b3d5
Update compression workflow to include non compression cases
jessicasyu Jul 3, 2024
e89b6a0
Fix readdy post processor normal calculation to skip start and end
jessicasyu Jul 3, 2024
e9921d2
Remove outdated visualization files
jessicasyu Jul 3, 2024
881125b
Update individual simulator visualization workflows
jessicasyu Jul 3, 2024
b0ff9e6
Adjust number of sampled timepoints for Cytosim to match ReaDDy
jessicasyu Jul 8, 2024
fba0a71
Refactor combined trajectory visualization
jessicasyu Jul 8, 2024
11986b6
Add visualization notebooks to docs
jessicasyu Jul 8, 2024
07ca4eb
Add normalized time to compression metrics workflow
jessicasyu Jul 9, 2024
1d378e2
Update PCA transform points
jessicasyu Jul 9, 2024
a84eb7c
* refactor tomography visualization functions
mogres Jul 9, 2024
cf1ca60
Update tomography visualization
jessicasyu Jul 9, 2024
e4a7597
add column to check if files are ordered
mogres Jul 10, 2024
669374d
Merge branch 'main' into debug-update-viz
jessicasyu Jul 10, 2024
120ad45
Formatting for compression metrics analysis
jessicasyu Jul 10, 2024
6699c55
Update pca analysis workflow to save results to bucket
jessicasyu Jul 10, 2024
18c54a5
Update visualization README
jessicasyu Jul 10, 2024
7e5d06a
Refactor PCA visualization
jessicasyu Jul 10, 2024
0b1001f
merged in changes from testing PCA viz with options
Jul 15, 2024
d81f78c
update colors and camera position for combined viz
Jul 15, 2024
64c8762
PCA viz tested with all options
Jul 16, 2024
d4cf64d
test and tweak individual readdy viz
Jul 17, 2024
723343b
test and tweak individual cytosim viz
Jul 17, 2024
4dd48a3
updated camera views for individual and PCA viz
Jul 17, 2024
dfba623
finish commit
Jul 17, 2024
05bccff
add ipdb to requirements
mogres Jul 17, 2024
83fab65
Methods to calculate twist angle
mogres Jul 17, 2024
d8895fe
Methods to visualize twist angle
mogres Jul 17, 2024
1cb6bff
Remove tangent angle visualization. Fix doc strings.
mogres Jul 18, 2024
5cf8e74
Merge branch 'debug-update-viz' of github.com:simularium/subcell-pipe…
mogres Jul 18, 2024
a3e4740
lint
Jul 19, 2024
e85f117
more lint
Jul 19, 2024
65b4f9d
and more linting
Jul 19, 2024
491173e
fix saving tomography 2D plots
Jul 19, 2024
7c87800
tweaks to dim reduction and tomography viz
Jul 19, 2024
aa34e3b
lint
Jul 19, 2024
9d87bfe
Update subcell_pipeline/analysis/compression_metrics/README.md
Jul 25, 2024
7085345
changes from review
Jul 25, 2024
76b2ced
add h5py with pdm, remove unused imageio dep, remove broken tests
Jul 25, 2024
305b0d0
don't run tests on build since there aren't any
Jul 25, 2024
497bcb5
calculate total steps for readdy
Jul 25, 2024
d0328b5
remove .pdm-python and add to gitignore
Jul 29, 2024
d2c6c0a
Merge remote-tracking branch 'origin/feature/polymer_twist' into debu…
Jul 29, 2024
f2dfd59
more changes from review
Jul 29, 2024
6 changes: 0 additions & 6 deletions .github/workflows/build.yml
@@ -53,12 +53,6 @@ jobs:
conda env update --file environment.yml --prune
pdm sync

- name: Test with pytest
shell: bash -l {0}
run: |
pdm run pytest --cov --cov-report html
rm htmlcov/.gitignore

Collaborator:
I would keep the build workflow as is (it'll break the coverage badge otherwise). It just won't find any tests to run, which I think is fine.

Collaborator (Author):
it breaks the build otherwise. We can add one test that passes, or maybe the coverage badge should be removed since we know it's zero, unfortunately

Collaborator:
I can refactor the existing test so it passes? It doesn't need to be in this PR

Collaborator (Author):
can I leave it like this for this PR and you can pull this back in with the fixed test?
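For context, the "one test that passes" suggested above could be as small as a single import check. This is a hypothetical sketch — the file name `tests/test_placeholder.py` and its contents are not part of this PR:

```python
# tests/test_placeholder.py (hypothetical): one trivially passing test so
# `pdm run pytest --cov` still collects something and the coverage report
# publishes, even with the real tests removed in this PR.
def test_package_imports() -> None:
    import subcell_pipeline  # noqa: F401

    assert subcell_pipeline is not None
```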

- name: Publish coverage report
if: ${{ github.ref == 'refs/heads/main' && matrix.python-version == '3.10' }}
uses: JamesIves/github-pages-deploy-action@v4
1 change: 1 addition & 0 deletions .pdm-python
@@ -0,0 +1 @@
/Users/blairl/Documents/Dev/subcell-pipeline/.venv/bin/python
109 changes: 101 additions & 8 deletions pdm.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -27,7 +27,7 @@ dependencies = [
"setuptools>=70.0.0",
"io-collection>=0.10.2",
"python-dotenv>=1.0.1",
"imageio",
"h5py>=3.11.0",
]

[project.urls]
Expand Down
2 changes: 1 addition & 1 deletion subcell_pipeline/analysis/compression_metrics/README.md
@@ -2,6 +2,6 @@

## Metrics for comparing traces of compressed fibers

Analysis combines compression simulations from Cytosim and Readdy and calculates various compression metrics metrics to compare fibers.
Analysis combines compression simulations from Cytosim and Readdy and calculates various compression metrics to compare fibers.

- **Compare compression metrics between simulators** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/analysis/compression_metrics/_compare_compression_metrics.html))
@@ -106,7 +106,7 @@ def align_fibers(data: pd.DataFrame) -> None:

def align_fiber(coords: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
"""
Align an array of x, y, z coordinates along the positive x axis.
Align an array of x, y, z coordinates along the positive y axis.

The function identifies the furthest point in the yz-plane and computes the
angle needed to rotate this point to lie on the positive y axis. This
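As a sketch of the geometry this corrected docstring describes — an assumption-labeled illustration, not the repository's exact implementation:

```python
import numpy as np


def align_fiber_sketch(coords: np.ndarray) -> np.ndarray:
    """Rotate (N, 3) coordinates about the x axis so the point furthest
    from the x axis in the yz-plane lands on the positive y axis."""
    y, z = coords[:, 1], coords[:, 2]
    furthest = np.argmax(y**2 + z**2)  # furthest point in the yz-plane
    theta = np.arctan2(z[furthest], y[furthest])  # its angle from the +y axis
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    aligned = coords.copy()
    aligned[:, 1] = y * cos_t + z * sin_t  # rotate by -theta about x
    aligned[:, 2] = -y * sin_t + z * cos_t  # furthest point now has z == 0
    return aligned
```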
@@ -26,8 +26,6 @@
raise ImportError("This module is a notebook and is not meant to be imported")

# %%
from pathlib import Path

import pandas as pd

from subcell_pipeline.analysis.tomography_data.tomography_data import (
@@ -37,9 +35,6 @@
sample_tomography_data,
)

# pixels to um
TOMOGRAPHY_SCALE_FACTOR: float = 0.0006

# %% [markdown]
"""
## Load tomography datasets
@@ -55,13 +50,12 @@
# S3 bucket for input and output files
bucket = "s3://subcell-working-bucket"

# Temporary path to save visualization files
temp_path: Path = Path(__file__).parents[3] / "analysis_outputs"
temp_path.mkdir(parents=True, exist_ok=True)

# Data repository for downloading tomography data
repository = "https://raw.githubusercontent.com/RangamaniLabUCSD/actincme/master/PolarityAnalysis/"

# Conversion factor from pixels to um for this dataset
tomography_scale_factor: float = 0.0006

# Folders and names of branched actin datasets
branched_datasets = [
("2018August_Tomo27", "TomoAugust_27_earlyCME"),
@@ -87,14 +81,14 @@
name=name,
repository=repository,
datasets=branched_datasets,
scale_factor=TOMOGRAPHY_SCALE_FACTOR,
scale_factor=tomography_scale_factor,
)
unbranched_df = get_unbranched_tomography_data(
bucket=bucket,
name=name,
repository=repository,
datasets=unbranched_datasets,
scale_factor=TOMOGRAPHY_SCALE_FACTOR,
scale_factor=tomography_scale_factor,
)

# %% [markdown]
@@ -104,7 +98,7 @@

# %%
plot_tomography_data_by_dataset(
branched_df, bucket, f"{name}/{name}_plots_branched.png", str(temp_path)
branched_df, bucket, f"{name}/{name}_plots_branched.png"
)

# %% [markdown]
@@ -114,7 +108,7 @@

# %%
plot_tomography_data_by_dataset(
unbranched_df, bucket, f"{name}/{name}_plots_unbranched.png", str(temp_path)
unbranched_df, bucket, f"{name}/{name}_plots_unbranched.png"
)

# %% [markdown]
@@ -162,7 +156,7 @@

# %%
plot_tomography_data_by_dataset(
sampled_data, bucket, f"{name}/{name}_plots_all_sampled.png", str(temp_path)
sampled_data, bucket, f"{name}/{name}_plots_all_sampled.png"
)

# %%
30 changes: 6 additions & 24 deletions subcell_pipeline/analysis/tomography_data/tomography_data.py
@@ -1,20 +1,15 @@
import io
import os

import imageio
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from io_collection.keys.check_key import check_key
from io_collection.load.load_dataframe import load_dataframe
from io_collection.save.save_buffer import save_buffer_to_s3
from io_collection.save.save_dataframe import save_dataframe
from PIL import Image
from io_collection.save.save_figure import save_figure

TOMOGRAPHY_SAMPLE_COLUMNS: list[str] = ["xpos", "ypos", "zpos"]


def test_consecutive_segment_angles(polymer_trace: np.ndarray) -> np.bool_:
def test_consecutive_segment_angles(polymer_trace: np.ndarray) -> bool:
"""
Test whether the angles between consecutive segments of a polymer
trace are less than 90 degrees.
@@ -26,7 +21,7 @@ def test_consecutive_segment_angles(polymer_trace: np.ndarray) -> np.bool_:

Returns
-------
bool
:
True if all consecutive angles are less than 180 degrees.
"""
vectors = polymer_trace[1:] - polymer_trace[:-1]
@@ -35,7 +30,7 @@ def test_consecutive_segment_angles(polymer_trace: np.ndarray) -> np.bool_:
dot_products = np.dot(vectors[1:], vectors[:-1].T)

# Check if any angle is greater than 90 degrees
return np.all(dot_products > 0)
return np.all(dot_products > 0).item()
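The return-type change works because `np.all` returns a NumPy boolean scalar (`np.bool_`), not a Python `bool`; `.item()` converts it to match the new annotation. A quick check:

```python
import numpy as np

result = np.all(np.array([1.0, 2.0]) > 0)
print(type(result))         # <class 'numpy.bool_'>
print(type(result.item()))  # <class 'bool'>
```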


def read_tomography_data(file: str, label: str = "fil") -> pd.DataFrame:
@@ -284,17 +279,10 @@ def sample_tomography_data(
return all_sampled_df


def save_image_to_s3(bucket: str, key: str, image: np.ndarray) -> None:
with io.BytesIO() as buffer:
Image.fromarray(image).save(buffer, format="png")
save_buffer_to_s3(bucket[5:], key, buffer, "image/png")


def plot_tomography_data_by_dataset(
data: pd.DataFrame,
bucket: str,
output_key: str,
temp_path: str,
) -> None:
"""
Plot tomography data for each dataset.
@@ -307,13 +295,9 @@ def plot_tomography_data_by_dataset(
Where to upload the results.
output_key
File key for results.
temp_path
Local path for saving visualization output files.
"""
local_save_path = os.path.join(temp_path, os.path.basename(output_key))

figure, ax = plt.subplots(1, 3, figsize=(6, 2))
for dataset, group in data.groupby("dataset"):
_, ax = plt.subplots(1, 3, figsize=(6, 2))

ax[1].set_title(dataset)

@@ -329,6 +313,4 @@
ax[1].plot(fiber["xpos"], fiber["zpos"], marker="o", ms=1, lw=1)
ax[2].plot(fiber["ypos"], fiber["zpos"], marker="o", ms=1, lw=1)

plt.savefig(local_save_path)
image: np.ndarray = imageio.imread(local_save_path)
save_image_to_s3(bucket, output_key, image)
save_figure(bucket, output_key, figure)
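The refactor above replaces the save-to-disk/`imageio`/PIL round trip with a single call to `io_collection`'s figure saver. A minimal usage sketch, assuming the `save_figure(location, key, figure)` call shape used in this diff (the key below is a placeholder):

```python
import matplotlib.pyplot as plt
from io_collection.save.save_figure import save_figure

figure, ax = plt.subplots(figsize=(3, 2))
ax.plot([0, 1, 2], [0, 1, 0], marker="o", ms=1, lw=1)

# Saves the figure directly to the bucket, with no local temp file.
save_figure("s3://subcell-working-bucket", "example/example_plot.png", figure)
```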
@@ -52,14 +52,6 @@
# Number of monomer points per fiber
n_monomer_points = 200

# Total number of steps for each condition
total_steps: dict[str, int] = {
"0047": int(3.2e8),
"0150": int(1e8),
"0470": int(3.2e7),
"1500": int(1e7),
}

# Temporary path to save downloaded trajectories
temp_path: Path = Path(__file__).parents[3] / "aws_downloads"
temp_path.mkdir(parents=True, exist_ok=True)
@@ -85,6 +77,6 @@
n_replicates,
n_timepoints,
n_monomer_points,
total_steps,
str(temp_path),
compression=True,
temp_path=str(temp_path),
)
@@ -48,9 +48,6 @@
# Number of monomer points per fiber
n_monomer_points = 200

# Total number of steps for each condition
total_steps: dict[str, int] = {"": int(1e7)}

# Temporary path to save downloaded trajectories
temp_path: Path = Path(__file__).parents[3] / "aws_downloads"
temp_path.mkdir(parents=True, exist_ok=True)
@@ -76,6 +73,6 @@
n_replicates,
n_timepoints,
n_monomer_points,
total_steps,
str(temp_path),
compression=False,
temp_path=str(temp_path),
)
6 changes: 5 additions & 1 deletion subcell_pipeline/simulation/readdy/loader.py
@@ -9,7 +9,11 @@
from io_collection.save.save_pickle import save_pickle
from tqdm import tqdm

from .data_structures import FrameData, ParticleData, TopologyData
from subcell_pipeline.simulation.readdy.data_structures import (
FrameData,
ParticleData,
TopologyData,
)


class ReaddyLoader:
30 changes: 27 additions & 3 deletions subcell_pipeline/simulation/readdy/parser.py
@@ -1,6 +1,7 @@
"""Methods for parsing ReaDDy simulations."""

import os
from math import floor, log10
from typing import Optional, Union

import boto3
@@ -47,6 +48,9 @@
BOX_SIZE: np.ndarray = np.array(3 * [600.0])
"""Default simulation volume dimensions (x, y, z)."""

COMPRESSION_DISTANCE: float = 150.0
"""Total distance the fiber end was displaced in nm."""


def _download_s3_file(bucket: str, key: str, dest_path: str) -> Optional[str]:
"""
@@ -205,14 +209,23 @@ def parse_readdy_simulation_single_fiber_trajectory(
return dataframe


def round_2_sig_figs(x: float) -> int:
return int(round(x, -int(floor(log10(abs(0.1 * x))))))


def velocity_for_cond(condition_key: str) -> float:
"""'NNNN' -> NNN.N."""
return float(condition_key[:3] + "." + condition_key[-1])


def parse_readdy_simulation_data(
bucket: str,
series_name: str,
condition_keys: list[str],
n_replicates: int,
n_timepoints: int,
n_monomer_points: int,
total_steps: dict[str, int],
compression: bool,
temp_path: str,
) -> None:
"""
@@ -232,11 +245,22 @@
Number of equally spaced timepoints to sample.
n_monomer_points
Number of equally spaced monomer points to sample.
total_steps
Total number of steps for each simulation key.
compression
If True, parse compressed trajectories,
If False, parse baseline uncompressed trajectories.
temp_path
Path for saving temporary h5 files.
"""
total_steps: dict[str, int] = {}
if compression:
total_steps = {
cond: round_2_sig_figs(
(COMPRESSION_DISTANCE * 1e-3 / velocity_for_cond(cond)) * 1e10
)
for cond in condition_keys
}
else:
total_steps = {"": int(1e7)}

for condition_key in condition_keys:
series_key = f"{series_name}_{condition_key}" if condition_key else series_name
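The derived step counts reproduce the dictionaries previously hard-coded in the notebooks (removed above), e.g. `{"0047": int(3.2e8), "0150": int(1e8), "0470": int(3.2e7), "1500": int(1e7)}`. A standalone check of the arithmetic — the 1e-3 and 1e10 factors presumably convert nm to um and seconds to 1e-10 s timesteps, though the units are not stated in the diff:

```python
from math import floor, log10

COMPRESSION_DISTANCE = 150.0  # nm, as defined above


def round_2_sig_figs(x: float) -> int:
    return int(round(x, -int(floor(log10(abs(0.1 * x))))))


def velocity_for_cond(condition_key: str) -> float:
    """'NNNN' -> NNN.N, e.g. '0047' -> 4.7."""
    return float(condition_key[:3] + "." + condition_key[-1])


for cond in ["0047", "0150", "0470", "1500"]:
    steps = round_2_sig_figs(
        (COMPRESSION_DISTANCE * 1e-3 / velocity_for_cond(cond)) * 1e10
    )
    print(cond, steps)
# 0047 320000000  (3.2e8)
# 0150 100000000  (1e8)
# 0470 32000000   (3.2e7)
# 1500 10000000   (1e7)
```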
@@ -7,7 +7,7 @@
fibers using [Simularium](https://simularium.allencell.org/).

- [Define visualization settings](#define-visualization-settings)
- [Visualize tomography data](#visualize-tomography)
- [Visualize tomography data](#visualize-tomography-data)
"""

# %%
25 changes: 0 additions & 25 deletions tests/conftest.py

This file was deleted.

Binary file removed tests/data/readdy/actin_ortho_filament_10_steps.h5