From 7c42408126f9e742cf3ad66f578c61ad4a571ee2 Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Tue, 25 Jun 2024 16:34:32 -0700 Subject: [PATCH 01/63] update readdy module to match cytosim as much as possible --- subcell_pipeline/simulation/constants.py | 28 ++ ...process_cytosim_compression_simulations.py | 4 +- ...cess_cytosim_no_compression_simulations.py | 4 +- subcell_pipeline/simulation/readdy/README.md | 17 + .../simulation/readdy/__init__.py | 6 +- ..._process_readdy_compression_simulations.py | 69 +++ ...ocess_readdy_no_compression_simulations.py | 65 +++ .../simulation/readdy/constants.py | 34 ++ .../create_dataframes_from_readdy_outputs.py | 111 ----- .../{readdy_data.py => data_structures.py} | 0 .../readdy/{readdy_loader.py => loader.py} | 2 +- subcell_pipeline/simulation/readdy/parser.py | 193 ++++++++ ...dy_post_processor.py => post_processor.py} | 51 +- .../simulation/readdy/readdy_analysis.py | 457 ------------------ .../readdy/readdy_loading_example.py | 74 --- 15 files changed, 430 insertions(+), 685 deletions(-) create mode 100644 subcell_pipeline/simulation/constants.py create mode 100644 subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py create mode 100644 subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py create mode 100644 subcell_pipeline/simulation/readdy/constants.py delete mode 100644 subcell_pipeline/simulation/readdy/create_dataframes_from_readdy_outputs.py rename subcell_pipeline/simulation/readdy/{readdy_data.py => data_structures.py} (100%) rename subcell_pipeline/simulation/readdy/{readdy_loader.py => loader.py} (98%) create mode 100644 subcell_pipeline/simulation/readdy/parser.py rename subcell_pipeline/simulation/readdy/{readdy_post_processor.py => post_processor.py} (95%) delete mode 100644 subcell_pipeline/simulation/readdy/readdy_analysis.py delete mode 100644 subcell_pipeline/simulation/readdy/readdy_loading_example.py diff --git a/subcell_pipeline/simulation/constants.py b/subcell_pipeline/simulation/constants.py new file mode 100644 index 0000000..8667478 --- /dev/null +++ b/subcell_pipeline/simulation/constants.py @@ -0,0 +1,28 @@ +"""Constants for parsing simulations.""" + + +COLUMN_NAMES = [ + "fiber_id", + "xpos", + "ypos", + "zpos", + "xforce", + "yforce", + "zforce", + "segment_curvature", + "time", + "fiber_point", +] + +COLUMN_DTYPES = { + "fiber_id": int, + "xpos": float, + "ypos": float, + "zpos": float, + "xforce": float, + "yforce": float, + "zforce": float, + "segment_curvature": float, + "time": float, + "fiber_point": int, +} \ No newline at end of file diff --git a/subcell_pipeline/simulation/cytosim/_process_cytosim_compression_simulations.py b/subcell_pipeline/simulation/cytosim/_process_cytosim_compression_simulations.py index 32c62ae..1fd47f8 100644 --- a/subcell_pipeline/simulation/cytosim/_process_cytosim_compression_simulations.py +++ b/subcell_pipeline/simulation/cytosim/_process_cytosim_compression_simulations.py @@ -57,8 +57,8 @@ files and parse them into a tidy data format. If the parsed file for a given condition key and random seed already exists, parsing is skipped. 
-- Input: `(name)/outputs/(name)_(condition_key)_(index)/` -- Output: `(name)/data/(name)_(condition_key)_(seed).csv` +- Input: `(series_name)/outputs/(series_name)_(condition_key)_(index)/` +- Output: `(series_name)/data/(series_name)_(condition_key)_(seed).csv` """ # %% diff --git a/subcell_pipeline/simulation/cytosim/_process_cytosim_no_compression_simulations.py b/subcell_pipeline/simulation/cytosim/_process_cytosim_no_compression_simulations.py index da1fce8..50bdeb5 100644 --- a/subcell_pipeline/simulation/cytosim/_process_cytosim_no_compression_simulations.py +++ b/subcell_pipeline/simulation/cytosim/_process_cytosim_no_compression_simulations.py @@ -54,8 +54,8 @@ files and parse them into a tidy data format. If the parsed file for a given condition key and random seed already exists, parsing is skipped. -- Input: `(name)/outputs/(name)_(condition_key)_(index)/` -- Output: `(name)/data/(name)_(condition_key)_(seed).csv` +- Input: `(series_name)/outputs/(series_name)_(index)/` +- Output: `(series_name)/data/(series_name)_(seed).csv` """ # %% diff --git a/subcell_pipeline/simulation/readdy/README.md b/subcell_pipeline/simulation/readdy/README.md index 9ec550b..cdbe612 100644 --- a/subcell_pipeline/simulation/readdy/README.md +++ b/subcell_pipeline/simulation/readdy/README.md @@ -2,5 +2,22 @@ Simulations and processing for particle-based reaction-diffusion simulator [ReaDDy](https://readdy.github.io/). +## Run ReaDDy simulations (compression and no compression) + > - **Base simulator**: [https://github.com/readdy/readdy](https://github.com/readdy/readdy) > - **Model development**: [https://github.com/simularium/readdy-models](https://github.com/simularium/readdy-models) + +- **Run ReaDDy compression simulations** ([source](https://github.com/simularium/readdy-models/tree/main/examples/actin) | [readme](https://github.com/simularium/readdy-models/blob/main/examples/README.md)) + + +## Process baseline single actin fiber with no compression + +The `ACTIN_NO_COMPRESSION` simulation series simulates a single actin fiber with a free barbed end across five replicates. + +- **Process ReaDDy single fiber simulations** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/simulation/readdy/_process_readdy_no_compression_simulations.html)) + +## Process single actin fiber compressed at different compression velocities + +The `ACTIN_COMPRESSION_VELOCITY` simulation series simulates compression of a single 500 nm actin fiber at four different velocities (4.7, 15, 47, and 150 μm/s) with five replicates. 
+
+- **Process ReaDDy compression simulations** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/simulation/readdy/_process_readdy_compression_simulations.html))
diff --git a/subcell_pipeline/simulation/readdy/__init__.py b/subcell_pipeline/simulation/readdy/__init__.py
index 0b837a0..fe08729 100644
--- a/subcell_pipeline/simulation/readdy/__init__.py
+++ b/subcell_pipeline/simulation/readdy/__init__.py
@@ -1,5 +1,5 @@
 """readdy package for subcell_analysis."""
 
-from .readdy_data import FrameData  # noqa: F401
-from .readdy_loader import ReaddyLoader  # noqa: F401
-from .readdy_post_processor import ReaddyPostProcessor  # noqa: F401
+from .data_structures import FrameData, TopologyData, ParticleData  # noqa: F401
+from .loader import ReaddyLoader  # noqa: F401
+from .post_processor import ReaddyPostProcessor  # noqa: F401
diff --git a/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py b/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py
new file mode 100644
index 0000000..9d71656
--- /dev/null
+++ b/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py
@@ -0,0 +1,69 @@
+# %% [markdown]
+# # Process ReaDDy simulations
+
+# %% [markdown]
+"""
+
+Notebook contains steps for post processing of ReaDDy simulations in which a
+single actin fiber is compressed at different compression velocities.
+
+This notebook provides an example of processing a simulation series in which
+there are multiple conditions, each of which was run with multiple replicates.
+For an example of processing a simulation series with a single condition and
+multiple replicates, see `_process_readdy_no_compression_simulations.py`.
+
+- [Define simulation conditions](#define-simulation-conditions)
+- [Parse simulation data](#parse-simulation-data)
+"""
+
+# %%
+if __name__ != "__main__":
+    raise ImportError("This module is a notebook and is not meant to be imported")
+
+# %%
+from subcell_pipeline.simulation.cytosim.post_processing import (
+    parse_readdy_simulation_data,
+)
+
+# %% [markdown]
+"""
+## Define simulation conditions
+
+Defines the `ACTIN_COMPRESSION_VELOCITY` simulation series, which compresses a
+single 500 nm actin fiber at four different velocities (4.7, 15, 47, and
+150 μm/s) with five replicates each.
+"""
+
+# %%
+# Name of the simulation series
+series_name: str = "ACTIN_COMPRESSION_VELOCITY"
+
+# S3 bucket for input and output files
+bucket: str = "s3://readdy-working-bucket"
+
+# Number of simulation replicates
+n_replicates: int = 5
+
+# List of condition file keys for each velocity
+condition_keys: list[str] = ["0047", "0150", "0470", "1500"]
+
+# Number of timepoints
+n_timepoints: int = 200
+
+# Number of monomer points per fiber
+n_monomer_points: int = 200
+
+# %% [markdown]
+"""
+## Parse simulation data
+
+Iterate through all condition keys and random seeds to load simulation output
+files and parse them into a tidy data format. If the parsed file for a given
+condition key and random seed already exists, parsing is skipped.
+
+- Input: `(series_name)/outputs/(series_name)_(condition_key)_(index).h5`
+- Output: `(series_name)/data/(series_name)_(condition_key)_(seed).csv`
+"""
+
+# %%
+parse_readdy_simulation_data(bucket, series_name, condition_keys, n_replicates, n_timepoints, n_monomer_points)
diff --git a/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py b/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py
new file mode 100644
index 0000000..28cb540
--- /dev/null
+++ b/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py
@@ -0,0 +1,65 @@
+# %% [markdown]
+# # Process ReaDDy simulations
+
+# %% [markdown]
+"""
+Notebook contains steps for post processing of ReaDDy simulations for a
+baseline single actin fiber with no compression.
+
+This notebook provides an example of processing a simulation series for a single
+condition with multiple replicates. For an example of processing a simulation
+series with multiple conditions, each of which has multiple replicates, see
+`_process_readdy_compression_simulations.py`.
+
+- [Define simulation conditions](#define-simulation-conditions)
+- [Parse simulation data](#parse-simulation-data)
+"""
+
+# %%
+if __name__ != "__main__":
+    raise ImportError("This module is a notebook and is not meant to be imported")
+
+# %%
+from subcell_pipeline.simulation.cytosim.post_processing import (
+    parse_readdy_simulation_data,
+)
+from subcell_pipeline.simulation.post_processing import sample_simulation_data
+
+# %% [markdown]
+"""
+## Define simulation conditions
+
+Defines the `ACTIN_NO_COMPRESSION` simulation series, which simulates a single
+actin fiber with a free barbed end across five replicates.
+"""
+
+# %%
+# Name of the simulation series
+series_name: str = "ACTIN_NO_COMPRESSION"
+
+# S3 bucket for input and output files
+bucket: str = "s3://readdy-working-bucket"
+
+# Number of simulation replicates
+n_replicates: int = 5
+
+# Number of timepoints
+n_timepoints: int = 200
+
+# Number of monomer points per fiber
+n_monomer_points: int = 200
+
+# %% [markdown]
+"""
+## Parse simulation data
+
+Iterate through all condition keys and random seeds to load simulation output
+files and parse them into a tidy data format. If the parsed file for a given
+condition key and random seed already exists, parsing is skipped.
+ +- Input: `(series_name)/outputs/(series_name)_(index).h5` +- Output: `(series_name)/data/(series_name)_(index).csv` +""" + +# %% +parse_readdy_simulation_data(bucket, series_name, [""], n_replicates, n_timepoints, n_monomer_points) diff --git a/subcell_pipeline/simulation/readdy/constants.py b/subcell_pipeline/simulation/readdy/constants.py new file mode 100644 index 0000000..5bfb3e9 --- /dev/null +++ b/subcell_pipeline/simulation/readdy/constants.py @@ -0,0 +1,34 @@ +"""Constants for parsing ReaDDy simulations.""" + + +import numpy as np + + +# particle types correspond to types from simularium/readdy-models +ACTIN_START_PARTICLE_PHRASE = "pointed" +ACTIN_PARTICLE_TYPES = [ + "actin#", + "actin#ATP_", + "actin#mid_", + "actin#mid_ATP_", + "actin#fixed_", + "actin#fixed_ATP_", + "actin#mid_fixed_", + "actin#mid_fixed_ATP_", + "actin#barbed_", + "actin#barbed_ATP_", + "actin#fixed_barbed_", + "actin#fixed_barbed_ATP_", +] + +# measured from crystal structure +IDEAL_ACTIN_POSITIONS = np.array( + [ + [24.738, 20.881, 26.671], + [27.609, 24.061, 27.598], + [30.382, 21.190, 25.725], + ] +) +IDEAL_ACTIN_VECTOR_TO_AXIS = np.array( + [-0.01056751, -1.47785105, -0.65833209] +) diff --git a/subcell_pipeline/simulation/readdy/create_dataframes_from_readdy_outputs.py b/subcell_pipeline/simulation/readdy/create_dataframes_from_readdy_outputs.py deleted file mode 100644 index 5a3c4c5..0000000 --- a/subcell_pipeline/simulation/readdy/create_dataframes_from_readdy_outputs.py +++ /dev/null @@ -1,111 +0,0 @@ -import os -from typing import List - -import numpy as np -import pandas as pd -from subcell_analysis.readdy import ReaddyLoader, ReaddyPostProcessor -from subcell_analysis.readdy.readdy_post_processor import array_to_dataframe - -IDEAL_ACTIN_POSITIONS = np.array( - [ - [24.738, 20.881, 26.671], - [27.609, 24.061, 27.598], - [30.382, 21.190, 25.725], - ] -) -IDEAL_ACTIN_VECTOR_TO_AXIS = np.array([-0.01056751, -1.47785105, -0.65833209]) - - -def _load_readdy_fiber_points(h5_file_path, box_size, n_points_per_fiber): - readdy_loader = ReaddyLoader(str(h5_file_path)) - readdy_post_processor = ReaddyPostProcessor( - readdy_loader.trajectory(), - box_size=box_size, - ) - fiber_chain_ids = readdy_post_processor.linear_fiber_chain_ids( - start_particle_phrases=["pointed"], - other_particle_types=[ - "actin#", - "actin#ATP_", - "actin#mid_", - "actin#mid_ATP_", - "actin#fixed_", - "actin#fixed_ATP_", - "actin#mid_fixed_", - "actin#mid_fixed_ATP_", - "actin#barbed_", - "actin#barbed_ATP_", - "actin#fixed_barbed_", - "actin#fixed_barbed_ATP_", - ], - polymer_number_range=5, - ) - axis_positions, _ = readdy_post_processor.linear_fiber_axis_positions( - fiber_chain_ids=fiber_chain_ids, - ideal_positions=IDEAL_ACTIN_POSITIONS, - ideal_vector_to_axis=IDEAL_ACTIN_VECTOR_TO_AXIS, - ) - fiber_points = readdy_post_processor.linear_fiber_control_points( - axis_positions=axis_positions, - n_points=n_points_per_fiber, - ) - return np.array(fiber_points) - - -def generate_readdy_df( - input_h5_file_dir: str = "data/aws_downloads/", - output_dir: str = "data/dataframes/readdy/", - n_points_per_fiber: int = 50, - box_size: np.ndarray = np.array(3 * [600.0]), - num_repeats: int = 5, - compression_velocities: List[float] = [4.7, 15, 47, 150], - reprocess: bool = True, -) -> pd.DataFrame: - result = [] - os.makedirs(output_dir, exist_ok=True) - for velocity in compression_velocities: - for repeat in range(num_repeats): - file_name = f"actin_compression_velocity={velocity}_{repeat}.h5" - df_save_path = os.path.join( - 
output_dir, - f"readdy_actin_compression_velocity_{velocity}_repeat_{repeat}.csv", - ) - if os.path.exists(df_save_path) and not reprocess: - print(f"{file_name} already processed") - df_points = pd.read_csv(df_save_path) - result.append(df_points) - continue - h5_file_path = os.path.join(input_h5_file_dir, file_name) - if not os.path.exists(h5_file_path): - print(f"{file_name} not found") - continue - print(f"Processing {file_name}") - fiber_points = _load_readdy_fiber_points( - str(h5_file_path), box_size, n_points_per_fiber - ) - df_points = array_to_dataframe(fiber_points) - df_points.reset_index(inplace=True) - df_points.rename(columns={0: "xpos", 1: "ypos", 2: "zpos"}, inplace=True) - df_points["velocity"] = velocity - df_points["repeat"] = repeat - df_points["simulator"] = "readdy" - df_points["normalized_time"] = ( - df_points["time"] - df_points["time"].min() - ) / (df_points["time"].max() - df_points["time"].min()) - df_points.to_csv( - df_save_path, - index=False, - ) - result.append(df_points) - return pd.concat(result) - - -if __name__ == "__main__": - output_dir = "data/dataframes/readdy/" - df_readdy = generate_readdy_df(output_dir=output_dir) - df_readdy.to_csv( - output_dir / "readdy_actin_compression_all_velocities_and_repeats.csv" - ) - df_readdy.to_parquet( - output_dir / "readdy_actin_compression_all_velocities_and_repeats.parquet" - ) diff --git a/subcell_pipeline/simulation/readdy/readdy_data.py b/subcell_pipeline/simulation/readdy/data_structures.py similarity index 100% rename from subcell_pipeline/simulation/readdy/readdy_data.py rename to subcell_pipeline/simulation/readdy/data_structures.py diff --git a/subcell_pipeline/simulation/readdy/readdy_loader.py b/subcell_pipeline/simulation/readdy/loader.py similarity index 98% rename from subcell_pipeline/simulation/readdy/readdy_loader.py rename to subcell_pipeline/simulation/readdy/loader.py index dc895d7..0141161 100644 --- a/subcell_pipeline/simulation/readdy/readdy_loader.py +++ b/subcell_pipeline/simulation/readdy/loader.py @@ -7,7 +7,7 @@ import readdy from tqdm import tqdm -from .readdy_data import FrameData, ParticleData, TopologyData +from .data_structures import FrameData, ParticleData, TopologyData class ReaddyLoader: diff --git a/subcell_pipeline/simulation/readdy/parser.py b/subcell_pipeline/simulation/readdy/parser.py new file mode 100644 index 0000000..16ac21e --- /dev/null +++ b/subcell_pipeline/simulation/readdy/parser.py @@ -0,0 +1,193 @@ +"""Methods for parsing ReaDDy simulations.""" + +import os +from typing import List, Union, Tuple + +import boto3 +from botocore.exceptions import ClientError +import numpy as np +import pandas as pd +from io_collection.keys.check_key import check_key +from io_collection.save.save_dataframe import save_dataframe + +from .loader import ReaddyLoader +from .post_processor import ReaddyPostProcessor +from ..constants import COLUMN_NAMES, COLUMN_DTYPES +from .constants import ( + ACTIN_START_PARTICLE_PHRASE, + ACTIN_PARTICLE_TYPES, + IDEAL_ACTIN_POSITIONS, + IDEAL_ACTIN_VECTOR_TO_AXIS, +) + + +LOCAL_DOWNLOADS_PATH = "aws_downloads/" +READDY_TIMESTEP = 0.1 # ns +READDY_TOTAL_STEPS = { + "ACTIN_NO_COMPRESSION" : 1e7, + "ACTIN_COMPRESSION_VELOCITY_0047" : 3.2e8, + "ACTIN_COMPRESSION_VELOCITY_0150" : 1e8, + "ACTIN_COMPRESSION_VELOCITY_0470" : 3.2e7, + "ACTIN_COMPRESSION_VELOCITY_1500" : 1e7, +} +BOX_SIZE = np.array(3 * [600.0]) + + +s3_client = boto3.client("s3") + + +def _make_download_dir(): + if not os.path.isdir(LOCAL_DOWNLOADS_PATH): + 
os.makedirs(LOCAL_DOWNLOADS_PATH) + + +def _download_s3_file(bucket_name, key, dest_path) -> bool: + """ + Download files from S3 (skip files that already exist) + + (ReaDDy Python pkg currently requires a local file path) + """ + if os.path.isfile(dest_path): + # already downloaded + return False + try: + s3_client.download_file( + bucket_name, + key, + dest_path, + ) + print(f"Downloaded {dest_path}") + return True + except ClientError: + print(f"!!! Failed to download {key}") + return False + + +def _load_readdy_fiber_points( + series_key: str, + rep_ix: int, + n_timepoints: int, + n_monomer_points: int, +) -> Tuple[np.ndarray, np.ndarray]: + """ + Load a ReaDDy trajectory, calculate the polymer trace from + the monomer particle positions (using measurements from x-ray crystallography), + and resample to get the requested number of points + along each linear fiber at each timestep. + """ + h5_file_path = os.path.join(LOCAL_DOWNLOADS_PATH, f"{series_key}_{rep_ix}.h5") + time_inc = READDY_TOTAL_STEPS[series_key] / n_timepoints + readdy_loader = ReaddyLoader( + h5_file_path=str(h5_file_path), + time_inc=time_inc, + timestep=READDY_TIMESTEP, + ) + readdy_post_processor = ReaddyPostProcessor( + readdy_loader.trajectory(), + box_size=BOX_SIZE, + ) + fiber_chain_ids = readdy_post_processor.linear_fiber_chain_ids( + start_particle_phrases=[ACTIN_START_PARTICLE_PHRASE], + other_particle_types=ACTIN_PARTICLE_TYPES, + polymer_number_range=5, + ) + axis_positions, _ = readdy_post_processor.linear_fiber_axis_positions( + fiber_chain_ids=fiber_chain_ids, + ideal_positions=IDEAL_ACTIN_POSITIONS, + ideal_vector_to_axis=IDEAL_ACTIN_VECTOR_TO_AXIS, + ) + fiber_points = readdy_post_processor.linear_fiber_control_points( + axis_positions=axis_positions, + n_points=n_monomer_points, + ) + times = readdy_post_processor.times() + return np.array(fiber_points), times + + +def _parse_readdy_simulation_trajectory( + series_key: str, + rep_ix: int, + n_timepoints: int, + n_monomer_points: int, +) -> pd.DataFrame: + """ + Parse ReaDDy trajectory data into tidy data format. + (Assume one fiber) + """ + fiber_points, times = _load_readdy_fiber_points( + series_key, rep_ix, n_timepoints, n_monomer_points + ) + + point_data: list[list[Union[str, int, float]]] = [] + for time_ix in range(fiber_points.shape[0]): + for pos_ix in range(fiber_points.shape[2]): + point_data.append([ + 1, # fiber_id + fiber_points[time_ix][0][pos_ix][0], # xpos + fiber_points[time_ix][0][pos_ix][1], # ypos + fiber_points[time_ix][0][pos_ix][2], # zpos + 0.0, # xforce + 0.0, # yforce + 0.0, # zforce + 0.0, # segment_curvature + times[time_ix], # time + pos_ix, # fiber_point + ]) + + # Combine all data into dataframe and update data types. + dataframe = pd.DataFrame(point_data, columns=COLUMN_NAMES) + dataframe = dataframe.astype(dtype=COLUMN_DTYPES) + + # Add placeholders for features not calculated in ReaDDy + dataframe["force_magnitude"] = np.array(len(point_data) * [0.0]) + dataframe["segment_energy"] = np.array(len(point_data) * [0.0]) + + return dataframe + + +def parse_readdy_simulation_data( + bucket: str, + series_name: str, + condition_keys: list[str], + n_replicates: int, + n_timepoints: int, + n_monomer_points: int, +) -> None: + """ + Parse ReaDDy simulation data for select conditions and replicates. + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + series_name + Name of simulation series. + condition_keys + List of condition keys. + n_replicates + Number of simulation replicates. 
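+    n_timepoints
+        Number of timepoints.
+    n_monomer_points
+        Number of monomer points per fiber.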
+ """ + _make_download_dir() + + for condition_key in condition_keys: + series_key = f"{series_name}_{condition_key}" if condition_key else series_name + + for rep_ix in range(n_replicates): + dataframe_key = f"{series_name}/data/{series_key}_{rep_ix}.csv" + + # Skip if dataframe file already exists. + if check_key(bucket, dataframe_key): + print(f"Dataframe [ { dataframe_key } ] already exists. Skipping.") + continue + + print(f"Parsing data for [ {condition_key} ] replicate [ {rep_ix} ]") + + aws_h5_key = f"{series_name}/outputs/{series_key}_{rep_ix}.h5" + local_h5_key = os.path.join(LOCAL_DOWNLOADS_PATH, f"{series_key}_{rep_ix}.h5") + _download_s3_file(bucket, aws_h5_key, local_h5_key) + + data = _parse_readdy_simulation_trajectory( + series_key, rep_ix, n_timepoints, n_monomer_points + ) + + save_dataframe(bucket, dataframe_key, data, index=False) diff --git a/subcell_pipeline/simulation/readdy/readdy_post_processor.py b/subcell_pipeline/simulation/readdy/post_processor.py similarity index 95% rename from subcell_pipeline/simulation/readdy/readdy_post_processor.py rename to subcell_pipeline/simulation/readdy/post_processor.py index ba9377d..ac6510f 100644 --- a/subcell_pipeline/simulation/readdy/readdy_post_processor.py +++ b/subcell_pipeline/simulation/readdy/post_processor.py @@ -1,7 +1,6 @@ #!/usr/bin/env python import math -import time from typing import Dict, List, Tuple import numpy as np @@ -10,7 +9,7 @@ from tqdm import tqdm from ..compression_analysis import get_contour_length_from_trace -from .readdy_data import FrameData +from .data_structures import FrameData class ReaddyPostProcessor: @@ -39,6 +38,20 @@ def __init__( self.trajectory = trajectory self.box_size = box_size self.periodic_boundary = periodic_boundary + + def times(self) -> np.ndarray: + """ + Get simulation time at each timestep. + + Returns + ------- + times: np.array (shape = n_timesteps) + Array of time stamps in simulation time for each time step. + """ + result = [] + for time_ix in self.trajectory: + result.append(self.trajectory[time_ix].time) + return np.array(result) def _id_for_neighbor_of_types( self, @@ -248,7 +261,7 @@ def linear_fiber_axis_positions( for each particle in each fiber at each time. ideal_positions: np.ndarray (shape = 3 x 3) XYZ positions for 3 particles in an ideal chain. - ideal_vector_to_axis: np.ndarray + ideal_vector_to_axis: np.ndarray (shape = 3) Vector from the second ideal position to the axis of the fiber. @@ -494,35 +507,3 @@ def edge_positions(self) -> List[List[np.ndarray]]: for frame in self.trajectory: edges.append(frame.edges) return edges - - -def array_to_dataframe(fiber_point_array: ndarray) -> pd.DataFrame: - """ - Convert a 3D array to a pandas DataFrame. - - Parameters - ---------- - fiber_point_array: ndarray - The input 3D array. - - Returns - ------- - DataFrame: A pandas DataFrame with timepoint and fiber point as multi-index. 
- """ - # Reshape the array to remove the singleton dimensions - fiber_point_array = np.squeeze(fiber_point_array) - - # Reshape the array to have dimensions (timepoints * 50, 3) - reshaped_arr = fiber_point_array.reshape(-1, 3) - - # Create a DataFrame with timepoint and fiber point as multi-index - timepoints = np.repeat(range(fiber_point_array.shape[0]), 50) - fiber_points = np.tile(range(50), fiber_point_array.shape[0]) - - df = pd.DataFrame(reshaped_arr) - df["time"] = timepoints - df["id"] = fiber_points - - df.set_index(["time", "id"], inplace=True) - - return df diff --git a/subcell_pipeline/simulation/readdy/readdy_analysis.py b/subcell_pipeline/simulation/readdy/readdy_analysis.py deleted file mode 100644 index 67dcd72..0000000 --- a/subcell_pipeline/simulation/readdy/readdy_analysis.py +++ /dev/null @@ -1,457 +0,0 @@ -# %% [markdown] -# ## Readdy Analysis - -# %% [markdown] -# ## Download Readdy Files and postprocess them - -import argparse - -# %% -# import readdy -import boto3 -import numpy as np -import pandas as pd -from subcell_analysis.compression_analysis import COMPRESSIONMETRIC -from subcell_analysis.compression_workflow_runner import ( - compression_metrics_workflow, - plot_metric, - plot_metric_list, -) -from subcell_analysis.cytosim.post_process_cytosim import create_dataframes_for_repeats -from subcell_analysis.readdy import ReaddyLoader, ReaddyPostProcessor - -# %% -save_folder = "../data/readdy_h5_files" - -# %% -file_name = "actin_compression_velocity_15_0.h5" -s3 = boto3.client("s3") -response = s3.download_file( - "readdy-working-bucket", - "outputs/actin_compression_velocity=15_0.h5", - f"{save_folder}/{file_name}", -) - -# %% -h5_file_path = f"{save_folder}/{file_name}" - -post_processor = ReaddyPostProcessor( - ReaddyLoader(h5_file_path).trajectory(), - box_size=600.0 * np.ones(3), -) -fiber_chain_ids = post_processor.linear_fiber_chain_ids( - start_particle_phrases=["pointed"], - other_particle_types=[ - "actin#", - "actin#ATP_", - "actin#mid_", - "actin#mid_ATP_", - "actin#fixed_", - "actin#fixed_ATP_", - "actin#mid_fixed_", - "actin#mid_fixed_ATP_", - "actin#barbed_", - "actin#barbed_ATP_", - "actin#fixed_barbed_", - "actin#fixed_barbed_ATP_", - ], - polymer_number_range=5, -) -axis_positions, _ = post_processor.linear_fiber_axis_positions( - fiber_chain_ids=fiber_chain_ids, - ideal_positions=np.array( - [ - [24.738, 20.881, 26.671], - [27.609, 24.061, 27.598], - [30.382, 21.190, 25.725], - ] - ), - ideal_vector_to_axis=np.array( - [-0.01056751, -1.47785105, -0.65833209], - ), -) -fiber_points = post_processor.linear_fiber_control_points( - axis_positions=axis_positions, - segment_length=10.0, -) -print(fiber_points) - - -import pandas as pd -from subcell_analysis.compression_analysis import COMPRESSIONMETRIC - -# %% -from subcell_analysis.compression_workflow_runner import ( - compression_metrics_workflow, - plot_metric, - plot_metric_list, -) -from subcell_analysis.cytosim.post_process_cytosim import create_dataframes_for_repeats - -# %% -arr = np.array(fiber_points[0][0]) -arr.shape - - -def array_to_dataframe(arr): - # Reshape the array to remove the singleton dimensions - arr = np.squeeze(arr) - - # Reshape the array to have dimensions (timepoints * 50, 3) - reshaped_arr = arr.reshape(-1, 3) - - # Create a DataFrame with timepoint and fiber point as multi-index - timepoints = np.repeat(range(arr.shape[0]), 50) - fiber_points = np.tile(range(50), arr.shape[0]) - - df = pd.DataFrame(reshaped_arr) - df["time"] = timepoints - df["id"] = fiber_points - - 
df.set_index(["time", "id"], inplace=True) - - return df - - -df_points = array_to_dataframe(arr) -df_points.reset_index(inplace=True) -df_points.rename(columns={0: "x", 1: "y", 2: "z"}, inplace=True) -single_timepoint = df_points[df_points["time"] == 0] -single_timepoint - - -# %% -df_points - -df_points["time"].unique() -df_points.to_csv("../dataframes/readdy_processed_data.csv", index=False) -# df_points.to_csv("../dataframes/readdy_processed_data.csv") -df_points = pd.read_csv("../dataframes/readdy_processed_data.csv") - - -import matplotlib.pyplot as plt - -# %% -import pandas as pd -from matplotlib.animation import FuncAnimation -from mpl_toolkits.mplot3d import Axes3D - -# Assuming you have a DataFrame named 'df_points' with columns 'time', 'id', 'x', 'y', and 'z' -# df_points = pd.DataFrame(...) # Your data goes here - -# Create a 3D plot -fig = plt.figure() -ax = fig.add_subplot(111, projection="3d") - -# Get unique timestamps in the data -timestamps = df_points["time"].unique() - - -# Function to update the plot at each time step -def update_plot(time_step, ax=ax): - ax.cla() # Clear previous plot - - # Filter the data for the current timestamp - data_at_time = df_points[df_points["time"] == timestamps[time_step]] - - # Plot the points at the current time step - ax.scatter( - data_at_time["x"], data_at_time["y"], data_at_time["z"], c="r", marker="o" - ) - - # Set plot labels and title - ax.set_xlabel("X Position") - ax.set_ylabel("Y Position") - ax.set_zlabel("Z Position") - ax.set_title(f"Time: {timestamps[time_step]}") - ax.set_xlim([-300, 300]) - ax.set_ylim([-15, 15]) - ax.set_zlim([-10, 30]) - ax.set_aspect("equal") - - -# Create the animation -update_plot(-1) - -# If you want to save the animation to a file -# animation.save('3d_animation.mp4', writer='ffmpeg') -# animation.save('3d_animation_frames/frame_{:04d}.png', writer='pillow', fps=1) - -# Show the plot (If you don't want to save it) -plt.show() - - -# %% -plt.close("all") - - -# %% -import matplotlib.pyplot as plt - -metrics = [ - COMPRESSIONMETRIC.NON_COPLANARITY, - COMPRESSIONMETRIC.PEAK_ASYMMETRY, - COMPRESSIONMETRIC.TOTAL_FIBER_TWIST, -] -df_points = compression_metrics_workflow( - df_points, - [ - COMPRESSIONMETRIC.NON_COPLANARITY, - COMPRESSIONMETRIC.PEAK_ASYMMETRY, - COMPRESSIONMETRIC.TOTAL_FIBER_TWIST, - ], -) -df_points.columns - -# %% -for metric in metrics: - fig, ax = plt.subplots() - print(metric.value) - metric_by_time = df_points.groupby(["time"])[metric.value].mean() - ax.plot(metric_by_time, label=f"metric = {metric.value}") - plt.legend() - plt.show() - -# %% [markdown] -# ## Generating Outputs for All Readdy Simulations - -# %% -compression_velocities = [4.7, 15, 47, 150] -iterations = [0, 1, 2] -empty_array = np.zeros((len(compression_velocities), len(iterations))) - -# %% [markdown] -# Post Processing - -# %% -from pathlib import Path - -# %% -data_dir = Path("../data/readdy_h5_files") -data_dir.mkdir(exist_ok=True, parents=True) - -# %% - -for index, velocity in enumerate(compression_velocities): - for iteration in iterations: - new_file_path = ( - f"{data_dir}/readdy_actin_compression_velocity_{velocity}_{iteration}.h5" - ) - print(f"Downloading file: {new_file_path}") - response = s3.download_file( - "readdy-working-bucket", - f"outputs/actin_compression_velocity={velocity}_{iteration}.h5", - new_file_path, - ) - - -# %% -fiber_points = np.empty((4, 3), dtype=object) - -for index, velocity in enumerate(compression_velocities): - for iteration in iterations: - new_file_path = ( - 
f"{data_dir}/actin_compression_velocity_{velocity}_{iteration}.h5" - ) - post_processor = ReaddyPostProcessor( - ReaddyLoader(new_file_path).trajectory(), - box_size=600.0 * np.ones(3), - ) - fiber_chain_ids = post_processor.linear_fiber_chain_ids( - start_particle_phrases=["pointed"], - other_particle_types=[ - "actin#", - "actin#ATP_", - "actin#mid_", - "actin#mid_ATP_", - "actin#fixed_", - "actin#fixed_ATP_", - "actin#mid_fixed_", - "actin#mid_fixed_ATP_", - "actin#barbed_", - "actin#barbed_ATP_", - "actin#fixed_barbed_", - "actin#fixed_barbed_ATP_", - ], - polymer_number_range=5, - ) - axis_positions, _ = post_processor.linear_fiber_axis_positions( - fiber_chain_ids=fiber_chain_ids, - ideal_positions=np.array( - [ - [24.738, 20.881, 26.671], - [27.609, 24.061, 27.598], - [30.382, 21.190, 25.725], - ] - ), - ideal_vector_to_axis=np.array( - [-0.01056751, -1.47785105, -0.65833209], - ), - ) - fiber_points[index][iteration] = post_processor.linear_fiber_control_points( - axis_positions=axis_positions, - segment_length=10.0, - ) - - -# %% [markdown] -# Save processed fiber_points - -# %% -df_path = Path("../data/dataframes/") -df_path.mkdir(exist_ok=True, parents=True) - -# %% -for index, velocity in enumerate(compression_velocities): - for iteration in iterations: - print(index, iteration) - if index == 1 and iteration == 2: # TODO: check why this is happening - break - arr = np.array(fiber_points[index][iteration]) - print(arr.shape) - df_points = array_to_dataframe(arr) - df_points.reset_index(inplace=True) - df_points.rename(columns={0: "xpos", 1: "ypos", 2: "zpos"}, inplace=True) - df_points.to_csv( - f"{df_path}/actin_compression_velocity_{velocity}.{iteration}.csv", - index=False, - ) - - -# %% [markdown] -# ## Starting from Reading from CSV - -# %% -df_path = Path("../data/dataframes/") -df_path.mkdir(exist_ok=True, parents=True) - -# %% -processed_dataframes = np.empty( - (len(compression_velocities), len(iterations)), dtype=object -) -for index, velocity in enumerate(compression_velocities): - for iteration in iterations: - if index == 1 and iteration == 2: - break - processed_dataframes[index][iteration] = pd.read_csv( - f"{df_path}/readdy_actin_compression_velocity_{velocity}_repeat_{iteration}.csv" - ) - print(index, iteration, processed_dataframes[index][iteration].shape) - - -# %% [markdown] -# Calculate metrics for processed dataframes - -# %% -for index, velocity in enumerate(compression_velocities): - for iteration in iterations: - print(index, iteration) - if index == 1 and iteration == 2: - break - processed_dataframes[index][iteration] = compression_metrics_workflow( - processed_dataframes[index][iteration], - [ - COMPRESSIONMETRIC.NON_COPLANARITY, - COMPRESSIONMETRIC.PEAK_ASYMMETRY, - COMPRESSIONMETRIC.TOTAL_FIBER_TWIST, - ], - ) - -# %% -processed_dataframes[0][1] - -# %% [markdown] -# Plot calculated metrics - -# %% -import matplotlib.pyplot as plt - -# %% -figure_path = Path("../figures/readdy_metrics") -figure_path.mkdir(exist_ok=True, parents=True) -plt.close("all") - -# %% -metrics = [ - COMPRESSIONMETRIC.NON_COPLANARITY, - COMPRESSIONMETRIC.PEAK_ASYMMETRY, - COMPRESSIONMETRIC.TOTAL_FIBER_TWIST, -] -compression_velocities = [4.7, 15, 47, 150] -for metric in metrics: - fig, axs = plt.subplots( - 1, - len(compression_velocities), - figsize=(len(compression_velocities) * 5, 5), - dpi=300, - sharey=True, - sharex=True, - ) - for index, velocity in enumerate(compression_velocities): - print(metric.value) - for iteration in iterations: - if index == 1 and iteration == 
2: - continue - metric_by_time = ( - processed_dataframes[index][iteration] - .groupby(["time"])[metric.value] - .mean() - ) - axs[index].plot(metric_by_time, label=f"iteration = {iteration}") - axs[index].set_title(f"compression velocity = {velocity}") - axs[index].legend() - if index == 0: - axs[index].set_ylabel(metric.value) - fig.suptitle(f"Readdy") - fig.supxlabel("time") - plt.tight_layout() - plt.show() - fig.savefig(f"{figure_path}/actin_compression_all_velocities_{metric.value}.png") - -import matplotlib.pyplot as plt - -# %% -import pandas as pd -from matplotlib.animation import FuncAnimation -from mpl_toolkits.mplot3d import Axes3D - -# Assuming you have a DataFrame named 'df_points' with columns 'time', 'id', 'x', 'y', and 'z' -# df_points = pd.DataFrame(...) # Your data goes here - -# Create a 3D plot -fig = plt.figure() -ax = fig.add_subplot(111, projection="3d") - -# Get unique timestamps in the data -timestamps = processed_dataframes[0][0]["time"].unique() - - -# Function to update the plot at each time step -def update_plot(time_step, ax=ax): - ax.cla() # Clear previous plot - - # Filter the data for the current timestamp - data_at_time = df_points[df_points["time"] == timestamps[time_step]] - - # Plot the points at the current time step - ax.scatter( - data_at_time["x"], data_at_time["y"], data_at_time["z"], c="r", marker="o" - ) - - # Set plot labels and title - ax.set_xlabel("X Position") - ax.set_ylabel("Y Position") - ax.set_zlabel("Z Position") - ax.set_title(f"Time: {timestamps[time_step]}") - ax.set_xlim([-300, 300]) - ax.set_ylim([-15, 15]) - ax.set_zlim([-10, 30]) - ax.set_aspect("equal") - - -# Create the animation -ani = FuncAnimation(fig, update_plot, frames=len(timestamps), fargs=(ax,)) -ani.save("ani.txt") -# If you want to save the animation to a file -# animation.save('3d_animation.mp4', writer='ffmpeg') -# animation.save('3d_animation_frames/frame_{:04d}.png', writer='pillow', fps=1) diff --git a/subcell_pipeline/simulation/readdy/readdy_loading_example.py b/subcell_pipeline/simulation/readdy/readdy_loading_example.py deleted file mode 100644 index 11f289d..0000000 --- a/subcell_pipeline/simulation/readdy/readdy_loading_example.py +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env python - -import argparse - -import numpy as np -from subcell_analysis.readdy import ReaddyLoader, ReaddyPostProcessor - - -class Args(argparse.Namespace): - def __init__(self) -> None: - self.__parse() - - def __parse(self) -> None: - p = argparse.ArgumentParser( - prog="readdy-actin-fiber-points", - description=( - "Load a ReaDDy actin trajectory and " - "calculate actin fiber control points." 
- ), - ) - p.add_argument( - "h5_file_path", - type=str, - help="The path to the ReaDDy .h5 file", - ) - p.parse_args(namespace=self) - - -def main() -> None: - args = Args() - post_processor = ReaddyPostProcessor( - ReaddyLoader(args.h5_file_path).trajectory(), - box_size=600.0 * np.ones(3), - ) - fiber_chain_ids = post_processor.linear_fiber_chain_ids( - start_particle_phrases=["pointed"], - other_particle_types=[ - "actin#", - "actin#ATP_", - "actin#mid_", - "actin#mid_ATP_", - "actin#fixed_", - "actin#fixed_ATP_", - "actin#mid_fixed_", - "actin#mid_fixed_ATP_", - "actin#barbed_", - "actin#barbed_ATP_", - "actin#fixed_barbed_", - "actin#fixed_barbed_ATP_", - ], - polymer_number_range=5, - ) - axis_positions, _ = post_processor.linear_fiber_axis_positions( - fiber_chain_ids=fiber_chain_ids, - ideal_positions=np.array( - [ - [24.738, 20.881, 26.671], - [27.609, 24.061, 27.598], - [30.382, 21.190, 25.725], - ] - ), - ideal_vector_to_axis=np.array( - [-0.01056751, -1.47785105, -0.65833209], - ), - ) - fiber_points = post_processor.linear_fiber_control_points( - axis_positions=axis_positions, - segment_length=10.0, - ) - print(fiber_points) - - -if __name__ == "__main__": - main() From 0b17f02192cce4cf0f9bdd4947efd767355aec12 Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Tue, 25 Jun 2024 17:08:37 -0700 Subject: [PATCH 02/63] script to copy readdy outputs --- .../simulation/readdy/copy_readdy_outputs.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 subcell_pipeline/simulation/readdy/copy_readdy_outputs.py diff --git a/subcell_pipeline/simulation/readdy/copy_readdy_outputs.py b/subcell_pipeline/simulation/readdy/copy_readdy_outputs.py new file mode 100644 index 0000000..8701483 --- /dev/null +++ b/subcell_pipeline/simulation/readdy/copy_readdy_outputs.py @@ -0,0 +1,37 @@ +import boto3 + + +def copy_readdy_outputs(): + """ + Copy ReaDDy outputs from where they were saved from running + https://github.com/simularium/readdy-models to have the same + AWS S3 file structure as for Cytosim. 
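+
+    For example, `outputs/actin_compression_velocity=4.7_0.h5` is copied to
+    `ACTIN_COMPRESSION_VELOCITY/outputs/ACTIN_COMPRESSION_VELOCITY_0047_0.h5`.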
+ """ + s3_client = boto3.client("s3") + bucket = "readdy-working-bucket" + src_name = "outputs/actin_compression_velocity=" + dest_name = "ACTIN_COMPRESSION_VELOCITY/outputs/ACTIN_COMPRESSION_VELOCITY" + src_condition_keys = ["4.7", "15", "47", "150"] + dest_condition_keys = ["0047", "0150", "0470", "1500"] + n_replicates = 5 + + for cond_ix in range(len(src_condition_keys)): + for rep_ix in range(n_replicates): + + src_cond = src_condition_keys[cond_ix] + src_path = f"{bucket}/{src_name}{src_cond}_{rep_ix}.h5" + + dest_cond = dest_condition_keys[cond_ix] + dest_key = f"{dest_name}_{dest_cond}_{rep_ix}.h5" + + s3_client.copy_object( + Bucket=bucket, + CopySource=src_path, + Key=dest_key, + ) + + print(f"copied {src_path} to {bucket}/{dest_key}") + + +if __name__ == "__main__": + copy_readdy_outputs() \ No newline at end of file From aa344d50739c645f010f08cc0e3dd017a8ad7bb4 Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Thu, 27 Jun 2024 14:15:44 -0700 Subject: [PATCH 03/63] visualize individual trajectories, leverage readdy post processing code, add pickles --- subcell_pipeline/constants.py | 190 +++++++ subcell_pipeline/simulation/constants.py | 28 - .../simulation/readdy/__init__.py | 1 + ..._process_readdy_compression_simulations.py | 3 +- ...ocess_readdy_no_compression_simulations.py | 3 +- .../simulation/readdy/constants.py | 34 -- subcell_pipeline/simulation/readdy/loader.py | 48 +- subcell_pipeline/simulation/readdy/parser.py | 159 +++--- subcell_pipeline/temporary_file_io.py | 132 +++++ .../_visualize_individual_trajectories.py | 108 ++++ subcell_pipeline/visualization/visualizer.py | 491 ++++++++++++++++++ 11 files changed, 1042 insertions(+), 155 deletions(-) create mode 100644 subcell_pipeline/constants.py delete mode 100644 subcell_pipeline/simulation/constants.py delete mode 100644 subcell_pipeline/simulation/readdy/constants.py create mode 100644 subcell_pipeline/temporary_file_io.py create mode 100644 subcell_pipeline/visualization/_visualize_individual_trajectories.py create mode 100644 subcell_pipeline/visualization/visualizer.py diff --git a/subcell_pipeline/constants.py b/subcell_pipeline/constants.py new file mode 100644 index 0000000..b563ca5 --- /dev/null +++ b/subcell_pipeline/constants.py @@ -0,0 +1,190 @@ +"""Constants for parsing simulations.""" + +from typing import Dict, List, Union + +import numpy as np + +from simulariumio import DisplayData, DISPLAY_TYPE + + +LOCAL_DOWNLOADS_PATH: str = "aws_downloads/" + +COLUMN_NAMES: List[str] = [ + "fiber_id", + "xpos", + "ypos", + "zpos", + "xforce", + "yforce", + "zforce", + "segment_curvature", + "time", + "fiber_point", +] + +COLUMN_DTYPES: Dict[str, Union[float, int]] = { + "fiber_id": int, + "xpos": float, + "ypos": float, + "zpos": float, + "xforce": float, + "yforce": float, + "zforce": float, + "segment_curvature": float, + "time": float, + "fiber_point": int, +} + +BOX_SIZE: np.ndarray = np.array(3 * [600.0]) + +READDY_TIMESTEP: float = 0.1 # ns + +READDY_SAVED_FRAMES: int = 1000 + +READDY_TOTAL_STEPS: Dict[str, int] = { + "ACTIN_NO_COMPRESSION" : 1e7, + "ACTIN_COMPRESSION_VELOCITY_0047" : 3.2e8, + "ACTIN_COMPRESSION_VELOCITY_0150" : 1e8, + "ACTIN_COMPRESSION_VELOCITY_0470" : 3.2e7, + "ACTIN_COMPRESSION_VELOCITY_1500" : 1e7, +} + +# particle types correspond to types from simularium/readdy-models +ACTIN_START_PARTICLE_PHRASE: str = "pointed" +ACTIN_PARTICLE_TYPES: List[str] = [ + "actin#", + "actin#ATP_", + "actin#mid_", + "actin#mid_ATP_", + "actin#fixed_", + "actin#fixed_ATP_", + "actin#mid_fixed_", + 
"actin#mid_fixed_ATP_", + "actin#barbed_", + "actin#barbed_ATP_", + "actin#fixed_barbed_", + "actin#fixed_barbed_ATP_", +] + +# measured from crystal structure +IDEAL_ACTIN_POSITIONS: np.ndarray = np.array( + [ + [24.738, 20.881, 26.671], + [27.609, 24.061, 27.598], + [30.382, 21.190, 25.725], + ] +) +IDEAL_ACTIN_VECTOR_TO_AXIS: np.ndarray = np.array( + [-0.01056751, -1.47785105, -0.65833209] +) + +CYTOSIM_SCALE_FACTOR: float = 1000.0 + + +def READDY_DISPLAY_DATA() -> Dict[str, DisplayData]: + extra_radius = 1.5 + actin_radius = 2.0 + extra_radius + n_polymer_numbers = 5 + result = {} + for i in range(1, n_polymer_numbers + 1): + result.update( + { + f"actin#{i}": DisplayData( + name="actin", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#bf9b30", + ), + f"actin#mid_{i}": DisplayData( + name="actin#mid", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#bf9b30", + ), + f"actin#fixed_{i}": DisplayData( + name="actin#fixed", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#bf9b30", + ), + f"actin#mid_fixed_{i}": DisplayData( + name="actin#mid_fixed", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#bf9b30", + ), + f"actin#ATP_{i}": DisplayData( + name="actin#ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffbf00", + ), + f"actin#mid_ATP_{i}": DisplayData( + name="actin#mid_ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffbf00", + ), + f"actin#fixed_ATP_{i}": DisplayData( + name="actin#fixed_ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffbf00", + ), + f"actin#mid_fixed_ATP_{i}": DisplayData( + name="actin#mid_fixed_ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffbf00", + ), + f"actin#barbed_{i}": DisplayData( + name="actin#barbed", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffdc73", + ), + f"actin#barbed_ATP_{i}": DisplayData( + name="actin#barbed_ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffdc73", + ), + f"actin#fixed_barbed_{i}": DisplayData( + name="actin#fixed_barbed", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffdc73", + ), + f"actin#fixed_barbed_ATP_{i}": DisplayData( + name="actin#fixed_barbed_ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffdc73", + ), + f"actin#pointed_{i}": DisplayData( + name="actin#pointed", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#a67c00", + ), + f"actin#pointed_ATP_{i}": DisplayData( + name="actin#pointed_ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#a67c00", + ), + f"actin#pointed_fixed_{i}": DisplayData( + name="actin#pointed_fixed", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#a67c00", + ), + f"actin#pointed_fixed_ATP_{i}": DisplayData( + name="actin#pointed_fixed_ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#a67c00", + ), + }, + ) + return result diff --git a/subcell_pipeline/simulation/constants.py b/subcell_pipeline/simulation/constants.py deleted file mode 100644 index 8667478..0000000 --- a/subcell_pipeline/simulation/constants.py +++ /dev/null @@ -1,28 +0,0 @@ -"""Constants for parsing simulations.""" - - -COLUMN_NAMES = [ - "fiber_id", - "xpos", - "ypos", - "zpos", - "xforce", - "yforce", - "zforce", - "segment_curvature", - "time", - "fiber_point", -] - -COLUMN_DTYPES = { - "fiber_id": int, - "xpos": float, - "ypos": float, - "zpos": float, - 
"xforce": float, - "yforce": float, - "zforce": float, - "segment_curvature": float, - "time": float, - "fiber_point": int, -} \ No newline at end of file diff --git a/subcell_pipeline/simulation/readdy/__init__.py b/subcell_pipeline/simulation/readdy/__init__.py index fe08729..5582fab 100644 --- a/subcell_pipeline/simulation/readdy/__init__.py +++ b/subcell_pipeline/simulation/readdy/__init__.py @@ -3,3 +3,4 @@ from .data_structures import FrameData, TopologyData, ParticleData # noqa: F401 from .loader import ReaddyLoader # noqa: F401 from .post_processor import ReaddyPostProcessor # noqa: F401 +from .parser import load_readdy_fiber_points # noqa: F401 \ No newline at end of file diff --git a/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py b/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py index 9d71656..eae05c5 100644 --- a/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py +++ b/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py @@ -21,7 +21,7 @@ raise ImportError("This module is a notebook and is not meant to be imported") # %% -from subcell_pipeline.simulation.cytosim.post_processing import ( +from subcell_pipeline.simulation.readdy.parser import ( parse_readdy_simulation_data, ) @@ -63,6 +63,7 @@ - Input: `(series_name)/outputs/(series_name)_(condition_key)_(index).h5` - Output: `(series_name)/data/(series_name)_(condition_key)_(seed).csv` + and `(series_name)/data/(series_name)_(condition_key)_(seed).pkl` """ # %% diff --git a/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py b/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py index 28cb540..7c2d5ed 100644 --- a/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py +++ b/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py @@ -20,10 +20,9 @@ raise ImportError("This module is a notebook and is not meant to be imported") # %% -from subcell_pipeline.simulation.cytosim.post_processing import ( +from subcell_pipeline.simulation.readdy.parser import ( parse_readdy_simulation_data, ) -from subcell_pipeline.simulation.post_processing import sample_simulation_data # %% [markdown] """ diff --git a/subcell_pipeline/simulation/readdy/constants.py b/subcell_pipeline/simulation/readdy/constants.py deleted file mode 100644 index 5bfb3e9..0000000 --- a/subcell_pipeline/simulation/readdy/constants.py +++ /dev/null @@ -1,34 +0,0 @@ -"""Constants for parsing ReaDDy simulations.""" - - -import numpy as np - - -# particle types correspond to types from simularium/readdy-models -ACTIN_START_PARTICLE_PHRASE = "pointed" -ACTIN_PARTICLE_TYPES = [ - "actin#", - "actin#ATP_", - "actin#mid_", - "actin#mid_ATP_", - "actin#fixed_", - "actin#fixed_ATP_", - "actin#mid_fixed_", - "actin#mid_fixed_ATP_", - "actin#barbed_", - "actin#barbed_ATP_", - "actin#fixed_barbed_", - "actin#fixed_barbed_ATP_", -] - -# measured from crystal structure -IDEAL_ACTIN_POSITIONS = np.array( - [ - [24.738, 20.881, 26.671], - [27.609, 24.061, 27.598], - [30.382, 21.190, 25.725], - ] -) -IDEAL_ACTIN_VECTOR_TO_AXIS = np.array( - [-0.01056751, -1.47785105, -0.65833209] -) diff --git a/subcell_pipeline/simulation/readdy/loader.py b/subcell_pipeline/simulation/readdy/loader.py index 0141161..9d2b784 100644 --- a/subcell_pipeline/simulation/readdy/loader.py +++ b/subcell_pipeline/simulation/readdy/loader.py @@ -6,6 +6,9 @@ import numpy as np import readdy from tqdm 
import tqdm +from io_collection.keys.check_key import check_key +from io_collection.load.load_pickle import load_pickle +from io_collection.save.save_pickle import save_pickle from .data_structures import FrameData, ParticleData, TopologyData @@ -18,7 +21,8 @@ def __init__( max_time_ix: int = -1, time_inc: int = 1, timestep: float = 100.0, - save_pickle_file: bool = False, + pickle_location: str = None, + pickle_key: str = None, ): """ Load and shape data from a ReaDDy trajectory. @@ -43,9 +47,13 @@ def __init__( (In any time units, resulting time measurements will be in the same units.) Default: 100. - save_pickle_file: bool = False (optional) - Save loaded data in a pickle file for easy reload? - Default: False + pickle_location: str (optional) + If provided along with pickle_key, + save a pickle file for easy reload. + This can be an AWS S3 bucket or a local path. + pickle_key: str (optional) + If provided along with pickle_location, + save a pickle file for easy reload. """ self._readdy_trajectory: readdy.Trajectory = None self._trajectory: Optional[List[FrameData]] = None @@ -54,7 +62,8 @@ def __init__( self.max_time_ix = max_time_ix self.time_inc = time_inc self.timestep = timestep - self.save_pickle_file = save_pickle_file + self.pickle_location = pickle_location + self.pickle_key = pickle_key def readdy_trajectory(self) -> readdy.Trajectory: """ @@ -67,6 +76,7 @@ def readdy_trajectory(self) -> readdy.Trajectory: The ReaDDy trajectory object. """ if self._readdy_trajectory is None: + # this line requires a path to a local file, does not support S3 paths self._readdy_trajectory = readdy.Trajectory(self.h5_file_path) return self._readdy_trajectory @@ -144,6 +154,9 @@ def _shape_trajectory_data(self) -> List[FrameData]: ) result.append(frame) return result + + def _use_pickle(self) -> bool: + return self.pickle_location is not None and self.pickle_key is not None def trajectory(self) -> List[FrameData]: """ @@ -157,25 +170,12 @@ def trajectory(self) -> List[FrameData]: """ if self._trajectory is not None: return self._trajectory - pickle_file_path = self.h5_file_path + ".dat" - if os.path.isfile(pickle_file_path): - print("Loading pickle file for ReaDDy data") - import pickle - - data = [] - with open(pickle_file_path, "rb") as f: - while True: - try: - data.append(pickle.load(f)) - except EOFError: - break - self._trajectory = data[0] + if self._use_pickle() and check_key(self.pickle_location, self.pickle_key): + print(f"Loading pickle file for ReaDDy data from {self.h5_file_path}") + self._trajectory = load_pickle(self.pickle_location, self.pickle_key) else: - print("Loading ReaDDy data from h5 file...") + print(f"Loading ReaDDy data from h5 file {self.h5_file_path}") self._trajectory = self._shape_trajectory_data() - if self.save_pickle_file: - import pickle - - with open(pickle_file_path, "wb") as file: - pickle.dump(self._trajectory, file) + if self._use_pickle() and not check_key(self.pickle_location, self.pickle_key): + save_pickle(self.pickle_location, self.pickle_key, self._trajectory) return self._trajectory diff --git a/subcell_pipeline/simulation/readdy/parser.py b/subcell_pipeline/simulation/readdy/parser.py index 16ac21e..c5cef63 100644 --- a/subcell_pipeline/simulation/readdy/parser.py +++ b/subcell_pipeline/simulation/readdy/parser.py @@ -3,8 +3,6 @@ import os from typing import List, Union, Tuple -import boto3 -from botocore.exceptions import ClientError import numpy as np import pandas as pd from io_collection.keys.check_key import check_key @@ -12,86 +10,117 @@ from 
.loader import ReaddyLoader from .post_processor import ReaddyPostProcessor -from ..constants import COLUMN_NAMES, COLUMN_DTYPES -from .constants import ( +from ...constants import ( + COLUMN_NAMES, + COLUMN_DTYPES, + BOX_SIZE, + READDY_TIMESTEP, + READDY_TOTAL_STEPS, ACTIN_START_PARTICLE_PHRASE, ACTIN_PARTICLE_TYPES, IDEAL_ACTIN_POSITIONS, IDEAL_ACTIN_VECTOR_TO_AXIS, + LOCAL_DOWNLOADS_PATH, ) +from ...temporary_file_io import download_readdy_hdf5 -LOCAL_DOWNLOADS_PATH = "aws_downloads/" -READDY_TIMESTEP = 0.1 # ns -READDY_TOTAL_STEPS = { - "ACTIN_NO_COMPRESSION" : 1e7, - "ACTIN_COMPRESSION_VELOCITY_0047" : 3.2e8, - "ACTIN_COMPRESSION_VELOCITY_0150" : 1e8, - "ACTIN_COMPRESSION_VELOCITY_0470" : 3.2e7, - "ACTIN_COMPRESSION_VELOCITY_1500" : 1e7, -} -BOX_SIZE = np.array(3 * [600.0]) - - -s3_client = boto3.client("s3") - - -def _make_download_dir(): - if not os.path.isdir(LOCAL_DOWNLOADS_PATH): - os.makedirs(LOCAL_DOWNLOADS_PATH) - - -def _download_s3_file(bucket_name, key, dest_path) -> bool: - """ - Download files from S3 (skip files that already exist) - - (ReaDDy Python pkg currently requires a local file path) - """ - if os.path.isfile(dest_path): - # already downloaded - return False - try: - s3_client.download_file( - bucket_name, - key, - dest_path, - ) - print(f"Downloaded {dest_path}") - return True - except ClientError: - print(f"!!! Failed to download {key}") - return False - - -def _load_readdy_fiber_points( +def readdy_post_processor( + bucket: str, + series_name: str, series_key: str, rep_ix: int, n_timepoints: int, - n_monomer_points: int, -) -> Tuple[np.ndarray, np.ndarray]: +) -> ReaddyPostProcessor: """ - Load a ReaDDy trajectory, calculate the polymer trace from - the monomer particle positions (using measurements from x-ray crystallography), - and resample to get the requested number of points - along each linear fiber at each timestep. + Load a ReaddyPostProcessor from the specified ReaDDy trajectory. + (Load from a pickle if it exists.) + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + series_name + Name of simulation series. + series_key + Name of simulation series plus condition_key if applicable. + rep_ix + Replicate index. + n_timepoints + Number of timepoints to visualize. """ h5_file_path = os.path.join(LOCAL_DOWNLOADS_PATH, f"{series_key}_{rep_ix}.h5") + pickle_key = f"{series_name}/data/{series_key}_{rep_ix}.pkl" time_inc = READDY_TOTAL_STEPS[series_key] / n_timepoints readdy_loader = ReaddyLoader( h5_file_path=str(h5_file_path), time_inc=time_inc, timestep=READDY_TIMESTEP, + pickle_location=bucket, + pickle_key=pickle_key, ) - readdy_post_processor = ReaddyPostProcessor( - readdy_loader.trajectory(), + return ReaddyPostProcessor( + readdy_loader.trajectory(), # this will load from a pickle if it exists box_size=BOX_SIZE, ) + + +def load_readdy_fiber_points( + bucket: str, + series_name: str, + series_key: str, + rep_ix: int, + n_timepoints: int, + n_monomer_points: int, +) -> Tuple[List[List[List[int]]], List[List[np.ndarray]], np.ndarray, np.ndarray]: + """ + Load a ReaDDy trajectory, calculate the polymer trace from + the monomer particle positions (using measurements from x-ray crystallography), + and resample to get the requested number of points + along each linear fiber at each timestep. + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + series_name + Name of simulation series. + series_key + Name of simulation series plus condition_key if applicable. + rep_ix + Replicate index. 
+    n_timepoints
+        Number of timepoints to visualize.
+    n_monomer_points
+        Number of control points for each polymer trace.
+
+    Returns
+    -------
+    readdy_post_processor: ReaddyPostProcessor
+        The ReaddyPostProcessor loaded with this trajectory
+        in case it is needed for additional analysis.
+    fiber_chain_ids: List[List[List[int]]]
+        Particle IDs for particles in each linear fiber at each timestep
+        that match the axis_positions list.
+    axis_positions: List[List[np.ndarray (shape = n x 3)]]
+        List of lists of arrays containing the x,y,z positions
+        of the closest point on the fiber axis to the position
+        of each particle in each fiber at each time.
+    fiber_points: np.ndarray (shape = n_timepoints x n_fibers (1) x n x 3)
+        Array containing the x,y,z positions
+        of control points for each fiber at each time.
+    times: np.ndarray (shape = n_timepoints)
+        Simulation time at each timestep.
+    """
+    readdy_post_processor = load_readdy_post_processor(
+        bucket, series_name, series_key, rep_ix, n_timepoints
+    )
     fiber_chain_ids = readdy_post_processor.linear_fiber_chain_ids(
         start_particle_phrases=[ACTIN_START_PARTICLE_PHRASE],
         other_particle_types=ACTIN_PARTICLE_TYPES,
         polymer_number_range=5,
     )
-    axis_positions, _ = readdy_post_processor.linear_fiber_axis_positions(
+    axis_positions, fiber_chain_ids = readdy_post_processor.linear_fiber_axis_positions(
         fiber_chain_ids=fiber_chain_ids,
         ideal_positions=IDEAL_ACTIN_POSITIONS,
         ideal_vector_to_axis=IDEAL_ACTIN_VECTOR_TO_AXIS,
@@ -101,10 +130,12 @@ def _load_readdy_fiber_points(
         n_points=n_monomer_points,
     )
     times = readdy_post_processor.times()
-    return np.array(fiber_points), times
+    return readdy_post_processor, fiber_chain_ids, axis_positions, np.array(fiber_points), times
 
 
 def _parse_readdy_simulation_trajectory(
+    bucket: str,
+    series_name: str,
     series_key: str,
     rep_ix: int,
     n_timepoints: int,
@@ -114,8 +145,8 @@ def _parse_readdy_simulation_trajectory(
     Parse ReaDDy trajectory data into tidy data format. (Assume one fiber)
     """
-    fiber_points, times = _load_readdy_fiber_points(
-        series_key, rep_ix, n_timepoints, n_monomer_points
+    _, _, _, fiber_points, times = load_readdy_fiber_points(
+        bucket, series_name, series_key, rep_ix, n_timepoints, n_monomer_points
    )
 
     point_data: list[list[Union[str, int, float]]] = []
@@ -167,8 +198,6 @@ def parse_readdy_simulation_data(
     n_replicates
         Number of simulation replicates.
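+
+    Examples
+    --------
+    A minimal sketch of a call, mirroring the values used in the processing
+    notebooks for this series (bucket and counts are illustrative)::
+
+        parse_readdy_simulation_data(
+            bucket="s3://readdy-working-bucket",
+            series_name="ACTIN_COMPRESSION_VELOCITY",
+            condition_keys=["0047", "0150", "0470", "1500"],
+            n_replicates=5,
+            n_timepoints=200,
+            n_monomer_points=200,
+        )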
""" - _make_download_dir() - for condition_key in condition_keys: series_key = f"{series_name}_{condition_key}" if condition_key else series_name @@ -181,13 +210,11 @@ def parse_readdy_simulation_data( continue print(f"Parsing data for [ {condition_key} ] replicate [ {rep_ix} ]") - - aws_h5_key = f"{series_name}/outputs/{series_key}_{rep_ix}.h5" - local_h5_key = os.path.join(LOCAL_DOWNLOADS_PATH, f"{series_key}_{rep_ix}.h5") - _download_s3_file(bucket, aws_h5_key, local_h5_key) + + download_readdy_hdf5(bucket, series_name, series_key, rep_ix) data = _parse_readdy_simulation_trajectory( - series_key, rep_ix, n_timepoints, n_monomer_points + bucket, series_name, series_key, rep_ix, n_timepoints, n_monomer_points ) save_dataframe(bucket, dataframe_key, data, index=False) diff --git a/subcell_pipeline/temporary_file_io.py b/subcell_pipeline/temporary_file_io.py new file mode 100644 index 0000000..92d38c8 --- /dev/null +++ b/subcell_pipeline/temporary_file_io.py @@ -0,0 +1,132 @@ + +"""Methods for parsing ReaDDy simulations.""" + +import os + +import boto3 +from botocore.exceptions import ClientError + +from .constants import LOCAL_DOWNLOADS_PATH + + +s3_client = boto3.client("s3") + + +def _make_download_dir() -> None: + if not os.path.isdir(LOCAL_DOWNLOADS_PATH): + os.makedirs(LOCAL_DOWNLOADS_PATH) + + +def _download_s3_file( + bucket: str, + key: str, + dest_path: str, +) -> bool: + """ + Download files from S3 + """ + if os.path.isfile(dest_path): + # already downloaded + return False + try: + s3_client.download_file( + bucket, + key, + dest_path, + ) + print(f"Downloaded {dest_path}") + return True + except ClientError: + print(f"!!! Failed to download {key}") + return False + + +def download_readdy_hdf5( + bucket: str, + series_name: str, + series_key: str, + rep_ix: int, +) -> bool: + """ + Download files from S3 + (ReaDDy Python pkg currently requires a local file path) + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + series_name + Name of simulation series. + series_key + Combination of series and condition names. + replicate_ix + Replicate index. + """ + aws_h5_key = f"{series_name}/outputs/{series_key}_{rep_ix}.h5" + local_h5_path = os.path.join(LOCAL_DOWNLOADS_PATH, f"{series_key}_{rep_ix}.h5") + return _download_s3_file(bucket, aws_h5_key, local_h5_path) + + +def download_all_readdy_outputs( + bucket: str, + series_name: str, + condition_keys: list[str], + n_replicates: int, +) -> None: + """ + Download ReaDDy simulation outputs for all conditions and replicates. + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + series_name + Name of simulation series. + condition_keys + List of condition keys. + n_replicates + Number of simulation replicates. + """ + _make_download_dir() + + for condition_key in condition_keys: + series_key = f"{series_name}_{condition_key}" if condition_key else series_name + + for rep_ix in range(n_replicates): + + local_h5_path = os.path.join(LOCAL_DOWNLOADS_PATH, f"{series_key}_{rep_ix}.h5") + + # Skip if file already exists. + if os.path.isfile(local_h5_path): + print(f"ReaDDy file [ { local_h5_path } ] already downloaded. 
Skipping.")
+                continue
+
+            aws_h5_key = f"{series_name}/outputs/{series_key}_{rep_ix}.h5"
+            _download_s3_file(bucket, aws_h5_key, local_h5_path)
+
+            print(f"Downloaded data for [ {condition_key} ] replicate [ {rep_ix} ]")
+
+
+def upload_file_to_s3(bucket: str, src_path: str, s3_path: str) -> bool:
+    """
+    Upload a file to an S3 bucket
+
+    Parameters
+    ----------
+    bucket
+        Name of S3 bucket for input and output files.
+    src_path
+        Local path to file to upload
+    s3_path
+        S3 key for where to save in the bucket
+    """
+    if not os.path.isfile(src_path):
+        print(f"!!! File does not exist to upload {src_path}")
+        return False
+    try:
+        s3_client.upload_file(src_path, bucket, s3_path)
+        print(f"Uploaded to {s3_path}")
+        return True
+    except ClientError:
+        print(f"!!! Failed to upload {src_path}")
+        return False
diff --git a/subcell_pipeline/visualization/_visualize_individual_trajectories.py b/subcell_pipeline/visualization/_visualize_individual_trajectories.py
new file mode 100644
index 0000000..35158c7
--- /dev/null
+++ b/subcell_pipeline/visualization/_visualize_individual_trajectories.py
@@ -0,0 +1,108 @@
+# %% [markdown]
+# # Visualize individual trajectories
+
+# %% [markdown]
+"""
+
+Notebook contains steps for visualizing ReaDDy and Cytosim
+simulations of a single actin fiber.
+
+- [Visualize ReaDDy](#visualize-readdy)
+- [Visualize Cytosim](#visualize-cytosim)
+"""
+
+# %%
+if __name__ != "__main__":
+    raise ImportError("This module is a notebook and is not meant to be imported")
+
+# %% [markdown]
+"""
+## Visualize ReaDDy
+
+Iterate through all condition keys and random seeds to load simulation output
+files and visualize them. If the visualization file for a given
+condition key and random seed already exists, parsing is skipped.
+
+- Input: `(series_name)/outputs/(series_name)_(condition_key)_(index).h5`
+- Output: `(series_name)/viz/(series_name)_(condition_key)_(index).simularium`
+"""
+
+# %%
+from subcell_pipeline.visualization.visualizer import (
+    visualize_individual_readdy_trajectories,
+)
+# %%
+# Name of the simulation series
+series_name: str = "ACTIN_COMPRESSION_VELOCITY"
+
+# S3 bucket for input and output files
+bucket: str = "s3://readdy-working-bucket"
+
+# Number of simulation replicates
+n_replicates: int = 5
+
+# List of condition file keys for each velocity
+condition_keys: list[str] = ["0047", "0150", "0470", "1500"]
+
+# Number of timepoints
+n_timepoints = 200
+
+# Number of monomer points per fiber
+n_monomer_points = 200
+
+visualize_individual_readdy_trajectories(
+    bucket,
+    series_name,
+    condition_keys,
+    n_replicates,
+    n_timepoints,
+    n_monomer_points,
+    overwrite_existing=True,
+)
+
+# %% [markdown]
+"""
+## Visualize Cytosim
+
+Iterate through all condition keys and random seeds to load simulation output
+dataframes and visualize them. If the visualization file for a given
+condition key and random seed already exists, parsing is skipped.
+ +- Input: `(series_name)/samples/(series_name)_(condition_key)_(seed)/` +- Output: `(series_name)/viz/(series_name)_(condition_key)_(seed).simularium` +""" + +# %% +from subcell_pipeline.visualization.visualizer import ( + visualize_individual_cytosim_trajectories, +) +# %% +# S3 bucket for input and output files +bucket: str = "s3://cytosim-working-bucket" + +# Random seeds for simulations +random_seeds: list[int] = [1, 2, 3, 4, 5] + +# List of condition file keys for each velocity +condition_keys: list[str] = ["0047", "0150", "0470", "1500"] + +# Number of timepoints +n_timepoints = 200 + +visualize_individual_cytosim_trajectories( + bucket, + "NO_COMPRESSION", + [""], + random_seeds, + n_timepoints, + overwrite_existing=True, +) + +visualize_individual_cytosim_trajectories( + bucket, + "COMPRESSION_VELOCITY", + condition_keys, + random_seeds, + n_timepoints, + overwrite_existing=True, +) diff --git a/subcell_pipeline/visualization/visualizer.py b/subcell_pipeline/visualization/visualizer.py new file mode 100644 index 0000000..5624488 --- /dev/null +++ b/subcell_pipeline/visualization/visualizer.py @@ -0,0 +1,491 @@ +#!/usr/bin/env python + +import os +from typing import Tuple, Dict, List + +import numpy as np +from pint import UnitRegistry +from io_collection.keys.check_key import check_key +from io_collection.load.load_text import load_text +from simulariumio import ( + TrajectoryConverter, + MetaData, + InputFileData, + DisplayData, + DISPLAY_TYPE, + UnitData, + EveryNthTimestepFilter, + ScatterPlotData, + CameraData, +) +from simulariumio.cytosim import CytosimConverter, CytosimData, CytosimObjectInfo +from simulariumio.readdy import ReaddyConverter, ReaddyData +from ..constants import ( + BOX_SIZE, + LOCAL_DOWNLOADS_PATH, + READDY_TIMESTEP, + READDY_TOTAL_STEPS, + READDY_SAVED_FRAMES, + READDY_DISPLAY_DATA, +) + +from ..temporary_file_io import ( + download_readdy_hdf5, + upload_file_to_s3 +) +from ..constants import ( + BOX_SIZE, + READDY_TOTAL_STEPS, + CYTOSIM_SCALE_FACTOR, +) +from ..analysis.compression_metrics.compression_analysis import ( + COMPRESSIONMETRIC, + get_asymmetry_of_peak, + get_average_distance_from_end_to_end_axis, + get_bending_energy_from_trace, + get_contour_length_from_trace, + get_third_component_variance, +) +from ..simulation.readdy import ReaddyPostProcessor, load_readdy_fiber_points +from .spatial_annotator import SpatialAnnotator + + +def _empty_scatter_plots( + total_steps: int = -1, + times: np.ndarray = None, + time_units: str = None, +) -> Dict[COMPRESSIONMETRIC, ScatterPlotData]: + if total_steps < 0 and times is None: + raise Exception("Either total_steps or times array is required for plots") + elif times is None: + # use normalized time + xlabel = "T (normalized)" + xtrace = (1 / float(total_steps)) * np.arange(total_steps) + else: + # use actual time + xlabel = f"T ({time_units})" + xtrace = times + total_steps = times.shape[0] + return { + COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE: ScatterPlotData( + title="Average Perpendicular Distance", + xaxis_title=xlabel, + yaxis_title="distance (nm)", + xtrace=xtrace, + ytraces={ + "<<<": np.zeros(total_steps), + ">>>": 85.0 * np.ones(total_steps), + }, + render_mode="lines", + ), + COMPRESSIONMETRIC.CALC_BENDING_ENERGY: ScatterPlotData( + title="Bending Energy", + xaxis_title=xlabel, + yaxis_title="energy", + xtrace=xtrace, + ytraces={ + "<<<": np.zeros(total_steps), + ">>>": 10.0 * np.ones(total_steps), + }, + render_mode="lines", + ), + COMPRESSIONMETRIC.NON_COPLANARITY: ScatterPlotData( + 
title="Non-coplanarity",
+            xaxis_title=xlabel,
+            yaxis_title="3rd component variance from PCA",
+            xtrace=xtrace,
+            ytraces={
+                "<<<": np.zeros(total_steps),
+                ">>>": 0.03 * np.ones(total_steps),
+            },
+            render_mode="lines",
+        ),
+        COMPRESSIONMETRIC.PEAK_ASYMMETRY: ScatterPlotData(
+            title="Peak Asymmetry",
+            xaxis_title=xlabel,
+            yaxis_title="normalized peak distance",
+            xtrace=xtrace,
+            ytraces={
+                "<<<": np.zeros(total_steps),
+                ">>>": 0.5 * np.ones(total_steps),
+            },
+            render_mode="lines",
+        ),
+        COMPRESSIONMETRIC.CONTOUR_LENGTH: ScatterPlotData(
+            title="Contour Length",
+            xaxis_title=xlabel,
+            yaxis_title="filament contour length (nm)",
+            xtrace=xtrace,
+            ytraces={
+                "<<<": 480 * np.ones(total_steps),
+                ">>>": 505 * np.ones(total_steps),
+            },
+            render_mode="lines",
+        ),
+    }
+
+
+def _generate_plot_data(fiber_points):
+    """
+    Calculate plot traces from fiber_points.
+    """
+    n_points = int(fiber_points.shape[2] / 3.0)
+    result = {
+        COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE: [],
+        COMPRESSIONMETRIC.CALC_BENDING_ENERGY: [],
+        COMPRESSIONMETRIC.NON_COPLANARITY: [],
+        COMPRESSIONMETRIC.PEAK_ASYMMETRY: [],
+        COMPRESSIONMETRIC.CONTOUR_LENGTH: [],
+    }
+    total_steps = fiber_points.shape[0]
+    for time_ix in range(total_steps):
+        points = fiber_points[time_ix][0].reshape((n_points, 3))
+        result[COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE].append(
+            get_average_distance_from_end_to_end_axis(
+                polymer_trace=points,
+            )
+        )
+        result[COMPRESSIONMETRIC.CALC_BENDING_ENERGY].append(
+            CYTOSIM_SCALE_FACTOR
+            * get_bending_energy_from_trace(
+                polymer_trace=points,
+            )
+        )
+        result[COMPRESSIONMETRIC.NON_COPLANARITY].append(
+            get_third_component_variance(
+                polymer_trace=points,
+            )
+        )
+        result[COMPRESSIONMETRIC.PEAK_ASYMMETRY].append(
+            get_asymmetry_of_peak(
+                polymer_trace=points,
+            )
+        )
+        result[COMPRESSIONMETRIC.CONTOUR_LENGTH].append(
+            get_contour_length_from_trace(
+                polymer_trace=points,
+            )
+        )
+    return result
+
+
+def _add_plots(
+    converter: TrajectoryConverter,
+    fiber_points: np.ndarray,
+    times: np.ndarray,
+):
+    """
+    Add plots to an individual trajectory
+    using fiber_points to calculate metrics.
+    """
+    scatter_plots = _empty_scatter_plots(times=times)
+    plot_data = _generate_plot_data(fiber_points)
+    for metric, plot in scatter_plots.items():
+        plot.ytraces["filament"] = np.array(plot_data[metric])
+        converter.add_plot(plot, "scatter")
+
+
+def _add_readdy_spatial_annotations(
+    converter: TrajectoryConverter,
+    post_processor: ReaddyPostProcessor,
+    fiber_chain_ids: List[List[List[int]]],
+    axis_positions: List[List[np.ndarray]],
+    fiber_points: np.ndarray,
+) -> None:
+    """
+    Add visualizations of edges, normals, and control points
+    to the ReaDDy Simularium data.
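+
+    Parameters
+    ----------
+    converter: TrajectoryConverter
+        The converter whose Simularium data is annotated in place.
+    post_processor: ReaddyPostProcessor
+        The ReaddyPostProcessor loaded with this trajectory.
+    fiber_chain_ids: List[List[List[int]]]
+        Particle IDs for particles in each linear fiber at each timestep.
+    axis_positions: List[List[np.ndarray]]
+        Positions of the closest point on the fiber axis to the position
+        of each particle in each fiber at each time.
+    fiber_points: np.ndarray
+        Positions of control points for each fiber at each time.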
+ """ + # edges + edges = post_processor.edge_positions() + converter._data = SpatialAnnotator.add_fiber_agents( + converter._data, + fiber_points=edges, + type_name="edge", + fiber_width=0.5, + color="#eaeaea", + ) + # normals + normals = post_processor.linear_fiber_normals( + fiber_chain_ids=fiber_chain_ids, + axis_positions=axis_positions, + normal_length=10.0, + ) + converter._data = SpatialAnnotator.add_fiber_agents( + converter._data, + fiber_points=normals, + type_name="normal", + fiber_width=0.5, + color="#685bf3", + ) + # control points + sphere_positions = [] + for time_ix in range(len(fiber_points)): + sphere_positions.append(fiber_points[time_ix][0]) + converter._data = SpatialAnnotator.add_sphere_agents( + converter._data, + sphere_positions, + type_name="fiber point", + radius=0.8, + color="#eaeaea", + ) + + +def _load_readdy_simularium(path_to_readdy_h5: str, series_key: str) -> TrajectoryConverter: + """ + Get a TrajectoryData to visualize an actin trajectory in Simularium. + """ + total_steps = READDY_TOTAL_STEPS[series_key] + return ReaddyConverter(ReaddyData( + timestep=1e-6 * (READDY_TIMESTEP * total_steps / READDY_SAVED_FRAMES), + path_to_readdy_h5=path_to_readdy_h5, + meta_data=MetaData( + box_size=BOX_SIZE, + camera_defaults=CameraData( + position=np.array([0.0, 0.0, 300.0]), + look_at_position=np.zeros(3), + up_vector=np.array([0.0, 1.0, 0.0]), + fov_degrees=120.0, + ), + scale_factor=1.0, + ), + display_data=READDY_DISPLAY_DATA(), + time_units=UnitData("ms"), + spatial_units=UnitData("nm"), + )) + +def _visualize_readdy_trajectory( + bucket: str, + series_name: str, + series_key: str, + rep_ix: int, + n_timepoints: int, + n_monomer_points: int, +) -> None: + """ + Save a Simularium file for a single ReaDDy trajectory with plots and spatial annotations. + """ + path_to_readdy_h5 = os.path.join(LOCAL_DOWNLOADS_PATH, f"{series_key}_{rep_ix}.h5") + converter = _load_readdy_simularium(path_to_readdy_h5, series_key) + + # load data shaped for analysis from a pickle if it exists, otherwise save one + post_processor, fiber_chain_ids, axis_positions, fiber_points, times = load_readdy_fiber_points( + bucket, series_name, series_key, rep_ix, n_timepoints, n_monomer_points + ) + _add_plots(converter, fiber_points, times) + _add_readdy_spatial_annotations( + converter, post_processor, fiber_chain_ids, axis_positions, fiber_points + ) + + # save simularium file + converter.save( + output_path=path_to_readdy_h5, + validate_ids=False, # for performance + ) + + +def visualize_individual_readdy_trajectories( + bucket: str, + series_name: str, + condition_keys: list[str], + n_replicates: int, + n_timepoints: int, + n_monomer_points: int, + overwrite_existing: bool = True, +) -> None: + """ + Visualize individual ReaDDy simulations for select conditions and replicates. + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + series_name + Name of simulation series. + condition_keys + List of condition keys. + n_replicates + Number of simulation replicates. + n_timepoints + Number of timepoints to visualize. + n_monomer_points + Number of control points for each polymer trace. + overwrite_existing + Overwrite any outputs that already exist? 
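+
+    Examples
+    --------
+    A sketch mirroring the call in the visualization notebook
+    (values are illustrative)::
+
+        visualize_individual_readdy_trajectories(
+            "s3://readdy-working-bucket",
+            "ACTIN_COMPRESSION_VELOCITY",
+            ["0047", "0150", "0470", "1500"],
+            n_replicates=5,
+            n_timepoints=200,
+            n_monomer_points=200,
+        )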
+ """ + for condition_key in condition_keys: + series_key = f"{series_name}_{condition_key}" if condition_key else series_name + + for rep_ix in range(n_replicates): + local_h5_path = os.path.join(LOCAL_DOWNLOADS_PATH, f"{series_key}_{rep_ix}.h5") + output_key = f"{series_name}/viz/{series_key}_{rep_ix}.simularium" + + # Skip if output file already exists. + if not overwrite_existing and check_key(bucket, output_key): + print(f"Simularium visualization [ { output_key } ] already exists. Skipping.") + continue + + print(f"Visualizing data for [ {condition_key} ] replicate [ {rep_ix} ]") + + download_readdy_hdf5(bucket, series_name, series_key, rep_ix) + + _visualize_readdy_trajectory( + bucket, + series_name, + series_key, + rep_ix, + n_timepoints, + n_monomer_points, + ) + + upload_file_to_s3(bucket, f"{local_h5_path}.simularium", output_key) + + +ureg = UnitRegistry() + +def _find_time_units(raw_time: float, units: str = "s") -> Tuple[str, float]: + """ + Get the compact time units and a multiplier to put the times in those units + """ + time = ureg.Quantity(raw_time, units) + time = time.to_compact() + return "{:~}".format(time.units), time.magnitude / raw_time + + +def _filter_time(converter: TrajectoryConverter, n_timepoints: int) -> TrajectoryConverter: + """ + Use Simulariumio time filter + """ + time_inc = int(converter._data.agent_data.times.shape[0] / n_timepoints) + if time_inc < 2: + return converter + converter._data = converter.filter_data( + [ + EveryNthTimestepFilter( + n=time_inc, + ), + ] + ) + return converter + + +def _load_cytosim_simularium( + fiber_points_data: str, + singles_data: str, + n_timepoints: int, +) -> TrajectoryConverter: + """ + Build a converter from a single Cytosim trajectory to Simularium. + """ + singles_display_data = DisplayData( + name="linker", + radius=0.01, + display_type=DISPLAY_TYPE.SPHERE, + color="#fff", + ) + converter = CytosimConverter(CytosimData( + meta_data=MetaData( + box_size=BOX_SIZE, + scale_factor=CYTOSIM_SCALE_FACTOR, + ), + object_info={ + "fibers": CytosimObjectInfo( + cytosim_file=InputFileData( + file_contents=fiber_points_data, + ), + display_data={ + 1: DisplayData( + name=f"actin", + radius=0.02, + display_type=DISPLAY_TYPE.FIBER, + ) + }, + ), + "singles" : CytosimObjectInfo( + cytosim_file=InputFileData( + file_contents=singles_data, + ), + display_data={ + 1 : singles_display_data, + 2 : singles_display_data, + 3 : singles_display_data, + 4 : singles_display_data, + } + ), + }, + )) + converter = _filter_time(converter, n_timepoints) + time_units, time_multiplier = _find_time_units(converter._data.agent_data.times[-1]) + converter._data.agent_data.times *= time_multiplier + converter._data.time_units = UnitData(time_units) + return converter + + +def _visualize_cytosim_trajectory( + fiber_points_data: str, + singles_data: str, + local_output_path: str, + n_timepoints: int, +) -> None: + """ + Save a Simularium file for a single Cytosim trajectory with plots. + """ + converter = _load_cytosim_simularium(fiber_points_data, singles_data, n_timepoints) + _add_plots( + converter, + converter._data.agent_data.subpoints, + converter._data.agent_data.times + ) + converter.save(local_output_path) + + +def visualize_individual_cytosim_trajectories( + bucket: str, + series_name: str, + condition_keys: list[str], + random_seeds: list[int], + n_timepoints: int, + overwrite_existing: bool = True, +) -> None: + """ + Visualize individual Cytosim simulations for select conditions and replicates. 
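+
+    Each trajectory is converted from the raw ``fiber_points.txt`` and
+    ``singles.txt`` outputs for its condition and seed, then uploaded
+    as a ``.simularium`` file.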
+ + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + series_name + Name of simulation series. + condition_keys + List of condition keys. + random_seeds + Random seeds for simulations. + n_timepoints + Number of timepoints to visualize. + overwrite_existing + Overwrite any outputs that already exist? + """ + for condition_key in condition_keys: + series_key = f"{series_name}_{condition_key}" if condition_key else series_name + + for index, seed in enumerate(random_seeds): + output_key = f"{series_name}/viz/{series_key}_{seed:06d}.simularium" + + # Skip if output file already exists. + if not overwrite_existing and check_key(bucket, output_key): + print(f"Simularium visualization [ { output_key } ] already exists. Skipping.") + continue + + output_key_template = f"{series_name}/outputs/{series_key}_{index}/%s" + fiber_points_data = load_text( + bucket, output_key_template % "fiber_points.txt" + ) + singles_data = load_text( + bucket, output_key_template % "singles.txt" + ) + local_output_path = os.path.join(LOCAL_DOWNLOADS_PATH, output_key) + _visualize_cytosim_trajectory( + fiber_points_data, singles_data, local_output_path, n_timepoints + ) + + upload_file_to_s3(bucket, local_output_path, output_key) From de1b0a503350335cbc5f011bc861adc60f6e0016 Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Thu, 27 Jun 2024 16:17:16 -0700 Subject: [PATCH 04/63] visualize combined --- subcell_pipeline/constants.py | 6 + ..._process_readdy_compression_simulations.py | 10 +- ...ocess_readdy_no_compression_simulations.py | 8 +- subcell_pipeline/simulation/readdy/parser.py | 6 +- .../_visualize_all_trajectories_together.py | 66 +++++++ .../_visualize_individual_trajectories.py | 8 +- subcell_pipeline/visualization/visualizer.py | 179 +++++++++++++++++- 7 files changed, 262 insertions(+), 21 deletions(-) create mode 100644 subcell_pipeline/visualization/_visualize_all_trajectories_together.py diff --git a/subcell_pipeline/constants.py b/subcell_pipeline/constants.py index b563ca5..ebef2ed 100644 --- a/subcell_pipeline/constants.py +++ b/subcell_pipeline/constants.py @@ -188,3 +188,9 @@ def READDY_DISPLAY_DATA() -> Dict[str, DisplayData]: }, ) return result + + +SIMULATOR_COLORS = { + "cytosim": "#1cbfa4", + "readdy": "#ffae52", +} \ No newline at end of file diff --git a/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py b/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py index eae05c5..0b0a3a6 100644 --- a/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py +++ b/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py @@ -57,13 +57,13 @@ """ ## Parse simulation data -Iterate through all condition keys and random seeds to load simulation output +Iterate through all condition keys and replicates to load simulation output files and parse them into a tidy data format. If the parsed file for a given -condition key and random seed already exists, parsing is skipped. +condition key and replicate already exists, parsing is skipped. 
-- Input: `(series_name)/outputs/(series_name)_(condition_key)_(index).h5`
-- Output: `(series_name)/data/(series_name)_(condition_key)_(seed).csv`
-  and `(series_name)/data/(series_name)_(condition_key)_(seed).pkl`
+- Input: `(series_name)/outputs/(series_name)_(condition_key)_(index+1).h5`
+- Output: `(series_name)/data/(series_name)_(condition_key)_(index+1).csv`
+  and `(series_name)/data/(series_name)_(condition_key)_(index+1).pkl`
 """
 
 # %%
diff --git a/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py b/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py
index 7c2d5ed..76b9162 100644
--- a/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py
+++ b/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py
@@ -52,12 +52,12 @@
 """
 ## Parse simulation data
 
-Iterate through all condition keys and random seeds to load simulation output
+Iterate through all replicates to load simulation output
 files and parse them into a tidy data format. If the parsed file for a given
-condition key and random seed already exists, parsing is skipped.
+replicate already exists, parsing is skipped.
 
-- Input: `(series_name)/outputs/(series_name)_(index).h5`
-- Output: `(series_name)/data/(series_name)_(index).csv`
+- Input: `(series_name)/outputs/(series_name)_(index+1).h5`
+- Output: `(series_name)/data/(series_name)_(index+1).csv`
 """
 
 # %%
diff --git a/subcell_pipeline/simulation/readdy/parser.py b/subcell_pipeline/simulation/readdy/parser.py
index c5cef63..19e5b81 100644
--- a/subcell_pipeline/simulation/readdy/parser.py
+++ b/subcell_pipeline/simulation/readdy/parser.py
@@ -50,7 +50,8 @@ def load_readdy_post_processor(
     Number of timepoints to visualize.
     """
     h5_file_path = os.path.join(LOCAL_DOWNLOADS_PATH, f"{series_key}_{rep_ix}.h5")
-    pickle_key = f"{series_name}/data/{series_key}_{rep_ix}.pkl"
+    rep_id = rep_ix + 1
+    pickle_key = f"{series_name}/data/{series_key}_{rep_id:06d}.pkl"
     time_inc = READDY_TOTAL_STEPS[series_key] / n_timepoints
     readdy_loader = ReaddyLoader(
         h5_file_path=str(h5_file_path),
@@ -202,7 +203,8 @@
     series_key = f"{series_name}_{condition_key}" if condition_key else series_name
 
     for rep_ix in range(n_replicates):
-        dataframe_key = f"{series_name}/data/{series_key}_{rep_ix}.csv"
+        rep_id = rep_ix + 1
+        dataframe_key = f"{series_name}/data/{series_key}_{rep_id:06d}.csv"
 
         # Skip if dataframe file already exists.
         if check_key(bucket, dataframe_key):
diff --git a/subcell_pipeline/visualization/_visualize_all_trajectories_together.py b/subcell_pipeline/visualization/_visualize_all_trajectories_together.py
new file mode 100644
index 0000000..4f2a062
--- /dev/null
+++ b/subcell_pipeline/visualization/_visualize_all_trajectories_together.py
@@ -0,0 +1,66 @@
+# %% [markdown]
+# # Visualize combined trajectories
+
+# %% [markdown]
+"""
+
+Notebook contains steps for visualizing ReaDDy and Cytosim
+simulations of a single actin fiber.
+
+- [Visualize Combined](#visualize-combined)
+"""
+
+# %%
+if __name__ != "__main__":
+    raise ImportError("This module is a notebook and is not meant to be imported")
+
+# %% [markdown]
+"""
+## Visualize Combined
+
+Visualize all simulations with compression from ReaDDy and Cytosim together in Simularium.
+ +- Input: `(readdy_series_name)/data/(readdy_series_name)_(condition_key)_(index+1).csv` + and `(cytosim_series_name)/samples/(cytosim_series_name)_(condition_key)_(seed).csv` +- Output: `actin_compression_cytosim_readdy.simularium` +""" + +# %% +from subcell_pipeline.visualization.visualizer import ( + visualize_all_compressed_trajectories_together, +) +# %% +# S3 bucket for combined input and output files +subcell_bucket: str = "s3://subcell-working-bucket" + +# S3 bucket for ReaDDy input and output files +readdy_bucket: str = "s3://readdy-working-bucket" + +# Name of the ReaDDy simulation series +readdy_series_name: str = "ACTIN_COMPRESSION_VELOCITY" + +# S3 bucket for input and output files +cytosim_bucket: str = "s3://cytosim-working-bucket" + +# Name of the simulation series +cytosim_series_name: str = "COMPRESSION_VELOCITY" + +# List of condition file keys for each velocity +condition_keys: list[str] = ["0047", "0150", "0470", "1500"] + +# Number of simulation replicates +n_replicates: int = 5 + +# Number of timepoints +n_timepoints = 200 + +visualize_all_compressed_trajectories_together( + subcell_bucket, + readdy_bucket, + readdy_series_name, + cytosim_bucket, + cytosim_series_name, + condition_keys, + n_replicates, + n_timepoints, +) diff --git a/subcell_pipeline/visualization/_visualize_individual_trajectories.py b/subcell_pipeline/visualization/_visualize_individual_trajectories.py index 35158c7..8a2eead 100644 --- a/subcell_pipeline/visualization/_visualize_individual_trajectories.py +++ b/subcell_pipeline/visualization/_visualize_individual_trajectories.py @@ -19,12 +19,12 @@ """ ## Visualize ReaDDy -Iterate through all condition keys and random seeds to load simulation output +Iterate through all condition keys and replicates to load simulation output files and visualize them. If the visualization file for a given -condition key and random seed already exists, parsing is skipped. +condition key and replicate already exists, parsing is skipped. -- Input: `(series_name)/outputs/(series_name)_(condition_key)_(index).h5` -- Output: `(series_name)/viz/(series_name)_(condition_key)_(index).simularium` +- Input: `(series_name)/outputs/(series_name)_(condition_key)_(index+1).h5` +- Output: `(series_name)/viz/(series_name)_(condition_key)_(index+1).simularium` """ # %% diff --git a/subcell_pipeline/visualization/visualizer.py b/subcell_pipeline/visualization/visualizer.py index 5624488..d1e5030 100644 --- a/subcell_pipeline/visualization/visualizer.py +++ b/subcell_pipeline/visualization/visualizer.py @@ -4,9 +4,11 @@ from typing import Tuple, Dict, List import numpy as np +import pandas as pd from pint import UnitRegistry from io_collection.keys.check_key import check_key from io_collection.load.load_text import load_text +from io_collection.load.load_dataframe import load_dataframe from simulariumio import ( TrajectoryConverter, MetaData, @@ -17,6 +19,8 @@ EveryNthTimestepFilter, ScatterPlotData, CameraData, + TrajectoryData, + AgentData, ) from simulariumio.cytosim import CytosimConverter, CytosimData, CytosimObjectInfo from simulariumio.readdy import ReaddyConverter, ReaddyData @@ -27,6 +31,7 @@ READDY_TOTAL_STEPS, READDY_SAVED_FRAMES, READDY_DISPLAY_DATA, + SIMULATOR_COLORS, ) from ..temporary_file_io import ( @@ -125,7 +130,7 @@ def _empty_scatter_plots( } -def _generate_plot_data(fiber_points): +def _generate_plot_data(fiber_points: np.ndarray) -> Dict[COMPRESSIONMETRIC, list[float]]: """ Calculate plot traces from fiber_points. 
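+
+    Parameters
+    ----------
+    fiber_points: np.ndarray (shape = timesteps x fibers x (3 * points))
+        Flattened x,y,z control point positions for each fiber at each timestep.
+
+    Returns
+    -------
+    : Dict[COMPRESSIONMETRIC, list[float]]
+        The value of each metric at each timestep.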
""" @@ -169,11 +174,11 @@ def _generate_plot_data(fiber_points): return result -def _add_plots( +def _add_individual_plots( converter: TrajectoryConverter, fiber_points: np.ndarry, times: np.ndarray, -): +) -> None: """ Add plots to an individual trajectory using fiber_points to calculate metrics. @@ -272,7 +277,7 @@ def _visualize_readdy_trajectory( post_processor, fiber_chain_ids, axis_positions, fiber_points, times = load_readdy_fiber_points( bucket, series_name, series_key, rep_ix, n_timepoints, n_monomer_points ) - _add_plots(converter, fiber_points, times) + _add_individual_plots(converter, fiber_points, times) _add_readdy_spatial_annotations( converter, post_processor, fiber_chain_ids, axis_positions, fiber_points ) @@ -318,7 +323,8 @@ def visualize_individual_readdy_trajectories( for rep_ix in range(n_replicates): local_h5_path = os.path.join(LOCAL_DOWNLOADS_PATH, f"{series_key}_{rep_ix}.h5") - output_key = f"{series_name}/viz/{series_key}_{rep_ix}.simularium" + rep_id = rep_ix + 1 + output_key = f"{series_name}/viz/{series_key}_{rep_id:06d}.simularium" # Skip if output file already exists. if not overwrite_existing and check_key(bucket, output_key): @@ -431,7 +437,7 @@ def _visualize_cytosim_trajectory( Save a Simularium file for a single Cytosim trajectory with plots. """ converter = _load_cytosim_simularium(fiber_points_data, singles_data, n_timepoints) - _add_plots( + _add_individual_plots( converter, converter._data.agent_data.subpoints, converter._data.agent_data.times @@ -489,3 +495,164 @@ def visualize_individual_cytosim_trajectories( ) upload_file_to_s3(bucket, local_output_path, output_key) + + +def _load_fiber_points_from_dataframe( + simulator: str, + dataframe: pd.DataFrame, + n_timepoints: int +) -> np.ndarray: + """ + Save a Simularium file for a single Cytosim trajectory with plots. + """ + dataframe.sort_values(by=["time", "fiber_point"]) + total_steps = dataframe.time.unique().shape[0] + n_points = dataframe.fiber_point.unique().shape[0] + if total_steps != n_timepoints: + raise Exception( + f"Requested number of timesteps [ {n_timepoints} ] does not match " + f"number of timesteps in dataset [ {total_steps} ]." + ) + result = [] + for time_ix in range(total_steps): + result.append([]) + result[time_ix].append( + (CYTOSIM_SCALE_FACTOR if simulator == "cytosim" else 1) * np.array( + dataframe[time_ix * n_points : (time_ix + 1) * n_points][["xpos", "ypos", "zpos"]] + ) + ) + return np.array(result) + + +def _generate_simularium_all( + fiber_points: list[np.ndarray], + type_names: list[str], + display_data: Dict[str, DisplayData], +) -> TrajectoryConverter: + """ + Generate a TrajectoryConverter with all simulations from ReaDDy and Cytosim together. 
+    """
+    total_conditions = len(fiber_points)
+    total_steps = fiber_points[0].shape[0]
+    n_monomer_points = fiber_points[0].shape[2] // 3
+    # stack the flattened control points: shape = timesteps x trajectories x (3 * points)
+    subpoints = np.array([
+        [fiber_points[traj_ix][time_ix][0] for traj_ix in range(total_conditions)]
+        for time_ix in range(total_steps)
+    ])
+    traj_data = TrajectoryData(
+        meta_data=MetaData(
+            box_size=BOX_SIZE,
+            camera_defaults=CameraData(
+                position=np.array([10.0, 0.0, 200.0]),
+                look_at_position=np.array([10.0, 0.0, 0.0]),
+                fov_degrees=60.0,
+            ),
+            trajectory_title="Actin compression in Cytosim and Readdy",
+        ),
+        agent_data=AgentData(
+            times=np.arange(total_steps),
+            n_agents=total_conditions * np.ones((total_steps)),
+            viz_types=1001
+            * np.ones((total_steps, total_conditions)),  # fiber viz type = 1001
+            unique_ids=np.array(total_steps * [list(range(total_conditions))]),
+            types=total_steps * [type_names],
+            positions=np.zeros((total_steps, total_conditions, 3)),
+            radii=np.ones((total_steps, total_conditions)),
+            n_subpoints=3 * n_monomer_points * np.ones((total_steps, total_conditions)),
+            subpoints=subpoints,
+            display_data=display_data,
+        ),
+        time_units=UnitData("count"),  # frames
+        spatial_units=UnitData("nm"),  # nanometer
+    )
+    return TrajectoryConverter(traj_data)
+
+
+def _add_combined_plots(
+    converter: TrajectoryConverter,
+    fiber_points: np.ndarray,
+    type_names: list[str],
+    n_timepoints: int,
+) -> None:
+    """
+    Add plots to an individual trajectory
+    using fiber_points to calculate metrics.
+    """
+    scatter_plots = _empty_scatter_plots(total_steps=n_timepoints)
+    for traj_ix in range(len(fiber_points)):
+        plot_data = _generate_plot_data(fiber_points[traj_ix])
+        for metric, plot in scatter_plots.items():
+            plot.ytraces[type_names[traj_ix]] = np.array(plot_data[metric])
+    for metric, plot in scatter_plots.items():
+        converter.add_plot(plot, "scatter")
+
+
+def visualize_all_compressed_trajectories_together(
+    subcell_bucket: str,
+    readdy_bucket: str,
+    readdy_series_name: str,
+    cytosim_bucket: str,
+    cytosim_series_name: str,
+    condition_keys: list[str],
+    n_replicates: int,
+    n_timepoints: int,
+) -> None:
+    """
+    Visualize simulations from ReaDDy and Cytosim together
+    for select conditions and number of replicates.
+
+    Parameters
+    ----------
+    subcell_bucket
+        Name of S3 bucket for combined input and output files.
+    readdy_bucket
+        Name of S3 bucket for ReaDDy input and output files.
+    readdy_series_name
+        Name of ReaDDy simulation series.
+    cytosim_bucket
+        Name of S3 bucket for Cytosim input and output files.
+    cytosim_series_name
+        Name of Cytosim simulation series.
+    condition_keys
+        List of condition keys.
+    n_replicates
+        How many replicates to visualize.
+    n_timepoints
+        Number of timepoints to visualize.
+    """
+    fiber_points = []
+    type_names = []
+    display_data = {}
+    for condition_key in condition_keys:
+        for index in range(n_replicates):
+            for simulator in SIMULATOR_COLORS:
+
+                # get path of dataframe from simulation post-processing to use as input
+                rep_id = index + 1
+                if simulator == "readdy":
+                    bucket = readdy_bucket
+                    df_key = f"{readdy_series_name}/data/{readdy_series_name}_{condition_key}_{rep_id:06d}.csv"
+                else:
+                    bucket = cytosim_bucket
+                    df_key = f"{cytosim_series_name}/samples/{cytosim_series_name}_{condition_key}_{rep_id:06d}.csv"
+
+                # Skip if input dataframe does not exist.
+                if not check_key(bucket, df_key):
+                    print(f"Dataframe not available for {simulator} [ { df_key } ]. Skipping.")
+                    continue
+
+                dataframe = load_dataframe(bucket, df_key)
+                fiber_points.append(_load_fiber_points_from_dataframe(simulator, dataframe, n_timepoints))
+                condition = float(condition_key[:3] + "."
+ condition_key[-1]) + condition = round(condition) if condition_key[-1] == "0" else condition + type_names.append(f"{simulator}#{condition} um/s {index}") + display_data[type_names[-1]] = DisplayData( + name=type_names[-1], + display_type=DISPLAY_TYPE.FIBER, + color=SIMULATOR_COLORS[simulator], + ) + + converter = _generate_simularium_all(fiber_points, type_names, display_data) + _add_combined_plots(converter, fiber_points, type_names, n_timepoints) + output_key = "actin_compression_cytosim_readdy.simularium" + local_output_path = os.path.join(LOCAL_DOWNLOADS_PATH, output_key) + converter.save(local_output_path) + + upload_file_to_s3(subcell_bucket, local_output_path, output_key) From 037f3bff3a23cfbce3c54c801e6f5cf6ac6b18a8 Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Mon, 1 Jul 2024 11:36:06 -0700 Subject: [PATCH 05/63] add readdy baseline individual viz --- .../_visualize_individual_trajectories.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/subcell_pipeline/visualization/_visualize_individual_trajectories.py b/subcell_pipeline/visualization/_visualize_individual_trajectories.py index 8a2eead..de44883 100644 --- a/subcell_pipeline/visualization/_visualize_individual_trajectories.py +++ b/subcell_pipeline/visualization/_visualize_individual_trajectories.py @@ -19,9 +19,10 @@ """ ## Visualize ReaDDy -Iterate through all condition keys and replicates to load simulation output -files and visualize them. If the visualization file for a given -condition key and replicate already exists, parsing is skipped. +Iterate through all condition keys and replicates to load simulation +output files and visualize them. If the visualization file for a given +condition key and replicate already exists and overwrite_existing is False, +parsing is skipped. 
- Input: `(series_name)/outputs/(series_name)_(condition_key)_(index+1).h5` - Output: `(series_name)/viz/(series_name)_(condition_key)_(index+1).simularium` @@ -52,7 +53,17 @@ visualize_individual_readdy_trajectories( bucket, - series_name, + "ACTIN_NO_COMPRESSION", + [""], + n_replicates, + n_timepoints, + n_monomer_points, + overwrite_existing=True, +) + +visualize_individual_readdy_trajectories( + bucket, + "ACTIN_COMPRESSION_VELOCITY", condition_keys, n_replicates, n_timepoints, From fa06164d9a20904876c4c849ca78a3b54a621bf4 Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Mon, 1 Jul 2024 15:41:40 -0700 Subject: [PATCH 06/63] WIP tomography viz --- .../_analyze_actin_cme_tomography_data.py | 5 ++- .../_visualize_tomography_data.py | 39 +++++++++++++++++++ subcell_pipeline/visualization/visualizer.py | 27 +++++++++++++ 3 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 subcell_pipeline/visualization/_visualize_tomography_data.py diff --git a/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py b/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py index f9350aa..ec9f39b 100644 --- a/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py +++ b/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py @@ -22,6 +22,8 @@ """ # %% +import pandas as pd + from subcell_pipeline.analysis.tomography_data.tomography_data import ( get_branched_tomography_data, get_unbranched_tomography_data, @@ -119,8 +121,9 @@ # %% sampled_key = f"{name}/{name}_coordinates_sampled.csv" +all_tomogram_df = pd.concat([branched_df, unbranched_df]) sampled_data = sample_tomography_data( - unbranched_df, bucket, sampled_key, n_monomer_points, minimum_points + all_tomogram_df, bucket, sampled_key, n_monomer_points, minimum_points ) # %% [markdown] diff --git a/subcell_pipeline/visualization/_visualize_tomography_data.py b/subcell_pipeline/visualization/_visualize_tomography_data.py new file mode 100644 index 0000000..980bf2f --- /dev/null +++ b/subcell_pipeline/visualization/_visualize_tomography_data.py @@ -0,0 +1,39 @@ +# %% [markdown] +# # Visualize actin CME tomography data + +# %% [markdown] +""" + +Notebook contains steps for visualizing segmented tomography data +for actin fibers. + +- [Visualize Tomography](#visualize-tomography) +""" + +# %% +if __name__ != "__main__": + raise ImportError("This module is a notebook and is not meant to be imported") + +# %% [markdown] +""" +## Visualize Tomography + +Visualize segmented tomography data for actin fibers. + +- Input: `(name)/(name)_coordinates_sampled.csv` +- Output: `(name).simularium` +""" + +# %% +from subcell_pipeline.visualization.visualizer import ( + visualize_tomography, +) + +# %% +# Dataset name +name = "actin_cme_tomography" + +# S3 bucket for input and output files +bucket = "s3://subcell-working-bucket" + +visualize_tomography(bucket, name) diff --git a/subcell_pipeline/visualization/visualizer.py b/subcell_pipeline/visualization/visualizer.py index d1e5030..57873cc 100644 --- a/subcell_pipeline/visualization/visualizer.py +++ b/subcell_pipeline/visualization/visualizer.py @@ -656,3 +656,30 @@ def visualize_all_compressed_trajectories_together( converter.save(local_output_path) upload_file_to_s3(subcell_bucket, local_output_path, output_key) + + +def visualize_tomography(bucket: str, name: str) -> None: + """ + Visualize segmented tomography data for actin fibers. + + Parameters + ---------- + data + Tomography data. 
+ """ + # TODO + + sampled_tomography_key = f"{name}/{name}_coordinates_sampled.csv" + data = "TODO" + + for _, group in data.groupby("dataset"): + for _, fiber in group.groupby("id"): + pos = [fiber["xpos"], fiber["ypos"], fiber["zpos"]] + + output_key = f"{name}.simularium" + local_output_path = os.path.join(LOCAL_DOWNLOADS_PATH, output_key) + converter = "TODO" + converter.save(local_output_path) + + upload_file_to_s3(bucket, local_output_path, output_key) + \ No newline at end of file From 9bd0675c282b78494f2495732e3900a5b095bd30 Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Mon, 1 Jul 2024 16:37:11 -0700 Subject: [PATCH 07/63] tomography visualization --- .../_analyze_actin_cme_tomography_data.py | 10 +- .../tomography_data/tomography_data.py | 2 +- subcell_pipeline/constants.py | 16 +- subcell_pipeline/visualization/visualizer.py | 182 ++++++++++++++++-- 4 files changed, 185 insertions(+), 25 deletions(-) diff --git a/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py b/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py index ec9f39b..19aa8be 100644 --- a/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py +++ b/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py @@ -30,6 +30,7 @@ plot_tomography_data_by_dataset, sample_tomography_data, ) +from subcell_pipeline.constants import TOMOGRAPHY_SCALE_FACTOR # %% [markdown] """ @@ -68,17 +69,16 @@ ("2018November_32", "TomoNovember_32_Vesicle"), ] -# Spatial conversion scaling factor (pixels to um) -scale_factor = 0.00006 - # %% branched_df = get_branched_tomography_data( - bucket, name, repository, branched_datasets, scale_factor + bucket, name, repository, branched_datasets, TOMOGRAPHY_SCALE_FACTOR ) unbranched_df = get_unbranched_tomography_data( - bucket, name, repository, unbranched_datasets, scale_factor + bucket, name, repository, unbranched_datasets, TOMOGRAPHY_SCALE_FACTOR ) +# TODO run metric analysis on tomography data + # %% [markdown] """ ## Plot branched tomography fibers diff --git a/subcell_pipeline/analysis/tomography_data/tomography_data.py b/subcell_pipeline/analysis/tomography_data/tomography_data.py index e3ee13e..c01cf35 100644 --- a/subcell_pipeline/analysis/tomography_data/tomography_data.py +++ b/subcell_pipeline/analysis/tomography_data/tomography_data.py @@ -5,7 +5,7 @@ from io_collection.load.load_dataframe import load_dataframe from io_collection.save.save_dataframe import save_dataframe -SAMPLE_COLUMNS = ["xpos", "ypos", "zpos"] +from ...constants import TOMOGRAPHY_SAMPLE_COLUMNS def read_tomography_data(file: str, label: str = "fil") -> pd.DataFrame: diff --git a/subcell_pipeline/constants.py b/subcell_pipeline/constants.py index ebef2ed..b50d544 100644 --- a/subcell_pipeline/constants.py +++ b/subcell_pipeline/constants.py @@ -190,7 +190,19 @@ def READDY_DISPLAY_DATA() -> Dict[str, DisplayData]: return result -SIMULATOR_COLORS = { +SIMULATOR_COLORS: Dict[str, str] = { "cytosim": "#1cbfa4", "readdy": "#ffae52", -} \ No newline at end of file +} + + +TOMOGRAPHY_SAMPLE_COLUMNS: list[str] = ["xpos", "ypos", "zpos"] + + +TOMOGRAPHY_VIZ_SCALE: float = 0.1 + + +TOMOGRAPHY_MIN_COMPRESSION: int = 2 + +# pixels to um +TOMOGRAPHY_SCALE_FACTOR: float = 0.00006 diff --git a/subcell_pipeline/visualization/visualizer.py b/subcell_pipeline/visualization/visualizer.py index 57873cc..0841d10 100644 --- a/subcell_pipeline/visualization/visualizer.py +++ b/subcell_pipeline/visualization/visualizer.py @@ -18,9 
+18,11 @@ UnitData, EveryNthTimestepFilter, ScatterPlotData, + HistogramPlotData, CameraData, TrajectoryData, AgentData, + DimensionData, ) from simulariumio.cytosim import CytosimConverter, CytosimData, CytosimObjectInfo from simulariumio.readdy import ReaddyConverter, ReaddyData @@ -32,6 +34,10 @@ READDY_SAVED_FRAMES, READDY_DISPLAY_DATA, SIMULATOR_COLORS, + TOMOGRAPHY_SAMPLE_COLUMNS, + TOMOGRAPHY_VIZ_SCALE, + TOMOGRAPHY_MIN_COMPRESSION, + TOMOGRAPHY_SCALE_FACTOR, ) from ..temporary_file_io import ( @@ -238,7 +244,8 @@ def _add_readdy_spatial_annotations( def _load_readdy_simularium(path_to_readdy_h5: str, series_key: str) -> TrajectoryConverter: """ - Get a TrajectoryData to visualize an actin trajectory in Simularium. + Load from ReaDDy outputs and generate a TrajectoryConverter + to visualize an actin trajectory in Simularium. """ total_steps = READDY_TOTAL_STEPS[series_key] return ReaddyConverter(ReaddyData( @@ -381,7 +388,8 @@ def _load_cytosim_simularium( n_timepoints: int, ) -> TrajectoryConverter: """ - Build a converter from a single Cytosim trajectory to Simularium. + Load from Cytosim outputs and generate a TrajectoryConverter + to visualize an actin trajectory in Simularium. """ singles_display_data = DisplayData( name="linker", @@ -503,7 +511,9 @@ def _load_fiber_points_from_dataframe( n_timepoints: int ) -> np.ndarray: """ - Save a Simularium file for a single Cytosim trajectory with plots. + Load fiber points from pre-calculated dataframes + and generate a TrajectoryConverter to visualize + all actin trajectories together in Simularium. """ dataframe.sort_values(by=["time", "fiber_point"]) total_steps = dataframe.time.unique().shape[0] @@ -524,7 +534,7 @@ def _load_fiber_points_from_dataframe( return np.array(result) -def _generate_simularium_all( +def _load_all_together_simularium( fiber_points: list[np.ndarray], type_names: list[str], display_data: Dict[str, DisplayData], @@ -572,7 +582,7 @@ def _add_combined_plots( n_timepoints: int, ) -> None: """ - Add plots to an individual trajectory + Add plots for all trajectories together using fiber_points to calculate metrics. 
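+
+    Parameters
+    ----------
+    converter: TrajectoryConverter
+        The converter the scatter plots are added to.
+    fiber_points: np.ndarray
+        Control point positions over time for each trajectory.
+    type_names: list[str]
+        Name of each trajectory, used as its trace name in each plot.
+    n_timepoints: int
+        Number of timepoints in each plot trace.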
""" scatter_plots = _empty_scatter_plots(total_steps=n_timepoints) @@ -649,7 +659,7 @@ def visualize_all_compressed_trajectories_together( color=SIMULATOR_COLORS[simulator], ) - converter = _generate_simularium_all(fiber_points, type_names, display_data) + converter = _load_all_together_simularium(fiber_points, type_names, display_data) _add_combined_plots(converter, fiber_points, type_names, n_timepoints) output_key = "actin_compression_cytosim_readdy.simularium" local_output_path = os.path.join(LOCAL_DOWNLOADS_PATH, output_key) @@ -658,6 +668,152 @@ def visualize_all_compressed_trajectories_together( upload_file_to_s3(subcell_bucket, local_output_path, output_key) +def _empty_tomography_plots(): + return { + "CONTOUR_LENGTH" : HistogramPlotData( + title="Contour Length", + xaxis_title="filament contour length (nm)", + traces={}, + ), + "COMPRESSION_RATIO" : HistogramPlotData( + title="Compression Percentage", + xaxis_title="percent (%)", + traces={}, + ), + "AVERAGE_PERP_DISTANCE" : HistogramPlotData( + title="Average Perpendicular Distance", + xaxis_title="distance (nm)", + traces={}, + ), + "CALC_BENDING_ENERGY" : HistogramPlotData( + title="Bending Energy", + xaxis_title="energy", + traces={}, + ), + "NON_COPLANARITY" : HistogramPlotData( + title="Non-coplanarity", + xaxis_title="3rd component variance from PCA", + traces={}, + ), + "PEAK_ASYMMETRY" : HistogramPlotData( + title="Peak Asymmetry", + xaxis_title="normalized peak distance", + traces={}, + ), + } + + +def _add_tomography_plots(tomo_df, converter): + """ + Add plots to tomography data using pre-calculated metrics. + """ + plots = _empty_tomography_plots() + for metric_name in plots: + col_ix = list(tomo_df.columns).index(metric_name) + plots[metric_name].traces["actin"] = np.array(list(list(map(set, tomo_df.values.T))[col_ix])) + if metric_name == "COMPRESSION_RATIO": + plots[metric_name].traces["actin"] *= 100. + converter.add_plot(plots[metric_name], "histogram") + + +def _get_tomography_spatial_center_and_size(tomo_df): + """ + Get the center and size of the tomography dataset in 3D space. + """ + ixs = [ + list(tomo_df.columns).index(TOMOGRAPHY_SAMPLE_COLUMNS[0]), + list(tomo_df.columns).index(TOMOGRAPHY_SAMPLE_COLUMNS[1]), + list(tomo_df.columns).index(TOMOGRAPHY_SAMPLE_COLUMNS[2]), + ] + unique_values = list(map(set, tomo_df.values.T)) + mins = [] + maxs = [] + for dim_ix in range(3): + d_values = np.array(list(unique_values[ixs[dim_ix]])) + mins.append(np.amin(d_values)) + maxs.append(np.amax(d_values)) + mins = np.array(mins) + maxs = np.array(maxs) + return mins + 0.5 * (maxs - mins), maxs - mins + + +def _load_tomography_simularium(bucket: str, name: str) -> TrajectoryConverter: + """ + Load sampled tomography data and generate a TrajectoryConverter + to visualize it in Simularium. 
+ """ + tomo_key = f"{name}/{name}_coordinates_sampled.csv" + tomo_df = load_dataframe(bucket, tomo_key) + tomo_df = tomo_df.sort_values(by=["id"]) + tomo_df = tomo_df.reset_index(drop=True) + names, ids = np.unique(np.array(list(tomo_df["id"])), return_index=True) + traj_ids = names[np.argsort(ids)] + center, box_size = _get_tomography_spatial_center_and_size(tomo_df) + max_points = 0 + subpoints = [] + compression_ratios = [] + for traj_id in traj_ids: + fiber_df = tomo_df.loc[tomo_df["id"] == traj_id] + points = np.array(fiber_df[["xpos", "ypos", "zpos"]]) - center + subpoints.append(TOMOGRAPHY_VIZ_SCALE * points.flatten()) + compression_ratios.append(list(fiber_df["id"])[0]) + if len(fiber_df) > max_points: + max_points = len(fiber_df) + n_agents = len(subpoints) + compression_percents = 100. * np.array(compression_ratios) + min_compression_ratio = np.amin(compression_percents) + max_compression_ratio = np.amax(compression_percents) + bins = np.linspace(min_compression_ratio, max_compression_ratio, 100) + digitized = np.digitize(compression_percents, bins) + type_names = [] + display_data = {} + type_name_min = f"actin less than {TOMOGRAPHY_MIN_COMPRESSION}.0 percent compressed" + for agent_ix in range(n_agents): + bin_percent = int(10 * bins[digitized[agent_ix] - 1]) / 10. + if bin_percent < TOMOGRAPHY_MIN_COMPRESSION: + type_name = type_name_min + else: + type_name = f"actin {bin_percent} percent compressed" + type_names.append(type_name) + if type_name not in display_data: + display_data[type_name] = DisplayData( + name=type_name, + display_type=DISPLAY_TYPE.FIBER, + ) + display_data[type_name_min] = DisplayData( + name=type_name_min, + display_type=DISPLAY_TYPE.FIBER, + color="#222222", + ) + agent_data = AgentData.from_dimensions(DimensionData( + total_steps=1, + max_agents=n_agents, + max_subpoints=3 * max_points, + )) + agent_data.n_agents[0] = n_agents + agent_data.viz_types[0] = 1001.0 * np.ones(n_agents) + agent_data.unique_ids[0] = np.arange(n_agents) + agent_data.types[0] = type_names + agent_data.radii *= 0.5 + for agent_ix in range(n_agents): + n_subpoints = subpoints[agent_ix].shape[0] + agent_data.n_subpoints[0][agent_ix] = n_subpoints + agent_data.subpoints[0][agent_ix][:n_subpoints] = subpoints[agent_ix] + agent_data.display_data = display_data + UNIT_SCALE_FACTOR = 10 / 2. # not sure where this factor came from or if it is still needed + traj_data = TrajectoryData( + meta_data=MetaData( + box_size=TOMOGRAPHY_VIZ_SCALE * box_size, + camera_defaults=CameraData(position=np.array([0.0, 0.0, 70.0])) + ), + agent_data=agent_data, + spatial_units=UnitData("um", UNIT_SCALE_FACTOR * TOMOGRAPHY_SCALE_FACTOR / TOMOGRAPHY_VIZ_SCALE), # 0.003 + ) + converter = TrajectoryConverter(traj_data) + _add_tomography_plots(tomo_df, converter) + return converter + + def visualize_tomography(bucket: str, name: str) -> None: """ Visualize segmented tomography data for actin fibers. @@ -666,19 +822,11 @@ def visualize_tomography(bucket: str, name: str) -> None: ---------- data Tomography data. 
- """ - # TODO - - sampled_tomography_key = f"{name}/{name}_coordinates_sampled.csv" - data = "TODO" - - for _, group in data.groupby("dataset"): - for _, fiber in group.groupby("id"): - pos = [fiber["xpos"], fiber["ypos"], fiber["zpos"]] - + """ output_key = f"{name}.simularium" local_output_path = os.path.join(LOCAL_DOWNLOADS_PATH, output_key) - converter = "TODO" + + converter = _load_tomography_simularium(bucket, name) converter.save(local_output_path) upload_file_to_s3(bucket, local_output_path, output_key) From 4bd4c60fe99f1f5f4dffc88b994d7b17329ba00a Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Mon, 1 Jul 2024 18:02:34 -0700 Subject: [PATCH 08/63] calculate compression metrics on tomography data for viz --- .../_compare_compression_metrics.py | 2 +- .../_analyze_actin_cme_tomography_data.py | 2 - .../_visualize_individual_trajectories.py | 13 +- subcell_pipeline/visualization/visualizer.py | 115 +++++++----------- 4 files changed, 55 insertions(+), 77 deletions(-) diff --git a/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py b/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py index ce6bff3..e0d4d83 100644 --- a/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py +++ b/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py @@ -107,7 +107,7 @@ # %% readdy_metrics = get_compression_metric_data( bucket=readdy_bucket, - series_name=series_name, + series_name=f"ACTIN_{series_name}", condition_keys=condition_keys, random_seeds=random_seeds, metrics=metrics, diff --git a/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py b/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py index 19aa8be..d77d679 100644 --- a/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py +++ b/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py @@ -77,8 +77,6 @@ bucket, name, repository, unbranched_datasets, TOMOGRAPHY_SCALE_FACTOR ) -# TODO run metric analysis on tomography data - # %% [markdown] """ ## Plot branched tomography fibers diff --git a/subcell_pipeline/visualization/_visualize_individual_trajectories.py b/subcell_pipeline/visualization/_visualize_individual_trajectories.py index de44883..e137d7f 100644 --- a/subcell_pipeline/visualization/_visualize_individual_trajectories.py +++ b/subcell_pipeline/visualization/_visualize_individual_trajectories.py @@ -21,7 +21,7 @@ Iterate through all condition keys and replicates to load simulation output files and visualize them. If the visualization file for a given -condition key and replicate already exists and overwrite_existing is False, +condition key and replicate already exists and recalculate is False, parsing is skipped. - Input: `(series_name)/outputs/(series_name)_(condition_key)_(index+1).h5` @@ -58,7 +58,7 @@ n_replicates, n_timepoints, n_monomer_points, - overwrite_existing=True, + recalculate=True, ) visualize_individual_readdy_trajectories( @@ -68,7 +68,7 @@ n_replicates, n_timepoints, n_monomer_points, - overwrite_existing=True, + recalculate=True, ) # %% [markdown] @@ -77,7 +77,8 @@ Iterate through all condition keys and random seeds to load simulation output dataframes and visualize them. If the visualization file for a given -condition key and random seed already exists, parsing is skipped. +condition key and random seed already exists and recalculate is False, +parsing is skipped. 
- Input: `(series_name)/samples/(series_name)_(condition_key)_(seed)/` - Output: `(series_name)/viz/(series_name)_(condition_key)_(seed).simularium` @@ -106,7 +107,7 @@ [""], random_seeds, n_timepoints, - overwrite_existing=True, + recalculate=True, ) visualize_individual_cytosim_trajectories( @@ -115,5 +116,5 @@ condition_keys, random_seeds, n_timepoints, - overwrite_existing=True, + recalculate=True, ) diff --git a/subcell_pipeline/visualization/visualizer.py b/subcell_pipeline/visualization/visualizer.py index 0841d10..f11cfe6 100644 --- a/subcell_pipeline/visualization/visualizer.py +++ b/subcell_pipeline/visualization/visualizer.py @@ -49,23 +49,17 @@ READDY_TOTAL_STEPS, CYTOSIM_SCALE_FACTOR, ) -from ..analysis.compression_metrics.compression_analysis import ( - COMPRESSIONMETRIC, - get_asymmetry_of_peak, - get_average_distance_from_end_to_end_axis, - get_bending_energy_from_trace, - get_contour_length_from_trace, - get_third_component_variance, +from subcell_pipeline.analysis.compression_metrics.compression_metric import ( + CompressionMetric, ) from ..simulation.readdy import ReaddyPostProcessor, load_readdy_fiber_points from .spatial_annotator import SpatialAnnotator - def _empty_scatter_plots( total_steps: int = -1, times: np.ndarray = None, time_units: str = None, -) -> Dict[COMPRESSIONMETRIC, ScatterPlotData]: +) -> Dict[CompressionMetric, ScatterPlotData]: if total_steps < 0 and times is None: raise Exception("Either total_steps or times array is required for plots") elif times is None: @@ -78,7 +72,7 @@ def _empty_scatter_plots( xtrace = times total_steps = times.shape[0] return { - COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE: ScatterPlotData( + CompressionMetric.AVERAGE_PERP_DISTANCE: ScatterPlotData( title="Average Perpendicular Distance", xaxis_title=xlabel, yaxis_title="distance (nm)", @@ -89,7 +83,7 @@ def _empty_scatter_plots( }, render_mode="lines", ), - COMPRESSIONMETRIC.CALC_BENDING_ENERGY: ScatterPlotData( + CompressionMetric.CALC_BENDING_ENERGY: ScatterPlotData( title="Bending Energy", xaxis_title=xlabel, yaxis_title="energy", @@ -100,7 +94,7 @@ def _empty_scatter_plots( }, render_mode="lines", ), - COMPRESSIONMETRIC.NON_COPLANARITY: ScatterPlotData( + CompressionMetric.NON_COPLANARITY: ScatterPlotData( title="Non-coplanarity", xaxis_title=xlabel, yaxis_title="3rd component variance from PCA", @@ -111,7 +105,7 @@ def _empty_scatter_plots( }, render_mode="lines", ), - COMPRESSIONMETRIC.PEAK_ASYMMETRY: ScatterPlotData( + CompressionMetric.PEAK_ASYMMETRY: ScatterPlotData( title="Peak Asymmetry", xaxis_title=xlabel, yaxis_title="normalized peak distance", @@ -122,7 +116,7 @@ def _empty_scatter_plots( }, render_mode="lines", ), - COMPRESSIONMETRIC.CONTOUR_LENGTH: ScatterPlotData( + CompressionMetric.CONTOUR_LENGTH: ScatterPlotData( title="Contour Length", xaxis_title=xlabel, yaxis_title="filament contour length (nm)", @@ -136,47 +130,27 @@ def _empty_scatter_plots( } -def _generate_plot_data(fiber_points: np.ndarray) -> Dict[COMPRESSIONMETRIC, list[float]]: +def _generate_plot_data(fiber_points: np.ndarray) -> Dict[CompressionMetric, list[float]]: """ Calculate plot traces from fiber_points. 
""" n_points = int(fiber_points.shape[2] / 3.0) result = { - COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE: [], - COMPRESSIONMETRIC.CALC_BENDING_ENERGY: [], - COMPRESSIONMETRIC.NON_COPLANARITY: [], - COMPRESSIONMETRIC.PEAK_ASYMMETRY: [], - COMPRESSIONMETRIC.CONTOUR_LENGTH: [], + CompressionMetric.AVERAGE_PERP_DISTANCE: [], + CompressionMetric.CALC_BENDING_ENERGY: [], + CompressionMetric.NON_COPLANARITY: [], + CompressionMetric.PEAK_ASYMMETRY: [], + CompressionMetric.CONTOUR_LENGTH: [], } total_steps = fiber_points.shape[0] for time_ix in range(total_steps): points = fiber_points[time_ix][0].reshape((n_points, 3)) - result[COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE].append( - get_average_distance_from_end_to_end_axis( - polymer_trace=points, - ) - ) - result[COMPRESSIONMETRIC.CALC_BENDING_ENERGY].append( - CYTOSIM_SCALE_FACTOR - * get_bending_energy_from_trace( - polymer_trace=points, - ) - ) - result[COMPRESSIONMETRIC.NON_COPLANARITY].append( - get_third_component_variance( - polymer_trace=points, - ) - ) - result[COMPRESSIONMETRIC.PEAK_ASYMMETRY].append( - get_asymmetry_of_peak( - polymer_trace=points, - ) - ) - result[COMPRESSIONMETRIC.CONTOUR_LENGTH].append( - get_contour_length_from_trace( - polymer_trace=points, + for metric in result.keys(): + result[metric].append( + metric.calculate_metric( + polymer_trace=points + ) ) - ) return result @@ -303,7 +277,7 @@ def visualize_individual_readdy_trajectories( n_replicates: int, n_timepoints: int, n_monomer_points: int, - overwrite_existing: bool = True, + recalculate: bool = True, ) -> None: """ Visualize individual ReaDDy simulations for select conditions and replicates. @@ -322,7 +296,7 @@ def visualize_individual_readdy_trajectories( Number of timepoints to visualize. n_monomer_points Number of control points for each polymer trace. - overwrite_existing + recalculate Overwrite any outputs that already exist? """ for condition_key in condition_keys: @@ -334,7 +308,7 @@ def visualize_individual_readdy_trajectories( output_key = f"{series_name}/viz/{series_key}_{rep_id:06d}.simularium" # Skip if output file already exists. - if not overwrite_existing and check_key(bucket, output_key): + if not recalculate and check_key(bucket, output_key): print(f"Simularium visualization [ { output_key } ] already exists. Skipping.") continue @@ -459,7 +433,7 @@ def visualize_individual_cytosim_trajectories( condition_keys: list[str], random_seeds: list[int], n_timepoints: int, - overwrite_existing: bool = True, + recalculate: bool = True, ) -> None: """ Visualize individual Cytosim simulations for select conditions and replicates. @@ -476,7 +450,7 @@ def visualize_individual_cytosim_trajectories( Random seeds for simulations. n_timepoints Number of timepoints to visualize. - overwrite_existing + recalculate Overwrite any outputs that already exist? """ for condition_key in condition_keys: @@ -486,7 +460,7 @@ def visualize_individual_cytosim_trajectories( output_key = f"{series_name}/viz/{series_key}_{seed:06d}.simularium" # Skip if output file already exists. - if not overwrite_existing and check_key(bucket, output_key): + if not recalculate and check_key(bucket, output_key): print(f"Simularium visualization [ { output_key } ] already exists. 
Skipping.") continue @@ -668,34 +642,34 @@ def visualize_all_compressed_trajectories_together( upload_file_to_s3(subcell_bucket, local_output_path, output_key) -def _empty_tomography_plots(): +def _empty_tomography_plots() -> Dict[CompressionMetric, HistogramPlotData]: return { - "CONTOUR_LENGTH" : HistogramPlotData( + CompressionMetric.CONTOUR_LENGTH : HistogramPlotData( title="Contour Length", xaxis_title="filament contour length (nm)", traces={}, ), - "COMPRESSION_RATIO" : HistogramPlotData( + CompressionMetric.COMPRESSION_RATIO : HistogramPlotData( title="Compression Percentage", xaxis_title="percent (%)", traces={}, ), - "AVERAGE_PERP_DISTANCE" : HistogramPlotData( + CompressionMetric.AVERAGE_PERP_DISTANCE : HistogramPlotData( title="Average Perpendicular Distance", xaxis_title="distance (nm)", traces={}, ), - "CALC_BENDING_ENERGY" : HistogramPlotData( + CompressionMetric.CALC_BENDING_ENERGY : HistogramPlotData( title="Bending Energy", xaxis_title="energy", traces={}, ), - "NON_COPLANARITY" : HistogramPlotData( + CompressionMetric.NON_COPLANARITY : HistogramPlotData( title="Non-coplanarity", xaxis_title="3rd component variance from PCA", traces={}, ), - "PEAK_ASYMMETRY" : HistogramPlotData( + CompressionMetric.PEAK_ASYMMETRY : HistogramPlotData( title="Peak Asymmetry", xaxis_title="normalized peak distance", traces={}, @@ -703,20 +677,25 @@ def _empty_tomography_plots(): } -def _add_tomography_plots(tomo_df, converter): +def _add_tomography_plots(tomo_df: pd.DataFrame, converter: TrajectoryConverter) -> None: """ Add plots to tomography data using pre-calculated metrics. """ plots = _empty_tomography_plots() - for metric_name in plots: - col_ix = list(tomo_df.columns).index(metric_name) - plots[metric_name].traces["actin"] = np.array(list(list(map(set, tomo_df.values.T))[col_ix])) - if metric_name == "COMPRESSION_RATIO": - plots[metric_name].traces["actin"] *= 100. - converter.add_plot(plots[metric_name], "histogram") - - -def _get_tomography_spatial_center_and_size(tomo_df): + for metric in plots: + values = [] + for _, fiber in tomo_df.groupby("id"): + polymer_trace = fiber[["xpos", "ypos", "zpos"]].values + values.append(metric.calculate_metric( + polymer_trace=polymer_trace + )) + plots[metric].traces["actin"] = np.array(values) + if metric == CompressionMetric.COMPRESSION_RATIO: + plots[metric].traces["actin"] *= 100. + converter.add_plot(plots[metric], "histogram") + + +def _get_tomography_spatial_center_and_size(tomo_df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]: """ Get the center and size of the tomography dataset in 3D space. 
""" From 27050ce1028ba256f6c052b5cd8029782dbe6db9 Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Tue, 2 Jul 2024 18:45:54 -0700 Subject: [PATCH 09/63] WIP debugging tomography visualization and analysis --- environment.yml | 2 +- .../compression_metrics/polymer_trace.py | 6 ++ .../_analyze_actin_cme_tomography_data.py | 10 +-- .../tomography_data/tomography_data.py | 34 ++++++++--- subcell_pipeline/constants.py | 4 +- subcell_pipeline/simulation/readdy/parser.py | 4 +- .../simulation/readdy/post_processor.py | 2 +- subcell_pipeline/temporary_file_io.py | 22 ++++--- .../_visualize_tomography_data.py | 2 +- .../visualization/spatial_annotator.py | 18 ++++-- subcell_pipeline/visualization/visualizer.py | 61 ++++++++++++------- 11 files changed, 109 insertions(+), 56 deletions(-) diff --git a/environment.yml b/environment.yml index ed55bb5..ee8e7d2 100644 --- a/environment.yml +++ b/environment.yml @@ -1,3 +1,3 @@ -name: subcell_analysis +name: subcell_pipeline dependencies: - conda-forge::readdy diff --git a/subcell_pipeline/analysis/compression_metrics/polymer_trace.py b/subcell_pipeline/analysis/compression_metrics/polymer_trace.py index 97164af..e265419 100644 --- a/subcell_pipeline/analysis/compression_metrics/polymer_trace.py +++ b/subcell_pipeline/analysis/compression_metrics/polymer_trace.py @@ -207,6 +207,12 @@ def get_bending_energy_from_trace( vec1 = polymer_trace[ind + 1] - polymer_trace[ind] vec2 = polymer_trace[ind + 2] - polymer_trace[ind + 1] + if np.isclose(np.linalg.norm(vec1), 0.) or np.isclose(np.linalg.norm(vec2), 0.): + # TODO handle this differently? + cos_angle[ind] = 0. + print("Warning: zero vector in bending energy calculation.") + continue + cos_angle[ind] = ( np.dot(vec1, vec2) / np.linalg.norm(vec1) / np.linalg.norm(vec2) ) diff --git a/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py b/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py index d77d679..8732556 100644 --- a/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py +++ b/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py @@ -70,6 +70,7 @@ ] # %% +# TODO These datasets have different scales (see plots), which is correct? branched_df = get_branched_tomography_data( bucket, name, repository, branched_datasets, TOMOGRAPHY_SCALE_FACTOR ) @@ -83,7 +84,7 @@ """ # %% -plot_tomography_data_by_dataset(branched_df) +plot_tomography_data_by_dataset(branched_df, bucket, f"{name}/{name}_plots_branched.png") # %% [markdown] """ @@ -91,7 +92,7 @@ """ # %% -plot_tomography_data_by_dataset(unbranched_df) +plot_tomography_data_by_dataset(unbranched_df, bucket, f"{name}/{name}_plots_unbranched.png") # %% [markdown] """ @@ -102,7 +103,7 @@ # %% # Number of monomer points per fiber -n_monomer_points = 200 +n_monomer_points = 20 # Minimum number of points for valid fiber minimum_points = 3 @@ -119,6 +120,7 @@ # %% sampled_key = f"{name}/{name}_coordinates_sampled.csv" +# TODO scale properly before concat (or do we not want to analyze both datasets?) 
all_tomogram_df = pd.concat([branched_df, unbranched_df]) sampled_data = sample_tomography_data( all_tomogram_df, bucket, sampled_key, n_monomer_points, minimum_points @@ -130,4 +132,4 @@ """ # %% -plot_tomography_data_by_dataset(sampled_data) +plot_tomography_data_by_dataset(sampled_data, bucket, f"{name}/{name}_plots_all_sampled.png") diff --git a/subcell_pipeline/analysis/tomography_data/tomography_data.py b/subcell_pipeline/analysis/tomography_data/tomography_data.py index c01cf35..b57d5a5 100644 --- a/subcell_pipeline/analysis/tomography_data/tomography_data.py +++ b/subcell_pipeline/analysis/tomography_data/tomography_data.py @@ -1,3 +1,5 @@ +import os + import matplotlib.pyplot as plt import numpy as np import pandas as pd @@ -5,7 +7,8 @@ from io_collection.load.load_dataframe import load_dataframe from io_collection.save.save_dataframe import save_dataframe -from ...constants import TOMOGRAPHY_SAMPLE_COLUMNS +from ...constants import TOMOGRAPHY_SAMPLE_COLUMNS, WORKING_DIR_PATH +from ...temporary_file_io import make_working_directory, upload_file_to_s3 def read_tomography_data(file: str, label: str = "fil") -> pd.DataFrame: @@ -32,7 +35,7 @@ def read_tomography_data(file: str, label: str = "fil") -> pd.DataFrame: elif len(coordinates.columns) == 5: coordinates.columns = ["object", label, "xpos", "ypos", "zpos"] else: - print("Data file [ {file} ] has an unexpected number of columns") + print(f"Data file [ {file} ] has an unexpected number of columns") return coordinates @@ -58,7 +61,7 @@ def get_branched_tomography_data( bucket: str, name: str, repository: str, - datasets: "list[tuple[str, str]]", + datasets: list[tuple[str, str]], scale_factor: float = 1.0, ) -> pd.DataFrame: """ @@ -92,7 +95,7 @@ def get_unbranched_tomography_data( bucket: str, name: str, repository: str, - datasets: "list[tuple[str, str]]", + datasets: list[tuple[str, str]], scale_factor: float = 1.0, ) -> pd.DataFrame: """ @@ -126,7 +129,7 @@ def get_tomography_data( bucket: str, name: str, repository: str, - datasets: "list[tuple[str, str]]", + datasets: list[tuple[str, str]], group: str, scale_factor: float = 1.0, ) -> pd.DataFrame: @@ -187,7 +190,7 @@ def sample_tomography_data( save_key: str, n_monomer_points: int, minimum_points: int, - sampled_columns: list[str] = SAMPLE_COLUMNS, + sampled_columns: list[str] = TOMOGRAPHY_SAMPLE_COLUMNS, ) -> pd.DataFrame: """ Sample selected columns from tomography data at given resolution. @@ -219,9 +222,15 @@ def sample_tomography_data( else: all_sampled_points = [] + # TODO sort experimental samples in order along the fiber before resampling + # (see simularium visualization) + for fiber_id, group in data.groupby("id"): if len(group) < minimum_points: continue + + # TODO resample uniformly along the fiber length rather than + # uniformly between experimental samples sampled_points = pd.DataFrame() sampled_points["monomer_ids"] = np.arange(n_monomer_points) @@ -245,7 +254,7 @@ def sample_tomography_data( return all_sampled_df -def plot_tomography_data_by_dataset(data: pd.DataFrame) -> None: +def plot_tomography_data_by_dataset(data: pd.DataFrame, bucket: str, output_key: str) -> None: """ Plot tomography data for each dataset. @@ -253,8 +262,14 @@ def plot_tomography_data_by_dataset(data: pd.DataFrame) -> None: ---------- data Tomography data. + bucket: + Where to upload the results. + output_key + File key for results. 
""" - + make_working_directory() + local_save_path = os.path.join(WORKING_DIR_PATH, os.path.basename(output_key)) + for dataset, group in data.groupby("dataset"): _, ax = plt.subplots(1, 3, figsize=(6, 2)) @@ -272,4 +287,5 @@ def plot_tomography_data_by_dataset(data: pd.DataFrame) -> None: ax[1].plot(fiber["xpos"], fiber["zpos"], marker="o", ms=1, lw=1) ax[2].plot(fiber["ypos"], fiber["zpos"], marker="o", ms=1, lw=1) - plt.show() + plt.savefig(local_save_path) + upload_file_to_s3(bucket, local_save_path, output_key) diff --git a/subcell_pipeline/constants.py b/subcell_pipeline/constants.py index b50d544..75c0856 100644 --- a/subcell_pipeline/constants.py +++ b/subcell_pipeline/constants.py @@ -7,7 +7,7 @@ from simulariumio import DisplayData, DISPLAY_TYPE -LOCAL_DOWNLOADS_PATH: str = "aws_downloads/" +WORKING_DIR_PATH: str = "data/" COLUMN_NAMES: List[str] = [ "fiber_id", @@ -199,7 +199,7 @@ def READDY_DISPLAY_DATA() -> Dict[str, DisplayData]: TOMOGRAPHY_SAMPLE_COLUMNS: list[str] = ["xpos", "ypos", "zpos"] -TOMOGRAPHY_VIZ_SCALE: float = 0.1 +TOMOGRAPHY_VIZ_SCALE: float = 1000. TOMOGRAPHY_MIN_COMPRESSION: int = 2 diff --git a/subcell_pipeline/simulation/readdy/parser.py b/subcell_pipeline/simulation/readdy/parser.py index 19e5b81..78c7cd7 100644 --- a/subcell_pipeline/simulation/readdy/parser.py +++ b/subcell_pipeline/simulation/readdy/parser.py @@ -20,7 +20,7 @@ ACTIN_PARTICLE_TYPES, IDEAL_ACTIN_POSITIONS, IDEAL_ACTIN_VECTOR_TO_AXIS, - LOCAL_DOWNLOADS_PATH, + WORKING_DIR_PATH, ) from ...temporary_file_io import download_readdy_hdf5 @@ -49,7 +49,7 @@ def readdy_post_processor( n_timepoints Number of timepoints to visualize. """ - h5_file_path = os.path.join(LOCAL_DOWNLOADS_PATH, f"{series_key}_{rep_ix}.h5") + h5_file_path = os.path.join(WORKING_DIR_PATH, f"{series_key}_{rep_ix}.h5") rep_id = rep_ix + 1 pickle_key = f"{series_name}/data/{series_key}_{rep_id:06d}.pkl" time_inc = READDY_TOTAL_STEPS[series_key] / n_timepoints diff --git a/subcell_pipeline/simulation/readdy/post_processor.py b/subcell_pipeline/simulation/readdy/post_processor.py index ac6510f..540b768 100644 --- a/subcell_pipeline/simulation/readdy/post_processor.py +++ b/subcell_pipeline/simulation/readdy/post_processor.py @@ -8,7 +8,7 @@ from numpy import ndarray from tqdm import tqdm -from ..compression_analysis import get_contour_length_from_trace +from ...analysis.compression_metrics.polymer_trace import get_contour_length_from_trace from .data_structures import FrameData diff --git a/subcell_pipeline/temporary_file_io.py b/subcell_pipeline/temporary_file_io.py index 92d38c8..f5fb515 100644 --- a/subcell_pipeline/temporary_file_io.py +++ b/subcell_pipeline/temporary_file_io.py @@ -6,15 +6,19 @@ import boto3 from botocore.exceptions import ClientError -from .constants import LOCAL_DOWNLOADS_PATH +from .constants import WORKING_DIR_PATH s3_client = boto3.client("s3") -def _make_download_dir() -> None: - if not os.path.isdir(LOCAL_DOWNLOADS_PATH): - os.makedirs(LOCAL_DOWNLOADS_PATH) +def make_working_directory() -> None: + """ + Make a local working directory at the + WORKING_DIR_PATH. + """ + if not os.path.isdir(WORKING_DIR_PATH): + os.makedirs(WORKING_DIR_PATH) def _download_s3_file( @@ -62,8 +66,9 @@ def download_readdy_hdf5( replicate_ix Replicate index. 
""" + make_working_directory() aws_h5_key = f"{series_name}/outputs/{series_key}_{rep_ix}.h5" - local_h5_path = os.path.join(LOCAL_DOWNLOADS_PATH, f"{series_key}_{rep_ix}.h5") + local_h5_path = os.path.join(WORKING_DIR_PATH, f"{series_key}_{rep_ix}.h5") return _download_s3_file(bucket, aws_h5_key, local_h5_path) @@ -87,14 +92,14 @@ def download_all_readdy_outputs( n_replicates Number of simulation replicates. """ - _make_download_dir() + make_working_directory() for condition_key in condition_keys: series_key = f"{series_name}_{condition_key}" if condition_key else series_name for rep_ix in range(n_replicates): - local_h5_path = os.path.join(LOCAL_DOWNLOADS_PATH, f"{series_key}_{rep_ix}.h5") + local_h5_path = os.path.join(WORKING_DIR_PATH, f"{series_key}_{rep_ix}.h5") # Skip if file already exists. if os.path.isfile(local_h5_path): @@ -102,7 +107,7 @@ def download_all_readdy_outputs( continue aws_h5_key = f"{series_name}/outputs/{series_key}_{rep_ix}.h5" - download_s3_file(bucket, aws_h5_key, local_h5_path) + _download_s3_file(bucket, aws_h5_key, local_h5_path) print(f"Downloaded data for [ {condition_key} ] replicate [ {rep_ix} ]") @@ -124,6 +129,7 @@ def upload_file_to_s3(bucket: str, src_path: str, s3_path: str) -> bool: print(f"!!! File does not exist to upload {src_path}") return False try: + bucket = bucket.split("s3://")[-1] s3_client.upload_file(src_path, bucket, s3_path) print(f"Uploaded to {s3_path}") return True diff --git a/subcell_pipeline/visualization/_visualize_tomography_data.py b/subcell_pipeline/visualization/_visualize_tomography_data.py index 980bf2f..e432405 100644 --- a/subcell_pipeline/visualization/_visualize_tomography_data.py +++ b/subcell_pipeline/visualization/_visualize_tomography_data.py @@ -21,7 +21,7 @@ Visualize segmented tomography data for actin fibers. 
- Input: `(name)/(name)_coordinates_sampled.csv` -- Output: `(name).simularium` +- Output: `(name)/(name).simularium` """ # %% diff --git a/subcell_pipeline/visualization/spatial_annotator.py b/subcell_pipeline/visualization/spatial_annotator.py index db6cb84..b42369d 100644 --- a/subcell_pipeline/visualization/spatial_annotator.py +++ b/subcell_pipeline/visualization/spatial_annotator.py @@ -158,9 +158,12 @@ def add_sphere_agents( SpatialAnnotator._added_dimensions_for_spheres(sphere_positions) ) max_used_uid = max(list(np.unique(traj_data.agent_data.unique_ids))) + max_spheres = 0 for time_ix in range(total_steps): start_ix = int(traj_data.agent_data.n_agents[time_ix]) n_spheres = len(sphere_positions[time_ix]) + if n_spheres > max_spheres: + max_spheres = n_spheres end_ix = start_ix + n_spheres new_agent_data.unique_ids[time_ix][start_ix:end_ix] = np.arange( max_used_uid + 1, max_used_uid + 1 + n_spheres @@ -169,15 +172,18 @@ def add_sphere_agents( new_agent_data.viz_types[time_ix][start_ix:end_ix] = n_spheres * [ VIZ_TYPE.DEFAULT ] - new_agent_data.types[time_ix] += n_spheres * [type_name] + new_agent_data.types[time_ix] += [f"{type_name} {ix}" for ix in range(n_spheres)] new_agent_data.positions[time_ix][start_ix:end_ix] = sphere_positions[ time_ix ][:n_spheres] new_agent_data.radii[time_ix][start_ix:end_ix] = n_spheres * [radius] - new_agent_data.display_data[type_name] = DisplayData( - name=type_name, - display_type=DISPLAY_TYPE.SPHERE, - color=color, - ) + colors = ["#0000ff", "#00ff00", "#ffff00", "#ff0000", "#ff00ff"] + for ix in range(max_spheres): + tn = f"{type_name} {ix}" + new_agent_data.display_data[tn] = DisplayData( + name=tn, + display_type=DISPLAY_TYPE.SPHERE, + color=colors[ix % len(colors)], + ) traj_data.agent_data = new_agent_data return traj_data diff --git a/subcell_pipeline/visualization/visualizer.py b/subcell_pipeline/visualization/visualizer.py index f11cfe6..e1b5fe6 100644 --- a/subcell_pipeline/visualization/visualizer.py +++ b/subcell_pipeline/visualization/visualizer.py @@ -16,7 +16,6 @@ DisplayData, DISPLAY_TYPE, UnitData, - EveryNthTimestepFilter, ScatterPlotData, HistogramPlotData, CameraData, @@ -24,11 +23,12 @@ AgentData, DimensionData, ) +from simulariumio.filters import EveryNthTimestepFilter from simulariumio.cytosim import CytosimConverter, CytosimData, CytosimObjectInfo from simulariumio.readdy import ReaddyConverter, ReaddyData from ..constants import ( BOX_SIZE, - LOCAL_DOWNLOADS_PATH, + WORKING_DIR_PATH, READDY_TIMESTEP, READDY_TOTAL_STEPS, READDY_SAVED_FRAMES, @@ -42,7 +42,8 @@ from ..temporary_file_io import ( download_readdy_hdf5, - upload_file_to_s3 + upload_file_to_s3, + make_working_directory, ) from ..constants import ( BOX_SIZE, @@ -156,7 +157,7 @@ def _generate_plot_data(fiber_points: np.ndarray) -> Dict[CompressionMetric, lis def _add_individual_plots( converter: TrajectoryConverter, - fiber_points: np.ndarry, + fiber_points: np.ndarray, times: np.ndarray, ) -> None: """ @@ -251,7 +252,7 @@ def _visualize_readdy_trajectory( """ Save a Simularium file for a single ReaDDy trajectory with plots and spatial annotations. 
""" - path_to_readdy_h5 = os.path.join(LOCAL_DOWNLOADS_PATH, f"{series_key}_{rep_ix}.h5") + path_to_readdy_h5 = os.path.join(WORKING_DIR_PATH, f"{series_key}_{rep_ix}.h5") converter = _load_readdy_simularium(path_to_readdy_h5, series_key) # load data shaped for analysis from a pickle if it exists, otherwise save one @@ -303,7 +304,7 @@ def visualize_individual_readdy_trajectories( series_key = f"{series_name}_{condition_key}" if condition_key else series_name for rep_ix in range(n_replicates): - local_h5_path = os.path.join(LOCAL_DOWNLOADS_PATH, f"{series_key}_{rep_ix}.h5") + local_h5_path = os.path.join(WORKING_DIR_PATH, f"{series_key}_{rep_ix}.h5") rep_id = rep_ix + 1 output_key = f"{series_name}/viz/{series_key}_{rep_id:06d}.simularium" @@ -471,7 +472,7 @@ def visualize_individual_cytosim_trajectories( singles_data = load_text( bucket, output_key_template % "singles.txt" ) - local_output_path = os.path.join(LOCAL_DOWNLOADS_PATH, output_key) + local_output_path = os.path.join(WORKING_DIR_PATH, output_key) _visualize_cytosim_trajectory( fiber_points_data, singles_data, local_output_path, n_timepoints ) @@ -551,7 +552,7 @@ def _load_all_together_simularium( def _add_combined_plots( converter: TrajectoryConverter, - fiber_points: np.ndarry, + fiber_points: np.ndarray, type_names: list[str], n_timepoints: int, ) -> None: @@ -636,7 +637,7 @@ def visualize_all_compressed_trajectories_together( converter = _load_all_together_simularium(fiber_points, type_names, display_data) _add_combined_plots(converter, fiber_points, type_names, n_timepoints) output_key = "actin_compression_cytosim_readdy.simularium" - local_output_path = os.path.join(LOCAL_DOWNLOADS_PATH, output_key) + local_output_path = os.path.join(WORKING_DIR_PATH, output_key) converter.save(local_output_path) upload_file_to_s3(subcell_bucket, local_output_path, output_key) @@ -723,7 +724,7 @@ def _load_tomography_simularium(bucket: str, name: str) -> TrajectoryConverter: """ tomo_key = f"{name}/{name}_coordinates_sampled.csv" tomo_df = load_dataframe(bucket, tomo_key) - tomo_df = tomo_df.sort_values(by=["id"]) + tomo_df = tomo_df.sort_values(by=["id", "monomer_ids"]) tomo_df = tomo_df.reset_index(drop=True) names, ids = np.unique(np.array(list(tomo_df["id"])), return_index=True) traj_ids = names[np.argsort(ids)] @@ -731,28 +732,33 @@ def _load_tomography_simularium(bucket: str, name: str) -> TrajectoryConverter: max_points = 0 subpoints = [] compression_ratios = [] + test_points = [] for traj_id in traj_ids: fiber_df = tomo_df.loc[tomo_df["id"] == traj_id] - points = np.array(fiber_df[["xpos", "ypos", "zpos"]]) - center - subpoints.append(TOMOGRAPHY_VIZ_SCALE * points.flatten()) - compression_ratios.append(list(fiber_df["id"])[0]) + points = TOMOGRAPHY_VIZ_SCALE * (np.array(fiber_df[["xpos", "ypos", "zpos"]]) - center) + subpoints.append(points.flatten()) + compression_ratio = CompressionMetric.COMPRESSION_RATIO.calculate_metric(points) + compression_ratios.append(compression_ratio) + if compression_ratio > 0.75: + test_points.append(points) if len(fiber_df) > max_points: max_points = len(fiber_df) n_agents = len(subpoints) compression_percents = 100. 
* np.array(compression_ratios) min_compression_ratio = np.amin(compression_percents) max_compression_ratio = np.amax(compression_percents) - bins = np.linspace(min_compression_ratio, max_compression_ratio, 100) + bins = np.linspace(min_compression_ratio, max_compression_ratio, 20) digitized = np.digitize(compression_percents, bins) type_names = [] display_data = {} - type_name_min = f"actin less than {TOMOGRAPHY_MIN_COMPRESSION}.0 percent compressed" + type_name_min = f"actin less than {TOMOGRAPHY_MIN_COMPRESSION} percent compressed" for agent_ix in range(n_agents): bin_percent = int(10 * bins[digitized[agent_ix] - 1]) / 10. if bin_percent < TOMOGRAPHY_MIN_COMPRESSION: type_name = type_name_min else: - type_name = f"actin {bin_percent} percent compressed" + bin_percent_name = str(round(bin_percent)) + type_name = f"actin {bin_percent_name} percent compressed" type_names.append(type_name) if type_name not in display_data: display_data[type_name] = DisplayData( @@ -779,17 +785,27 @@ def _load_tomography_simularium(bucket: str, name: str) -> TrajectoryConverter: agent_data.n_subpoints[0][agent_ix] = n_subpoints agent_data.subpoints[0][agent_ix][:n_subpoints] = subpoints[agent_ix] agent_data.display_data = display_data - UNIT_SCALE_FACTOR = 10 / 2. # not sure where this factor came from or if it is still needed + UNIT_SCALE_FACTOR = 1 / 20. # TODO not sure where this factor came from or if it is still needed traj_data = TrajectoryData( meta_data=MetaData( box_size=TOMOGRAPHY_VIZ_SCALE * box_size, camera_defaults=CameraData(position=np.array([0.0, 0.0, 70.0])) ), agent_data=agent_data, - spatial_units=UnitData("um", UNIT_SCALE_FACTOR * TOMOGRAPHY_SCALE_FACTOR / TOMOGRAPHY_VIZ_SCALE), # 0.003 + spatial_units=UnitData("um", UNIT_SCALE_FACTOR * TOMOGRAPHY_SCALE_FACTOR / TOMOGRAPHY_VIZ_SCALE), # 0.003? ) converter = TrajectoryConverter(traj_data) _add_tomography_plots(tomo_df, converter) + + # TODO remove after debugging fiber point sampling order + for ix, points in enumerate(test_points): + converter._data = SpatialAnnotator.add_sphere_agents( + converter._data, + [points], + type_name=f"fiber point", + radius=0.8, + ) + return converter @@ -801,12 +817,13 @@ def visualize_tomography(bucket: str, name: str) -> None: ---------- data Tomography data. 
- """ - output_key = f"{name}.simularium" - local_output_path = os.path.join(LOCAL_DOWNLOADS_PATH, output_key) + """ + output_key = f"{name}/{name}" + local_output_path = os.path.join(WORKING_DIR_PATH, name) + make_working_directory() converter = _load_tomography_simularium(bucket, name) converter.save(local_output_path) - upload_file_to_s3(bucket, local_output_path, output_key) + upload_file_to_s3(bucket, f"{local_output_path}.simularium", f"{output_key}.simularium") \ No newline at end of file From 97b09e31bf8f8f32b50327d3ae24b7aff0da1e2d Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Mon, 8 Jul 2024 16:32:15 -0700 Subject: [PATCH 10/63] dim reduction viz, tomography viz in separate files --- .../_run_pca_on_compression_simulations.py | 2 +- .../_visualize_dimensionality_reduction.py | 107 ++++++ .../visualization/spatial_annotator.py | 3 + subcell_pipeline/visualization/visualizer.py | 323 ++++++++++-------- 4 files changed, 290 insertions(+), 145 deletions(-) create mode 100644 subcell_pipeline/visualization/_visualize_dimensionality_reduction.py diff --git a/subcell_pipeline/analysis/dimensionality_reduction/_run_pca_on_compression_simulations.py b/subcell_pipeline/analysis/dimensionality_reduction/_run_pca_on_compression_simulations.py index 14596d1..bdcd38c 100644 --- a/subcell_pipeline/analysis/dimensionality_reduction/_run_pca_on_compression_simulations.py +++ b/subcell_pipeline/analysis/dimensionality_reduction/_run_pca_on_compression_simulations.py @@ -81,7 +81,7 @@ """ # %% -readdy_data = get_merged_data(readdy_bucket, series_name, condition_keys, random_seeds) +readdy_data = get_merged_data(readdy_bucket, f"ACTIN_{series_name}", condition_keys, random_seeds) readdy_data["simulator"] = "readdy" # %% diff --git a/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py b/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py new file mode 100644 index 0000000..70b7c74 --- /dev/null +++ b/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py @@ -0,0 +1,107 @@ +# %% [markdown] +# # Visualize dimensionality reduction analysis of actin filaments + +# %% [markdown] +""" + +Notebook contains steps for visualizing PCA space +for actin fibers. + +- [Pre-process Inputs](#pre-process-inputs) +- [Visualize Inverse PCA](#visualize-inverse-pca) +""" + +# %% +if __name__ != "__main__": + raise ImportError("This module is a notebook and is not meant to be imported") + + +# %% [markdown] +""" +## Pre-process Inputs + +If more analysis outputs for PCA are saved in S3, this will no longer be necessary. 
+
+- Input: `(series_name)/analysis/(series_name)_(align_key).csv` (for Cytosim and ReaDDy)
+- Output: `actin_compression_pca_results.csv` and `actin_compression_pca.pkl`
+"""
+
+# %%
+import pandas as pd
+from io_collection.save.save_dataframe import save_dataframe
+from io_collection.save.save_pickle import save_pickle
+from subcell_pipeline.analysis.dimensionality_reduction.fiber_data import get_merged_data
+from subcell_pipeline.analysis.dimensionality_reduction.pca_dim_reduction import run_pca
+
+# Name of the simulation series
+series_name: str = "COMPRESSION_VELOCITY"
+
+# S3 bucket for input and output files
+bucket = "s3://subcell-working-bucket"
+
+# S3 bucket for Cytosim input and output files
+cytosim_bucket: str = "s3://cytosim-working-bucket"
+
+# S3 bucket for ReaDDy input and output files
+readdy_bucket: str = "s3://readdy-working-bucket"
+
+# Random seeds for simulations
+random_seeds: list[int] = [1, 2, 3, 4, 5]
+
+# List of condition file keys for each velocity
+condition_keys: list[str] = ["0047", "0150", "0470", "1500"]
+
+readdy_data = get_merged_data(readdy_bucket, f"ACTIN_{series_name}", condition_keys, random_seeds)
+readdy_data["simulator"] = "readdy"
+
+cytosim_data = get_merged_data(
+    cytosim_bucket, series_name, condition_keys, random_seeds
+)
+cytosim_data["simulator"] = "cytosim"
+
+data = pd.concat([cytosim_data, readdy_data])
+data["repeat"] = data["seed"] - 1
+data["velocity"] = data["key"].astype("int") / 10
+
+time_map = {
+    ("cytosim", "0047"): 0.031685,
+    ("cytosim", "0150"): 0.01,
+    ("cytosim", "0470"): 0.00316,
+    ("cytosim", "1500"): 0.001,
+    ("readdy", "0047"): 1000,
+    ("readdy", "0150"): 1000,
+    ("readdy", "0470"): 1000,
+    ("readdy", "1500"): 1000,
+}
+
+pca_results, pca = run_pca(data)
+
+save_dataframe(bucket, "actin_compression_pca_results.csv", pca_results)
+save_pickle(bucket, "actin_compression_pca.pkl", pca)
+
+# %% [markdown]
+"""
+## Visualize Inverse PCA
+
+Visualize PCA space for actin fibers. 
+ +- Input: `actin_compression_pca_results.csv` and `actin_compression_pca.pkl` +- Output: `(name)/(name).simularium` +""" + +# %% +from subcell_pipeline.visualization.visualizer import ( + visualize_dimensionality_reduction, +) + +# %% +# S3 bucket for input and output files +bucket = "s3://subcell-working-bucket" + +# File key for PCA results dataframe +pca_results_key = "actin_compression_pca_results.csv" + +# File key for PCA object pickle +pca_pickle_key = "actin_compression_pca.pkl" + +visualize_dimensionality_reduction(bucket, pca_results_key, pca_pickle_key) diff --git a/subcell_pipeline/visualization/spatial_annotator.py b/subcell_pipeline/visualization/spatial_annotator.py index b42369d..4ede56c 100644 --- a/subcell_pipeline/visualization/spatial_annotator.py +++ b/subcell_pipeline/visualization/spatial_annotator.py @@ -177,7 +177,10 @@ def add_sphere_agents( time_ix ][:n_spheres] new_agent_data.radii[time_ix][start_ix:end_ix] = n_spheres * [radius] + + # TODO use color parameter after finished debugging colors = ["#0000ff", "#00ff00", "#ffff00", "#ff0000", "#ff00ff"] + for ix in range(max_spheres): tn = f"{type_name} {ix}" new_agent_data.display_data[tn] = DisplayData( diff --git a/subcell_pipeline/visualization/visualizer.py b/subcell_pipeline/visualization/visualizer.py index e1b5fe6..eb14b95 100644 --- a/subcell_pipeline/visualization/visualizer.py +++ b/subcell_pipeline/visualization/visualizer.py @@ -5,10 +5,12 @@ import numpy as np import pandas as pd +import matplotlib.pyplot as plt from pint import UnitRegistry from io_collection.keys.check_key import check_key from io_collection.load.load_text import load_text from io_collection.load.load_dataframe import load_dataframe +from io_collection.load.load_pickle import load_pickle from simulariumio import ( TrajectoryConverter, MetaData, @@ -33,6 +35,7 @@ READDY_TOTAL_STEPS, READDY_SAVED_FRAMES, READDY_DISPLAY_DATA, + CYTOSIM_SCALE_FACTOR, SIMULATOR_COLORS, TOMOGRAPHY_SAMPLE_COLUMNS, TOMOGRAPHY_VIZ_SCALE, @@ -45,11 +48,6 @@ upload_file_to_s3, make_working_directory, ) -from ..constants import ( - BOX_SIZE, - READDY_TOTAL_STEPS, - CYTOSIM_SCALE_FACTOR, -) from subcell_pipeline.analysis.compression_metrics.compression_metric import ( CompressionMetric, ) @@ -248,7 +246,7 @@ def _visualize_readdy_trajectory( rep_ix: int, n_timepoints: int, n_monomer_points: int, -) -> None: +) -> TrajectoryConverter: """ Save a Simularium file for a single ReaDDy trajectory with plots and spatial annotations. """ @@ -263,12 +261,25 @@ def _visualize_readdy_trajectory( _add_readdy_spatial_annotations( converter, post_processor, fiber_chain_ids, axis_positions, fiber_points ) - - # save simularium file - converter.save( - output_path=path_to_readdy_h5, - validate_ids=False, # for performance - ) + + return converter + + +def _save_and_upload_simularium_file( + converter: TrajectoryConverter, + bucket: str, + output_key: str +) -> None: + """ + Save a local simularium file and upload it to s3. 
+ """ + local_key = os.path.splitext(os.path.basename(output_key))[0] + local_output_path = os.path.join(WORKING_DIR_PATH, local_key) + make_working_directory() + + converter.save(local_output_path) + + # upload_file_to_s3(bucket, f"{local_output_path}.simularium", output_key) TODO def visualize_individual_readdy_trajectories( @@ -304,7 +315,6 @@ def visualize_individual_readdy_trajectories( series_key = f"{series_name}_{condition_key}" if condition_key else series_name for rep_ix in range(n_replicates): - local_h5_path = os.path.join(WORKING_DIR_PATH, f"{series_key}_{rep_ix}.h5") rep_id = rep_ix + 1 output_key = f"{series_name}/viz/{series_key}_{rep_id:06d}.simularium" @@ -316,8 +326,7 @@ def visualize_individual_readdy_trajectories( print(f"Visualizing data for [ {condition_key} ] replicate [ {rep_ix} ]") download_readdy_hdf5(bucket, series_name, series_key, rep_ix) - - _visualize_readdy_trajectory( + converter = _visualize_readdy_trajectory( bucket, series_name, series_key, @@ -325,8 +334,7 @@ def visualize_individual_readdy_trajectories( n_timepoints, n_monomer_points, ) - - upload_file_to_s3(bucket, f"{local_h5_path}.simularium", output_key) + _save_and_upload_simularium_file(converter, bucket, output_key) ureg = UnitRegistry() @@ -415,7 +423,7 @@ def _visualize_cytosim_trajectory( singles_data: str, local_output_path: str, n_timepoints: int, -) -> None: +) -> TrajectoryConverter: """ Save a Simularium file for a single Cytosim trajectory with plots. """ @@ -425,7 +433,7 @@ def _visualize_cytosim_trajectory( converter._data.agent_data.subpoints, converter._data.agent_data.times ) - converter.save(local_output_path) + return converter def visualize_individual_cytosim_trajectories( @@ -473,11 +481,10 @@ def visualize_individual_cytosim_trajectories( bucket, output_key_template % "singles.txt" ) local_output_path = os.path.join(WORKING_DIR_PATH, output_key) - _visualize_cytosim_trajectory( + converter = _visualize_cytosim_trajectory( fiber_points_data, singles_data, local_output_path, n_timepoints ) - - upload_file_to_s3(bucket, local_output_path, output_key) + _save_and_upload_simularium_file(converter, bucket, output_key) def _load_fiber_points_from_dataframe( @@ -507,47 +514,77 @@ def _load_fiber_points_from_dataframe( ) ) return np.array(result) - - -def _load_all_together_simularium( + + +def _generate_simularium_for_fiber_points( fiber_points: list[np.ndarray], type_names: list[str], + meta_data: MetaData, display_data: Dict[str, DisplayData], + time_units: UnitData, + spatial_units: UnitData, ) -> TrajectoryConverter: """ - Generate a TrajectoryConverter with all simulations from ReaDDy and Cytosim together. 
+ Generate a TrajectoryConverter for the fiber_points + (list of fibers, each = timesteps X points X 3) """ - total_conditions = len(fiber_points) + # build subpoints array with correct dimensions + n_fibers = len(fiber_points) total_steps = fiber_points[0].shape[0] - n_monomer_points = fiber_points[0].shape[1] - subpoints = [] + n_points = fiber_points[0].shape[1] + subpoints = np.zeros((total_steps, n_fibers, n_points, 3)) + for time_ix in range(total_steps): + for fiber_ix in range(n_fibers): + subpoints[time_ix][fiber_ix] = fiber_points[fiber_ix][time_ix] + subpoints = subpoints.reshape((total_steps, n_fibers, 3 * n_points)) + # convert to simularium traj_data = TrajectoryData( - meta_data=MetaData( - box_size=np.array([BOX_SIZE, BOX_SIZE, BOX_SIZE]), - camera_defaults=CameraData( - position=np.array([10.0, 0.0, 200.0]), - look_at_position=np.array([10.0, 0.0, 0.0]), - fov_degrees=60.0, - ), - trajectory_title="Actin compression in Cytosim and Readdy", - ), + meta_data=meta_data, agent_data=AgentData( times=np.arange(total_steps), - n_agents=total_conditions * np.ones((total_steps)), - viz_types=1001 - * np.ones((total_steps, total_conditions)), # fiber viz type = 1001 - unique_ids=np.array(total_steps * [list(range(total_conditions))]), + n_agents=n_fibers * np.ones((total_steps)), + viz_types=1001 * np.ones((total_steps, n_fibers)), # fiber viz type = 1001 + unique_ids=np.array(total_steps * [list(range(n_fibers))]), types=total_steps * [type_names], - positions=np.zeros((total_steps, total_conditions, 3)), - radii=np.ones((total_steps, total_conditions)), - n_subpoints=3 * n_monomer_points * np.ones((total_steps, total_conditions)), - subpoints=align(subpoints), + positions=np.zeros((total_steps, n_fibers, 3)), + radii=0.5 * np.ones((total_steps, n_fibers)), + n_subpoints=3 * n_points * np.ones((total_steps, n_fibers)), + subpoints=subpoints, display_data=display_data, ), - time_units=UnitData("count"), # frames - spatial_units=UnitData("nm"), # nanometer + time_units=time_units, + spatial_units=spatial_units, ) return TrajectoryConverter(traj_data) + + +def _load_all_together_simularium( + fiber_points: list[np.ndarray], + type_names: list[str], + display_data: Dict[str, DisplayData], +) -> TrajectoryConverter: + """ + Generate a TrajectoryConverter with all simulations from ReaDDy and Cytosim together. 
+ """ + meta_data=MetaData( + box_size=BOX_SIZE, + camera_defaults=CameraData( + position=np.array([10.0, 0.0, 200.0]), + look_at_position=np.array([10.0, 0.0, 0.0]), + fov_degrees=60.0, + ), + trajectory_title="Actin compression in Cytosim and Readdy", + ) + time_units=UnitData("count") # frames + spatial_units=UnitData("nm") # nanometer + return _generate_simularium_for_fiber_points( + fiber_points, + type_names, + meta_data, + display_data, + time_units, + spatial_units, + ) def _add_combined_plots( @@ -637,10 +674,7 @@ def visualize_all_compressed_trajectories_together( converter = _load_all_together_simularium(fiber_points, type_names, display_data) _add_combined_plots(converter, fiber_points, type_names, n_timepoints) output_key = "actin_compression_cytosim_readdy.simularium" - local_output_path = os.path.join(WORKING_DIR_PATH, output_key) - converter.save(local_output_path) - - upload_file_to_s3(subcell_bucket, local_output_path, output_key) + _save_and_upload_simularium_file(converter, subcell_bucket, output_key) def _empty_tomography_plots() -> Dict[CompressionMetric, HistogramPlotData]: @@ -678,17 +712,16 @@ def _empty_tomography_plots() -> Dict[CompressionMetric, HistogramPlotData]: } -def _add_tomography_plots(tomo_df: pd.DataFrame, converter: TrajectoryConverter) -> None: +def _add_tomography_plots(fiber_points: list[np.ndarray], converter: TrajectoryConverter) -> None: """ Add plots to tomography data using pre-calculated metrics. """ plots = _empty_tomography_plots() for metric in plots: values = [] - for _, fiber in tomo_df.groupby("id"): - polymer_trace = fiber[["xpos", "ypos", "zpos"]].values + for fiber in fiber_points: values.append(metric.calculate_metric( - polymer_trace=polymer_trace + polymer_trace=fiber )) plots[metric].traces["actin"] = np.array(values) if metric == CompressionMetric.COMPRESSION_RATIO: @@ -715,115 +748,117 @@ def _get_tomography_spatial_center_and_size(tomo_df: pd.DataFrame) -> Tuple[np.n mins = np.array(mins) maxs = np.array(maxs) return mins + 0.5 * (maxs - mins), maxs - mins - - -def _load_tomography_simularium(bucket: str, name: str) -> TrajectoryConverter: + + +def visualize_tomography(bucket: str, name: str) -> None: """ - Load sampled tomography data and generate a TrajectoryConverter - to visualize it in Simularium. + Visualize segmented tomography data for actin fibers. + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + name + Name of tomography dataset. """ tomo_key = f"{name}/{name}_coordinates_sampled.csv" tomo_df = load_dataframe(bucket, tomo_key) tomo_df = tomo_df.sort_values(by=["id", "monomer_ids"]) tomo_df = tomo_df.reset_index(drop=True) + time_units = UnitData("count") + spatial_units = UnitData("um", 0.003) names, ids = np.unique(np.array(list(tomo_df["id"])), return_index=True) traj_ids = names[np.argsort(ids)] - center, box_size = _get_tomography_spatial_center_and_size(tomo_df) - max_points = 0 - subpoints = [] - compression_ratios = [] - test_points = [] for traj_id in traj_ids: fiber_df = tomo_df.loc[tomo_df["id"] == traj_id] - points = TOMOGRAPHY_VIZ_SCALE * (np.array(fiber_df[["xpos", "ypos", "zpos"]]) - center) - subpoints.append(points.flatten()) - compression_ratio = CompressionMetric.COMPRESSION_RATIO.calculate_metric(points) - compression_ratios.append(compression_ratio) - if compression_ratio > 0.75: - test_points.append(points) - if len(fiber_df) > max_points: - max_points = len(fiber_df) - n_agents = len(subpoints) - compression_percents = 100. 
* np.array(compression_ratios) - min_compression_ratio = np.amin(compression_percents) - max_compression_ratio = np.amax(compression_percents) - bins = np.linspace(min_compression_ratio, max_compression_ratio, 20) - digitized = np.digitize(compression_percents, bins) - type_names = [] - display_data = {} - type_name_min = f"actin less than {TOMOGRAPHY_MIN_COMPRESSION} percent compressed" - for agent_ix in range(n_agents): - bin_percent = int(10 * bins[digitized[agent_ix] - 1]) / 10. - if bin_percent < TOMOGRAPHY_MIN_COMPRESSION: - type_name = type_name_min - else: - bin_percent_name = str(round(bin_percent)) - type_name = f"actin {bin_percent_name} percent compressed" - type_names.append(type_name) - if type_name not in display_data: - display_data[type_name] = DisplayData( - name=type_name, + center, box_size = _get_tomography_spatial_center_and_size(fiber_df) + fiber_points = TOMOGRAPHY_VIZ_SCALE * (np.array([fiber_df[["xpos", "ypos", "zpos"]]]) - center) + type_names = ["Raw data"] + display_data = { + "Raw data" : DisplayData( + name="Raw data", display_type=DISPLAY_TYPE.FIBER, + color="#888888", ) - display_data[type_name_min] = DisplayData( - name=type_name_min, - display_type=DISPLAY_TYPE.FIBER, - color="#222222", - ) - agent_data = AgentData.from_dimensions(DimensionData( - total_steps=1, - max_agents=n_agents, - max_subpoints=3 * max_points, - )) - agent_data.n_agents[0] = n_agents - agent_data.viz_types[0] = 1001.0 * np.ones(n_agents) - agent_data.unique_ids[0] = np.arange(n_agents) - agent_data.types[0] = type_names - agent_data.radii *= 0.5 - for agent_ix in range(n_agents): - n_subpoints = subpoints[agent_ix].shape[0] - agent_data.n_subpoints[0][agent_ix] = n_subpoints - agent_data.subpoints[0][agent_ix][:n_subpoints] = subpoints[agent_ix] - agent_data.display_data = display_data - UNIT_SCALE_FACTOR = 1 / 20. # TODO not sure where this factor came from or if it is still needed - traj_data = TrajectoryData( - meta_data=MetaData( - box_size=TOMOGRAPHY_VIZ_SCALE * box_size, - camera_defaults=CameraData(position=np.array([0.0, 0.0, 70.0])) - ), - agent_data=agent_data, - spatial_units=UnitData("um", UNIT_SCALE_FACTOR * TOMOGRAPHY_SCALE_FACTOR / TOMOGRAPHY_VIZ_SCALE), # 0.003? - ) - converter = TrajectoryConverter(traj_data) - _add_tomography_plots(tomo_df, converter) - - # TODO remove after debugging fiber point sampling order - for ix, points in enumerate(test_points): + } + converter = _generate_simularium_for_fiber_points( + [fiber_points], + type_names, + MetaData( + box_size=TOMOGRAPHY_VIZ_SCALE * box_size, + camera_defaults=CameraData(position=np.array([0.0, 0.0, 70.0])) + ), + display_data, + time_units, + spatial_units, + ) + + # TODO remove after debugging fiber point order converter._data = SpatialAnnotator.add_sphere_agents( converter._data, - [points], - type_name=f"fiber point", + [fiber_points[0]], + type_name="point", radius=0.8, ) - - return converter + + _add_tomography_plots([fiber_points[0]], converter) + _save_and_upload_simularium_file(converter, bucket, f"{name}/{name}_{traj_id}.simularium") -def visualize_tomography(bucket: str, name: str) -> None: +def visualize_dimensionality_reduction(bucket: str, pca_results_key: str, pca_pickle_key: str) -> None: """ - Visualize segmented tomography data for actin fibers. + Visualize PCA space for actin fibers. Parameters ---------- - data - Tomography data. 
+ """ - output_key = f"{name}/{name}" - local_output_path = os.path.join(WORKING_DIR_PATH, name) - make_working_directory() + pca_results = load_dataframe(bucket, pca_results_key) + pca = load_pickle(bucket, pca_pickle_key) - converter = _load_tomography_simularium(bucket, name) - converter.save(local_output_path) - - upload_file_to_s3(bucket, f"{local_output_path}.simularium", f"{output_key}.simularium") - \ No newline at end of file + samples = np.arange(-2, 2, 0.5) + stdev_pc1 = pca_results["PCA1"].std(ddof=0) + stdev_pc2 = pca_results["PCA2"].std(ddof=0) + + fiber_points = [] + type_names = [] + display_data = {} + + for sample in samples: + data = { + "PC1" : [sample * stdev_pc1, 0], + "PC2" : [0, sample * stdev_pc2], + } + for pc in data: + fiber_points.append(pca.inverse_transform(data[pc]).reshape(1, -1, 3)) + type_name = f"{pc}#{sample}" + type_names.append(type_name) + if type_name not in display_data: + display_data[type_name] = DisplayData( + name=type_name, + display_type=DISPLAY_TYPE.FIBER, + ) + + meta_data=MetaData( + box_size=BOX_SIZE, + camera_defaults=CameraData( + position=np.array([10.0, 0.0, 200.0]), + look_at_position=np.array([10.0, 0.0, 0.0]), + fov_degrees=60.0, + ), + trajectory_title="Actin Compression Dimensionality Reduction", + ) + time_units=UnitData("count") # frames + spatial_units=UnitData("nm") # nanometers + + converter = _generate_simularium_for_fiber_points( + fiber_points, + type_names, + meta_data, + display_data, + time_units, + spatial_units, + ) + + output_key = os.path.splitext(pca_pickle_key)[0] + _save_and_upload_simularium_file(converter, bucket, f"{output_key}.simularium") From 1ec35994e419d7dd835c7a7df492edc08e09a23d Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Tue, 9 Jul 2024 13:33:19 -0700 Subject: [PATCH 11/63] WIP adding options to pca viz --- .../_visualize_dimensionality_reduction.py | 10 ++- subcell_pipeline/visualization/visualizer.py | 86 ++++++++++++++----- 2 files changed, 72 insertions(+), 24 deletions(-) diff --git a/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py b/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py index 70b7c74..9bd04f4 100644 --- a/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py +++ b/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py @@ -104,4 +104,12 @@ # File key for PCA object pickle pca_pickle_key = "actin_compression_pca.pkl" -visualize_dimensionality_reduction(bucket, pca_results_key, pca_pickle_key) +# Scroll through the PC distributions over time if True, otherwise show all together in one timestep +distribution_over_time = False + +# Also show distributions for ReaDDy and Cytosim if True, otherwise just all together +simulator_detail = False + +visualize_dimensionality_reduction( + bucket, pca_results_key, pca_pickle_key, distribution_over_time, simulator_detail +) diff --git a/subcell_pipeline/visualization/visualizer.py b/subcell_pipeline/visualization/visualizer.py index eb14b95..a1d511f 100644 --- a/subcell_pipeline/visualization/visualizer.py +++ b/subcell_pipeline/visualization/visualizer.py @@ -48,9 +48,7 @@ upload_file_to_s3, make_working_directory, ) -from subcell_pipeline.analysis.compression_metrics.compression_metric import ( - CompressionMetric, -) +from ..analysis.compression_metrics.compression_metric import CompressionMetric from ..simulation.readdy import ReaddyPostProcessor, load_readdy_fiber_points from .spatial_annotator import SpatialAnnotator @@ -805,39 +803,81 @@ def 
visualize_tomography(bucket: str, name: str) -> None: _save_and_upload_simularium_file(converter, bucket, f"{name}/{name}_{traj_id}.simularium") -def visualize_dimensionality_reduction(bucket: str, pca_results_key: str, pca_pickle_key: str) -> None: +def visualize_dimensionality_reduction( + bucket: str, + pca_results_key: str, + pca_pickle_key: str, + distribution_over_time: bool, + simulator_detail: bool, + std_devs: float, + sample_resolution: int, +) -> None: """ Visualize PCA space for actin fibers. Parameters ---------- + bucket + Name of S3 bucket for input and output files. + pca_results_key + File key for PCA results dataframe. + pca_pickle_key + File key for PCA object pickle. + distribution_over_time + Scroll through the PC distributions over time? + Otherwise show all together in one timestep. + simulator_detail + Also show distributions for ReaDDy and Cytosim? + Otherwise just all together. + std_devs + How many standard deviations to visualize? + sample_resolution + How many samples to visualize for each PC distribution? + (should be an odd number) + """ + if sample_resolution % 2 == 0: + sample_resolution += 1 - """ pca_results = load_dataframe(bucket, pca_results_key) pca = load_pickle(bucket, pca_pickle_key) - - samples = np.arange(-2, 2, 0.5) - stdev_pc1 = pca_results["PCA1"].std(ddof=0) - stdev_pc2 = pca_results["PCA2"].std(ddof=0) fiber_points = [] type_names = [] display_data = {} + + inc = 2 * std_devs / (sample_resolution - 1) + samples = np.arange(-std_devs, std_devs + inc, inc) + stdev_pc1 = pca_results["PCA1"].std(ddof=0) + stdev_pc2 = pca_results["PCA2"].std(ddof=0) + data = { + "PC1" : [sample * stdev_pc1, 0], + "PC2" : [0, sample * stdev_pc2], + } - for sample in samples: - data = { - "PC1" : [sample * stdev_pc1, 0], - "PC2" : [0, sample * stdev_pc2], - } - for pc in data: - fiber_points.append(pca.inverse_transform(data[pc]).reshape(1, -1, 3)) - type_name = f"{pc}#{sample}" - type_names.append(type_name) - if type_name not in display_data: - display_data[type_name] = DisplayData( - name=type_name, - display_type=DISPLAY_TYPE.FIBER, - ) + if distribution_over_time: + + for pc_ix, pc in enumerate(data): + fiber_points.append([]) + pca.inverse_transform(data[pc]).reshape(-1, 3) + for sample in samples: + fiber_points[pc_ix].append() + fiber_points[pc_ix] = np.array(fiber_points[pc_ix]) + + else: + + for sample in samples: + for pc in data: + + import ipdb; ipdb.set_trace() + + fiber_points.append(pca.inverse_transform(data[pc]).reshape(1, -1, 3)) + type_name = f"{pc}#{sample}" + type_names.append(type_name) + if type_name not in display_data: + display_data[type_name] = DisplayData( + name=type_name, + display_type=DISPLAY_TYPE.FIBER, + ) meta_data=MetaData( box_size=BOX_SIZE, From 503c5b6013abd5e1e3b3430b0a67e6d43051727c Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Mon, 1 Jul 2024 15:54:33 -0400 Subject: [PATCH 12/63] Update name to series_name in workflow docstrings --- .../cytosim/_process_cytosim_compression_simulations.py | 4 ++-- .../cytosim/_process_cytosim_no_compression_simulations.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/subcell_pipeline/simulation/cytosim/_process_cytosim_compression_simulations.py b/subcell_pipeline/simulation/cytosim/_process_cytosim_compression_simulations.py index 1fd47f8..a16c8ce 100644 --- a/subcell_pipeline/simulation/cytosim/_process_cytosim_compression_simulations.py +++ 
b/subcell_pipeline/simulation/cytosim/_process_cytosim_compression_simulations.py @@ -86,8 +86,8 @@ sample the timepoints and monomer points. If the sampled file for a given condition key and random seed already exists, sampling is skipped. -- Input: `(name)/data/(name)_(condition_key)_(seed).csv` -- Output: `(name)/samples/(name)_(condition_key)_(seed).csv` +- Input: `(series_name)/data/(series_name)_(condition_key)_(seed).csv` +- Output: `(series_name)/samples/(series_name)_(condition_key)_(seed).csv` """ # %% diff --git a/subcell_pipeline/simulation/cytosim/_process_cytosim_no_compression_simulations.py b/subcell_pipeline/simulation/cytosim/_process_cytosim_no_compression_simulations.py index 50bdeb5..edef84b 100644 --- a/subcell_pipeline/simulation/cytosim/_process_cytosim_no_compression_simulations.py +++ b/subcell_pipeline/simulation/cytosim/_process_cytosim_no_compression_simulations.py @@ -83,8 +83,8 @@ sample the timepoints and monomer points. If the sampled file for a given condition key and random seed already exists, sampling is skipped. -- Input: `(name)/data/(name)_(condition_key)_(seed).csv` -- Output: `(name)/samples/(name)_(condition_key)_(seed).csv` +- Input: `(series_name)/data/(series_name)_(condition_key)_(seed).csv` +- Output: `(series_name)/samples/(series_name)_(condition_key)_(seed).csv` """ # %% From 1866f55aedcffbbffc028d5ed8ceeaf9f0878afe Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Mon, 1 Jul 2024 17:52:52 -0400 Subject: [PATCH 13/63] Move copy readdy outputs into workflow notebooks --- .../simulation/batch_simulations.py | 44 +++++++++++++ subcell_pipeline/simulation/readdy/README.md | 17 ++--- ...un_readdy_compression_batch_simulations.py | 66 +++++++++++++++++++ ...readdy_no_compression_batch_simulations.py | 55 ++++++++++++++++ .../simulation/readdy/copy_readdy_outputs.py | 37 ----------- 5 files changed, 172 insertions(+), 47 deletions(-) create mode 100644 subcell_pipeline/simulation/readdy/_run_readdy_compression_batch_simulations.py create mode 100644 subcell_pipeline/simulation/readdy/_run_readdy_no_compression_batch_simulations.py delete mode 100644 subcell_pipeline/simulation/readdy/copy_readdy_outputs.py diff --git a/subcell_pipeline/simulation/batch_simulations.py b/subcell_pipeline/simulation/batch_simulations.py index ae6980f..377697b 100644 --- a/subcell_pipeline/simulation/batch_simulations.py +++ b/subcell_pipeline/simulation/batch_simulations.py @@ -1,12 +1,14 @@ """Methods for running simulations on AWS Batch.""" import re +from typing import Optional import boto3 from container_collection.batch.get_batch_logs import get_batch_logs from container_collection.batch.make_batch_job import make_batch_job from container_collection.batch.register_batch_job import register_batch_job from container_collection.batch.submit_batch_job import submit_batch_job +from io_collection.keys.copy_key import copy_key from io_collection.save.save_text import save_text @@ -230,3 +232,45 @@ def check_and_save_job_logs( logs = get_batch_logs(response["jobArn"], " ") save_text(bucket, log_key, logs) + + +def copy_simulation_outputs( + bucket: str, + series_name: str, + source_template: str, + n_replicates: int, + condition_keys: Optional[dict[str, str]] = None, +) -> None: + """ + Copy simulation outputs from where they are saved to pipeline file structure. + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + series_name + Name of simulation series. 
+    source_template
+        Template string for source output files.
+    n_replicates
+        Number of simulation replicates.
+    condition_keys
+        Map of source to target condition keys.
+    """
+
+    if condition_keys is None:
+        condition_keys = {"": ""}
+
+    for index in range(n_replicates):
+        for source_condition, target_condition in condition_keys.items():
+            if source_condition == "" and target_condition == "":
+                source_key = source_template % (index)
+                target_key = f"{series_name}/outputs/{series_name}_{index}.h5"
+            else:
+                source_key = source_template % (source_condition, index)
+                target_key = (
+                    f"{series_name}/outputs/{series_name}_{target_condition}_{index}.h5"
+                )
+
+            print(f"Copying [ {source_key} ] to [ {target_key} ]")
+            copy_key(bucket, source_key, target_key)
diff --git a/subcell_pipeline/simulation/readdy/README.md b/subcell_pipeline/simulation/readdy/README.md
index cdbe612..83520df 100644
--- a/subcell_pipeline/simulation/readdy/README.md
+++ b/subcell_pipeline/simulation/readdy/README.md
@@ -2,22 +2,19 @@
 Simulations and processing for particle-based reaction-diffusion simulator
 [ReaDDy](https://readdy.github.io/).
 
-## Run ReaDDy simulations (compression and no compression)
-
 > - **Base simulator**: [https://github.com/readdy/readdy](https://github.com/readdy/readdy)
 > - **Model development**: [https://github.com/simularium/readdy-models](https://github.com/simularium/readdy-models)
 
-- **Run ReaDDy compression simulations** ([source](https://github.com/simularium/readdy-models/tree/main/examples/actin) | [readme](https://github.com/simularium/readdy-models/blob/main/examples/README.md))
-
-
-## Process baseline single actin fiber with no compression
+## Baseline single actin fiber with no compression
 
-The `ACTIN_NO_COMPRESSION` simulation series simulates a single actin fiber with a free barbed end across five replicates.
+The `NO_COMPRESSION` simulation series simulates a single actin fiber with a free barbed end across five replicates.
 
+- **Run ReaDDy single fiber simulations** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/simulation/readdy/_run_readdy_no_compression_batch_simulations.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/simulation/readdy/_run_readdy_no_compression_batch_simulations.html))
 - **Process ReaDDy single fiber simulations** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/simulation/readdy/_process_readdy_no_compression_simulations.html))
 
-## Process single actin fiber compressed at different compression velocities
+## Single actin fiber compressed at different compression velocities
 
-The `ACTIN_COMPRESSION_VELOCITY` simulation series simulates compression of a single 500 nm actin fiber at four different velocities (4.7, 15, 47, and 150 μm/s) with five replicates.
+The `COMPRESSION_VELOCITY` simulation series simulates compression of a single 500 nm actin fiber at four different velocities (4.7, 15, 47, and 150 μm/s) with five replicates.
-- **Process Cytosim compression simulations** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/simulation/readdy/_process_readdy_compression_simulations.html)) +- **Run ReaDDy compression simulations** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/simulation/readdy/_run_readdy_compression_batch_simulations.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/simulation/readdy/_run_readdy_compression_batch_simulations.html)) +- **Process ReaDDy compression simulations** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/simulation/readdy/_process_readdy_compression_simulations.html)) diff --git a/subcell_pipeline/simulation/readdy/_run_readdy_compression_batch_simulations.py b/subcell_pipeline/simulation/readdy/_run_readdy_compression_batch_simulations.py new file mode 100644 index 0000000..945216b --- /dev/null +++ b/subcell_pipeline/simulation/readdy/_run_readdy_compression_batch_simulations.py @@ -0,0 +1,66 @@ +# %% [markdown] +# # Run ReaDDy compression simulations + +# %% [markdown] +""" +Notebook contains steps for running ReaDDy simulations in which a single actin +fiber is compressed at different compression velocities. + +Simulations use the ReaDDy actin model defined +[here](https://github.com/simularium/readdy-models/tree/main/examples/actin). +Instructions for running this model on AWS Batch are provided +[here](https://github.com/simularium/readdy-models/blob/main/examples/README.md). + +After simulations are complete, use this notebook to copy output files into the +file structure used by this pipeline. + +- [Define simulation conditions](#define-simulation-conditions) +- [Copy simulation outputs](#copy-simulation-outputs) +""" + +# %% +if __name__ != "__main__": + raise ImportError("This module is a notebook and is not meant to be imported") + +# %% +from subcell_pipeline.simulation.batch_simulations import copy_simulation_outputs + +# %% [markdown] +""" +## Define simulation conditions + +Defines the `ACTIN_COMPRESSION_VELOCITY` simulation series, which compresses a +single 500 nm actin fiber at four different velocities (4.7, 15, 47, and 150 +μm/s) with five replicates each. 
+""" + +# %% +# Name of the simulation series +series_name: str = "ACTIN_COMPRESSION_VELOCITY" + +# Template for simulation output files +source_template: str = "outputs/actin_compression_velocity=%s_%d.h5" + +# S3 bucket for input and output files +bucket: str = "s3://readdy-working-bucket" + +# Number of simulation replicates +n_replicates: int = 5 + +# File keys for each velocity +velocity_keys: dict[str, str] = { + "4.7": "0047", + "15": "0150", + "47": "0470", + "150": "1500", +} + +# %% [markdown] +""" +## Copy simulation outputs +""" + +# %% +copy_simulation_outputs( + bucket, series_name, source_template, n_replicates, velocity_keys +) diff --git a/subcell_pipeline/simulation/readdy/_run_readdy_no_compression_batch_simulations.py b/subcell_pipeline/simulation/readdy/_run_readdy_no_compression_batch_simulations.py new file mode 100644 index 0000000..b43a295 --- /dev/null +++ b/subcell_pipeline/simulation/readdy/_run_readdy_no_compression_batch_simulations.py @@ -0,0 +1,55 @@ +# %% [markdown] +# # Run ReaDDy no compression simulations + +# %% [markdown] +""" +Notebook contains steps for running ReaDDy simulations for a baseline single +actin fiber with no compression. + +Simulations use the ReaDDy actin model defined +[here](https://github.com/simularium/readdy-models/tree/main/examples/actin). +Instructions for running this model on AWS Batch are provided +[here](https://github.com/simularium/readdy-models/blob/main/examples/README.md). + +After simulations are complete, use this notebook to copy output files into the +file structure used by this pipeline. + +- [Define simulation conditions](#define-simulation-conditions) +- [Copy simulation outputs](#copy-simulation-outputs) +""" + +# %% +if __name__ != "__main__": + raise ImportError("This module is a notebook and is not meant to be imported") + +# %% +from subcell_pipeline.simulation.batch_simulations import copy_simulation_outputs + +# %% [markdown] +""" +## Define simulation conditions + +Defines the `ACTIN_NO_COMPRESSION` simulation series, which simulates a single +actin fiber with a free barbed end across five replicates. +""" + +# %% +# Name of the simulation series +series_name: str = "ACTIN_COMPRESSION_VELOCITY" + +# Template for simulation output files +source_template: str = "outputs/actin_compression_baseline_%d.h5" + +# S3 bucket for input and output files +bucket: str = "s3://readdy-working-bucket" + +# Number of simulation replicates +n_replicates: int = 5 + +# %% [markdown] +""" +## Copy simulation outputs +""" + +# %% +copy_simulation_outputs(bucket, series_name, source_template, n_replicates) diff --git a/subcell_pipeline/simulation/readdy/copy_readdy_outputs.py b/subcell_pipeline/simulation/readdy/copy_readdy_outputs.py deleted file mode 100644 index 8701483..0000000 --- a/subcell_pipeline/simulation/readdy/copy_readdy_outputs.py +++ /dev/null @@ -1,37 +0,0 @@ -import boto3 - - -def copy_readdy_outputs(): - """ - Copy ReaDDy outputs from where they were saved from running - https://github.com/simularium/readdy-models to have the same - AWS S3 file structure as for Cytosim. 
- """ - s3_client = boto3.client("s3") - bucket = "readdy-working-bucket" - src_name = "outputs/actin_compression_velocity=" - dest_name = "ACTIN_COMPRESSION_VELOCITY/outputs/ACTIN_COMPRESSION_VELOCITY" - src_condition_keys = ["4.7", "15", "47", "150"] - dest_condition_keys = ["0047", "0150", "0470", "1500"] - n_replicates = 5 - - for cond_ix in range(len(src_condition_keys)): - for rep_ix in range(n_replicates): - - src_cond = src_condition_keys[cond_ix] - src_path = f"{bucket}/{src_name}{src_cond}_{rep_ix}.h5" - - dest_cond = dest_condition_keys[cond_ix] - dest_key = f"{dest_name}_{dest_cond}_{rep_ix}.h5" - - s3_client.copy_object( - Bucket=bucket, - CopySource=src_path, - Key=dest_key, - ) - - print(f"copied {src_path} to {bucket}/{dest_key}") - - -if __name__ == "__main__": - copy_readdy_outputs() \ No newline at end of file From 73c45272efdd58906acfee9fe06e72fc96e98519 Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Mon, 1 Jul 2024 17:53:43 -0400 Subject: [PATCH 14/63] Lint readdy processing notebooks --- ..._process_readdy_compression_simulations.py | 23 +++++++++--------- ...ocess_readdy_no_compression_simulations.py | 24 +++++++++---------- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py b/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py index 0b0a3a6..366330b 100644 --- a/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py +++ b/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py @@ -3,7 +3,6 @@ # %% [markdown] """ - Notebook contains steps for post processing of ReaDDy simulations in which a single actin fiber is compressed at different compression velocities. @@ -20,18 +19,16 @@ if __name__ != "__main__": raise ImportError("This module is a notebook and is not meant to be imported") -# %% -from subcell_pipeline.simulation.readdy.parser import ( - parse_readdy_simulation_data, -) +# %% +from subcell_pipeline.simulation.readdy.parser import parse_readdy_simulation_data # %% [markdown] """ ## Define simulation conditions -Defines the `COMPRESSION_VELOCITY` simulation series, which compresses a single -500 nm actin fiber at four different velocities (4.7, 15, 47, and 150 μm/s) with -five replicates each. +Defines the `ACTIN_COMPRESSION_VELOCITY` simulation series, which compresses a +single 500 nm actin fiber at four different velocities (4.7, 15, 47, and 150 +μm/s) with five replicates each. """ # %% @@ -61,10 +58,12 @@ files and parse them into a tidy data format. If the parsed file for a given condition key and replicate already exists, parsing is skipped. 
-- Input: `(series_name)/outputs/(series_name)_(condition_key)_(index+1).h5` -- Output: `(series_name)/data/(series_name)_(condition_key)_(index+1).csv` - and `(series_name)/data/(series_name)_(condition_key)_(index+1).pkl` +- Input: `(series_name)/outputs/(series_name)_(condition_key)_(index + 1).h5` +- Output: `(series_name)/data/(series_name)_(condition_key)_(index + 1).csv` and + `(series_name)/data/(series_name)_(condition_key)_(index + 1).pkl` """ # %% -parse_readdy_simulation_data(bucket, series_name, condition_keys, n_replicates, n_timepoints, n_monomer_points) +parse_readdy_simulation_data( + bucket, series_name, condition_keys, n_replicates, n_timepoints, n_monomer_points +) diff --git a/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py b/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py index 76b9162..318fa18 100644 --- a/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py +++ b/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py @@ -3,8 +3,8 @@ # %% [markdown] """ -Notebook contains steps for post processing of ReaDDy simulations for a -baseline single actin fiber with no compression. +Notebook contains steps for post processing of ReaDDy simulations for a baseline +single actin fiber with no compression. This notebook provides an example of processing a simulation series for a single condition with multiple replicates. For an example of processing a simulation @@ -19,17 +19,15 @@ if __name__ != "__main__": raise ImportError("This module is a notebook and is not meant to be imported") -# %% -from subcell_pipeline.simulation.readdy.parser import ( - parse_readdy_simulation_data, -) +# %% +from subcell_pipeline.simulation.readdy.parser import parse_readdy_simulation_data # %% [markdown] """ ## Define simulation conditions -Defines the `NO_COMPRESSION` simulation series, which simulates a single actin -fiber with a free barbed end across five replicates. +Defines the `ACTIN_NO_COMPRESSION` simulation series, which simulates a single +actin fiber with a free barbed end across five replicates. """ # %% @@ -52,13 +50,15 @@ """ ## Parse simulation data -Iterate through all replicates to load simulation output -files and parse them into a tidy data format. If the parsed file for a given -replicate already exists, parsing is skipped. +Iterate through all replicates to load simulation output files and parse them +into a tidy data format. If the parsed file for a given replicate already +exists, parsing is skipped. 
- Input: `(series_name)/outputs/(series_name)_(index+1).h5` - Output: `(series_name)/data/(series_name)_(index+1).csv` """ # %% -parse_readdy_simulation_data(bucket, series_name, [""], n_replicates, n_timepoints, n_monomer_points) +parse_readdy_simulation_data( + bucket, series_name, [""], n_replicates, n_timepoints, n_monomer_points +) From 5164acf935d88eb45316e7a38d258f8f05629523 Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Tue, 2 Jul 2024 11:22:05 -0400 Subject: [PATCH 15/63] Update docstrings in readdy data structures module --- pyproject.toml | 1 + .../simulation/readdy/data_structures.py | 102 +++++++----------- 2 files changed, 38 insertions(+), 65 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 14fcdfe..53a5e60 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,6 +95,7 @@ extend-ignore = [ "D100", # Missing docstring in public module "D101", # Missing docstring in public class "D103", # Missing docstring in public function + "D105", # Missing docstring in magic method "D107", # Missing docstring in __init__ "D202", # Blank lines between the function body and the function docstring "D203", # 1 blank line required before class docstring diff --git a/subcell_pipeline/simulation/readdy/data_structures.py b/subcell_pipeline/simulation/readdy/data_structures.py index 93053a5..d955a92 100644 --- a/subcell_pipeline/simulation/readdy/data_structures.py +++ b/subcell_pipeline/simulation/readdy/data_structures.py @@ -1,35 +1,26 @@ -#!/usr/bin/env python - -from typing import Dict, List +from typing import Optional import numpy as np class TopologyData: + """Data class representing a ReaDDy topology of connected particles.""" + uid: int + """Unique ID of the topology from ReaDDy.""" + type_name: str - particle_ids: List[int] - - def __init__(self, uid: int, type_name: str, particle_ids: List[int]): - """ - Data class representing a ReaDDy topology of connected particles. - - - Parameters - ---------- - uid: int - Unique ID of the topology from ReaDDy. - type_name: str - ReaDDy type name of the topology. - particle_ids: List[int] - List of unique IDs of each particle in the topology. - """ + """ReaDDy type name of the topology.""" + + particle_ids: list[int] + """List of unique IDs of each particle in the topology.""" + + def __init__(self, uid: int, type_name: str, particle_ids: list[int]): self.uid = uid self.type_name = type_name self.particle_ids = particle_ids def __str__(self) -> str: - """String with all data.""" return ( "Topology(\n" f" id = {self.uid}\n" @@ -40,37 +31,29 @@ def __str__(self) -> str: class ParticleData: + """Data class representing a ReaDDy particle.""" + uid: int + """Unique ID of the particle from ReaDDy.""" + type_name: str + """ReaDDy type name of the particle.""" + position: np.ndarray - neighbor_ids: List[int] + """XYZ position of the particle.""" + + neighbor_ids: list[int] + """List of unique IDs of each neighbor particle connected by an edge.""" def __init__( - self, uid: int, type_name: str, position: np.ndarray, neighbor_ids: List[int] + self, uid: int, type_name: str, position: np.ndarray, neighbor_ids: list[int] ): - """ - Data class representing a ReaDDy particle. - - - Parameters - ---------- - uid: int - Unique ID of the particle from ReaDDy. - type_name: str - ReaDDy type name of the particle. - position: np.ndarray - XYZ position of the particle. - neighbor_ids: List[int] - List of unique IDs of each neighbor particle - connected by an edge. 
-        """
         self.uid = uid
         self.type_name = type_name
         self.position = position
         self.neighbor_ids = neighbor_ids
 
     def __str__(self) -> str:
-        """String with all data."""
         return (
             f"Particle(\n"
             f"  id = {self.uid}\n"
@@ -82,44 +65,33 @@ def __str__(self) -> str:
 
 
 class FrameData:
+    """Data class representing one ReaDDy timestep."""
+
     time: float
-    topologies: Dict[int, TopologyData]
-    particles: Dict[int, ParticleData]
-    edges: List[np.ndarray]
+    """Current time of the simulation for this frame."""
+
+    topologies: dict[int, TopologyData]
+    """Mapping of topology ID to a TopologyData for each topology."""
+
+    particles: dict[int, ParticleData]
+    """Mapping of particle ID to a ParticleData for each particle."""
+
+    edges: list[np.ndarray]
+    """List of edges as position of each of the two particles connected by the edge."""
 
     def __init__(
         self,
         time: float,
-        topologies: Dict[int, TopologyData] = None,
-        particles: Dict[int, ParticleData] = None,
-        edges: List[np.ndarray] = None,
+        topologies: Optional[dict[int, TopologyData]] = None,
+        particles: Optional[dict[int, ParticleData]] = None,
+        edges: Optional[list[np.ndarray]] = None,
     ):
-        """
-        Data class representing one ReaDDy timestep.
-
-
-        Parameters
-        ----------
-        time: float
-            Current time of the simulation for this frame.
-        topologies: Dict[int, TopologyData] (optional)
-            Mapping of topology ID to a TopologyData for each topology.
-            Default: {} (added by ReaddyLoader._shape_trajectory_data())
-        particles: Dict[int, ParticleData] (optional)
-            Mapping of particle ID to a ParticleData for each particle.
-            Default: {} (added by ReaddyLoader._shape_trajectory_data())
-        edges: List[np.ndarray (shape = 2 x 3)] (optional)
-            List of edges as position of each of the two particles
-            connected by the edge.
-            Default: [] (added by ReaddyLoader._shape_trajectory_data())
-        """
         self.time = time
         self.topologies = topologies if topologies is not None else {}
         self.particles = particles if particles is not None else {}
         self.edges = edges if edges is not None else []
 
     def __str__(self) -> str:
-        """String with topology and particle data."""
         top_str = "\n"
         for top_id in self.topologies:
             top_str += f"{top_id} : \n{self.topologies[top_id]}\n"

From b599032b55d3589a03a959cdcc2354104a50a34a Mon Sep 17 00:00:00 2001
From: jessicasyu <15913767+jessicasyu@users.noreply.github.com>
Date: Tue, 2 Jul 2024 11:58:13 -0400
Subject: [PATCH 16/63] Update docstrings in readdy loader module

---
 subcell_pipeline/simulation/readdy/loader.py | 122 ++++++++++---------
 1 file changed, 63 insertions(+), 59 deletions(-)

diff --git a/subcell_pipeline/simulation/readdy/loader.py b/subcell_pipeline/simulation/readdy/loader.py
index 9d2b784..7f1a983 100644
--- a/subcell_pipeline/simulation/readdy/loader.py
+++ b/subcell_pipeline/simulation/readdy/loader.py
@@ -1,19 +1,51 @@
-#!/usr/bin/env python
-
-import os
-from typing import Any, List, Optional
+from typing import Any, Optional
 
 import numpy as np
 import readdy
-from tqdm import tqdm
 from io_collection.keys.check_key import check_key
 from io_collection.load.load_pickle import load_pickle
 from io_collection.save.save_pickle import save_pickle
+from tqdm import tqdm
 
 from .data_structures import FrameData, ParticleData, TopologyData
 
 
 class ReaddyLoader:
+    """
+    Load and shape data from a ReaDDy trajectory.
+
+    Trajectory is loaded from the simulation output h5 file or the .dat pickle
+    file. If a .dat pickle location and key are provided, the loaded trajectory
+    is saved to the given location for faster reloads.
+    """
+
+    _readdy_trajectory: Optional[readdy.Trajectory]
+    """ReaDDy trajectory object."""
+
+    _trajectory: Optional[list[FrameData]]
+    """List of FrameData for trajectory."""
+
+    h5_file_path: str
+    """Path to the ReaDDy .h5 file or .dat pickle file."""
+
+    min_time_ix: int
+    """First time index to include."""
+
+    max_time_ix: int
+    """Last time index to include."""
+
+    time_inc: int
+    """Include every time_inc timestep."""
+
+    timestep: float
+    """Real time for each simulation timestep."""
+
+    pickle_location: Optional[str]
+    """Location to save pickle file (AWS S3 bucket or local path)."""
+
+    pickle_key: Optional[str]
+    """Name of pickle file."""
+
     def __init__(
         self,
         h5_file_path: str,
@@ -21,42 +53,11 @@ def __init__(
         max_time_ix: int = -1,
         time_inc: int = 1,
         timestep: float = 100.0,
-        pickle_location: str = None,
-        pickle_key: str = None,
+        pickle_location: Optional[str] = None,
+        pickle_key: Optional[str] = None,
     ):
-        """
-        Load and shape data from a ReaDDy trajectory.
-
-
-        Parameters
-        ----------
-        h5_file_path: str
-            Path to the ReaDDy .h5 file. If a .dat pickle file exists
-            at this path, load from that instead.
-        min_time_ix: int = 0 (optional)
-            First time index to include.
-            Default: 0
-        max_time_ix: int = -1 (optional)
-            Last time index to include.
-            Default: -1 (include all timesteps after min_time_ix)
-        time_inc: int = 1 (optional)
-            Include every time_inc timestep.
-            Default: 1
-        timestep: float = 100. (optional)
-            How much time passes each timestep?
-            (In any time units, resulting time measurements
-            will be in the same units.)
-            Default: 100.
-        pickle_location: str (optional)
-            If provided along with pickle_key,
-            save a pickle file for easy reload.
-            This can be an AWS S3 bucket or a local path.
-        pickle_key: str (optional)
-            If provided along with pickle_location,
-            save a pickle file for easy reload.
-        """
-        self._readdy_trajectory: readdy.Trajectory = None
-        self._trajectory: Optional[List[FrameData]] = None
+        self._readdy_trajectory = None
+        self._trajectory = None
        self.h5_file_path = h5_file_path
        self.min_time_ix = min_time_ix
        self.max_time_ix = max_time_ix
@@ -69,25 +70,25 @@ def readdy_trajectory(self) -> readdy.Trajectory:
         """
         Lazy load the ReaDDy trajectory object.
 
+        Note that loading ReaDDy trajectories requires a path to a local file.
+        Loading currently does not support S3 locations.
 
         Returns
         -------
-        readdy_trajectory: readdy.Trajectory
+        :
             The ReaDDy trajectory object.
         """
         if self._readdy_trajectory is None:
-            # this line requires a path to a local file, does not support S3 paths
             self._readdy_trajectory = readdy.Trajectory(self.h5_file_path)
         return self._readdy_trajectory
 
     @staticmethod
-    def _frame_edges(time_ix: int, topology_records: Any) -> List[List[int]]:
+    def _frame_edges(time_ix: int, topology_records: Any) -> list[list[int]]:
         """
-        After a simulation has finished, get all the edges
-        at the given time index as [particle1 id, particle2 id].
+        Get all edges at the given time index as [particle1 id, particle2 id].
 
-        topology_records from
-        readdy.Trajectory(h5_file_path).read_observable_topologies()
+        The ``topology_records`` object is output from
+        ``readdy.Trajectory(h5_file_path).read_observable_topologies()``.
""" result = [] for top in topology_records[time_ix]: @@ -98,7 +99,7 @@ def _frame_edges(time_ix: int, topology_records: Any) -> List[List[int]]: result.append([ix1, ix2]) return result - def _shape_trajectory_data(self) -> List[FrameData]: + def _shape_trajectory_data(self) -> list[FrameData]: """Shape data from a ReaDDy trajectory for analysis.""" ( _, @@ -154,28 +155,31 @@ def _shape_trajectory_data(self) -> List[FrameData]: ) result.append(frame) return result - - def _use_pickle(self) -> bool: - return self.pickle_location is not None and self.pickle_key is not None - def trajectory(self) -> List[FrameData]: + def trajectory(self) -> list[FrameData]: """ Lazy load the shaped trajectory. - Returns ------- - trajectory: List[FrameData] + : The trajectory of data shaped for analysis. """ + if self._trajectory is not None: return self._trajectory - if self._use_pickle() and check_key(self.pickle_location, self.pickle_key): - print(f"Loading pickle file for ReaDDy data from {self.h5_file_path}") - self._trajectory = load_pickle(self.pickle_location, self.pickle_key) + + if self.pickle_location is not None and self.pickle_key is not None: + if check_key(self.pickle_location, self.pickle_key): + print(f"Loading pickle file for ReaDDy data from {self.h5_file_path}") + self._trajectory = load_pickle(self.pickle_location, self.pickle_key) + else: + print(f"Loading ReaDDy data from h5 file {self.h5_file_path}") + print(f"Saving pickle file for ReaDDy data to {self.h5_file_path}") + self._trajectory = self._shape_trajectory_data() + save_pickle(self.pickle_location, self.pickle_key, self._trajectory) else: print(f"Loading ReaDDy data from h5 file {self.h5_file_path}") self._trajectory = self._shape_trajectory_data() - if self._use_pickle() and not check_key(self.pickle_location, self.pickle_key): - save_pickle(self.pickle_location, self.pickle_key, self._trajectory) + return self._trajectory From 1366b9813a74cbe21de42def23b09115c6f75869 Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Tue, 2 Jul 2024 14:55:16 -0400 Subject: [PATCH 17/63] Update docstrings and type hinting in readdy post processor module --- .../compression_metrics/polymer_trace.py | 16 +- .../simulation/readdy/post_processor.py | 318 +++++++++--------- 2 files changed, 159 insertions(+), 175 deletions(-) diff --git a/subcell_pipeline/analysis/compression_metrics/polymer_trace.py b/subcell_pipeline/analysis/compression_metrics/polymer_trace.py index e265419..f6f900d 100644 --- a/subcell_pipeline/analysis/compression_metrics/polymer_trace.py +++ b/subcell_pipeline/analysis/compression_metrics/polymer_trace.py @@ -1,6 +1,6 @@ """Methods to calculate metrics from polymer trace data.""" -from typing import Any, Dict, Tuple, Union +from typing import Any, Dict, Tuple import numpy as np from sklearn.decomposition import PCA @@ -61,7 +61,7 @@ def get_end_to_end_axis_distances_and_projections( def get_average_distance_from_end_to_end_axis( polymer_trace: np.ndarray, **options: Dict[str, Any], -) -> Union[float, np.floating[Any]]: +) -> float: """ Calculate the average perpendicular distance of polymer trace points from the end-to-end axis. @@ -152,7 +152,7 @@ def get_pca_polymer_trace_projection( def get_contour_length_from_trace( polymer_trace: np.ndarray, **options: Dict[str, Any], -) -> Union[float, np.floating[Any]]: +) -> float: """ Calculate the sum of inter-monomer distances in the trace. 
@@ -172,13 +172,13 @@ def get_contour_length_from_trace( total_distance = np.float_(0) for i in range(len(polymer_trace) - 1): total_distance += np.linalg.norm(polymer_trace[i] - polymer_trace[i + 1]) - return total_distance + return total_distance.item() def get_bending_energy_from_trace( polymer_trace: np.ndarray, **options: Dict[str, Any], -) -> Union[float, np.floating[Any]]: +) -> float: """ Calculate the bending energy per monomer of a polymer trace. @@ -221,7 +221,7 @@ def get_bending_energy_from_trace( # the type checker is unable to infer its type energy = bending_constant * (1 - np.nanmean(cos_angle)) - return energy + return energy.item() def get_total_fiber_twist( @@ -460,7 +460,7 @@ def get_sum_bending_energy( def get_compression_ratio( polymer_trace: np.ndarray, **options: Dict[str, Any], -) -> Union[float, np.floating[Any]]: +) -> float: """ Calculate the compression ratio of a polymer trace. @@ -480,5 +480,5 @@ def get_compression_ratio( : The compression ratio of the polymer trace. """ - end_to_end_axis_length = np.linalg.norm(polymer_trace[-1] - polymer_trace[0]) + end_to_end_axis_length = np.linalg.norm(polymer_trace[-1] - polymer_trace[0]).item() return 1 - end_to_end_axis_length / get_contour_length_from_trace(polymer_trace) diff --git a/subcell_pipeline/simulation/readdy/post_processor.py b/subcell_pipeline/simulation/readdy/post_processor.py index 540b768..8990ff4 100644 --- a/subcell_pipeline/simulation/readdy/post_processor.py +++ b/subcell_pipeline/simulation/readdy/post_processor.py @@ -1,69 +1,60 @@ -#!/usr/bin/env python - import math -from typing import Dict, List, Tuple +from typing import Optional import numpy as np -import pandas as pd -from numpy import ndarray from tqdm import tqdm -from ...analysis.compression_metrics.polymer_trace import get_contour_length_from_trace -from .data_structures import FrameData +from subcell_pipeline.analysis.compression_metrics.polymer_trace import ( + get_contour_length_from_trace, +) +from subcell_pipeline.simulation.readdy.data_structures import FrameData class ReaddyPostProcessor: + """Get different views of ReaDDy trajectory for different analysis purposes.""" + + trajectory: list[FrameData] + """ReaDDy data trajectory from ReaddyLoader(h5_file_path).trajectory().""" + + box_size: np.ndarray + """The size of the XYZ dimensions of the simulation volume (shape = 3).""" + + periodic_boundary: bool + """True if simulation had periodic boundary, False otherwise.""" + def __init__( self, - trajectory: List[FrameData], + trajectory: list[FrameData], box_size: np.ndarray, periodic_boundary: bool = False, ): - """ - Get different views of the ReaDDy trajectory - for different analysis purposes. - - - Parameters - ---------- - trajectory: List[FrameData] - A trajectory of ReaDDy data from - ReaddyLoader(h5_file_path).trajectory(). - box_size: np.ndarray (shape = 3) - The size of the XYZ dimensions of the simulation volume. - periodic_boundary: bool (optional) - Was there a periodic boundary in this simulation? - Default: False - """ self.trajectory = trajectory self.box_size = box_size self.periodic_boundary = periodic_boundary - + def times(self) -> np.ndarray: """ Get simulation time at each timestep. Returns ------- - times: np.array (shape = n_timesteps) - Array of time stamps in simulation time for each time step. + times + Array of time stamps in simulation time for each timestep (shape = + n_timesteps). 
""" - result = [] - for time_ix in self.trajectory: - result.append(self.trajectory[time_ix].time) + result = [trajectory.time for trajectory in self.trajectory] return np.array(result) def _id_for_neighbor_of_types( self, time_ix: int, particle_id: int, - neighbor_types: List[str], - exclude_ids: List[int] = None, + neighbor_types: list[str], + exclude_ids: Optional[list[int]] = None, ) -> int: """ - Get the id for the first neighbor - with a type_name in neighbor_types - at the given time index. + Get the id for the first neighbor with a type_name in neighbor_types at + the given time index. """ particles = self.trajectory[time_ix].particles for neighbor_id in particles[particle_id].neighbor_ids: @@ -78,17 +69,17 @@ def _ids_for_chain_of_types( self, time_ix: int, start_particle_id: int, - chain_particle_types: List[List[str]], + chain_particle_types: list[list[str]], current_polymer_number: int, chain_length: int = 0, - last_particle_id: int = None, - result: List[int] = None, - ) -> List[int]: + last_particle_id: Optional[int] = None, + result: Optional[list[int]] = None, + ) -> list[int]: """ - Starting from the particle with start_particle_id, - get ids for a chain of particles with chain_particle_types - in the given frame of data, + Get IDs for a chain of particles with chain_particle_types in the given + frame of data, starting from the particle with start_particle_id and avoiding the particle with last_particle_id. + If chain_length = 0, return entire chain. """ if result is None: @@ -120,8 +111,8 @@ def _non_periodic_position( self, position1: np.ndarray, position2: np.ndarray ) -> np.ndarray: """ - If the distance between two positions is greater than box_size, - move the second position across the box. + If the distance between two positions is greater than box_size, move the + second position across the box. """ if not self.periodic_boundary: return position2 @@ -146,14 +137,15 @@ def _normalize(vector: np.ndarray) -> np.ndarray: @staticmethod def _orientation_from_positions(positions: np.ndarray) -> np.ndarray: """ - orthonormalize and cross the vectors from a particle position - to prev and next particle positions to get a basis local to the particle. - - positions = [ - prev particle's position, - this particle's position, - next particle's position - ] + Orthonormalize and cross the vectors from a particle position to prev + and next particle positions to get a basis local to the particle. + + The positions array is structured as: + [ + prev particle's position, + this particle's position, + next particle's position, + ] """ v1 = ReaddyPostProcessor._normalize(positions[0] - positions[1]) v2 = ReaddyPostProcessor._normalize(positions[2] - positions[1]) @@ -167,14 +159,15 @@ def _rotation( self, positions: np.ndarray, ideal_positions: np.ndarray ) -> np.ndarray: """ - get the difference in the particles's current orientation - compared to the initial orientation as a rotation matrix. - - positions = [ - prev particle's position, - this particle's position, - next particle's position - ] + Get the difference in the particles's current orientation compared to + the initial orientation as a rotation matrix. 
+ + The positions array is structured as: + [ + prev particle's position, + this particle's position, + next particle's position, + ] """ positions[0] = self._non_periodic_position(positions[1], positions[0]) positions[2] = self._non_periodic_position(positions[1], positions[2]) @@ -185,35 +178,32 @@ def _rotation( def linear_fiber_chain_ids( self, - start_particle_phrases: List[str], - other_particle_types: List[str], + start_particle_phrases: list[str], + other_particle_types: list[str], polymer_number_range: int, - ) -> List[List[List[int]]]: + ) -> list[list[list[int]]]: """ - Get particle IDs for particles - in each linear fiber at each timestep. - + Get particle IDs for particles in each linear fiber at each timestep. Parameters ---------- - start_particle_phrases: List[str] - List of phrases in particle type names - for the first particles in the linear chain. - other_particle_types: List[str] - List of particle type names - (without polymer numbers at the end) - for the particles other than the start particles. - polymer_number_range: int - How many numbers are used to represent the - relative identity of particles in the chain? + start_particle_phrases + List of phrases in particle type names for the first particles in + the linear chain. + other_particle_types + List of particle type names (without polymer numbers at the end) for + the particles other than the start particles. + polymer_number_range + How many numbers are used to represent the relative identity of + particles in the chain? Returns ------- - chain_ids: List[List[List[int]]] - List of lists of lists of the particle IDs - for each particle for each fiber at each time. + : + List of lists of lists of the particle IDs for each particle for + each fiber at each time. """ - result: List[List[List[int]]] = [] + result: list[list[list[int]]] = [] chain_particle_types = [] for i in range(polymer_number_range): chain_particle_types.append( @@ -245,39 +235,37 @@ def linear_fiber_chain_ids( def linear_fiber_axis_positions( self, - fiber_chain_ids: List[List[List[int]]], + fiber_chain_ids: list[list[list[int]]], ideal_positions: np.ndarray, ideal_vector_to_axis: np.ndarray, - ) -> Tuple[List[List[np.ndarray]], List[List[List[int]]]]: + ) -> tuple[list[list[np.ndarray]], list[list[list[int]]]]: """ - Get XYZ axis positions for each particle - in each linear fiber at each timestep. - + Get XYZ axis positions for each particle in each linear fiber at each + timestep. Parameters ---------- - fiber_chain_ids: List[List[List[int]]] - List of lists of lists of particle IDs - for each particle in each fiber at each time. - ideal_positions: np.ndarray (shape = 3 x 3) - XYZ positions for 3 particles in an ideal chain. - ideal_vector_to_axis: np.ndarray (shape = 3) - Vector from the second ideal position - to the axis of the fiber. + fiber_chain_ids + List of list of lists of particle IDs for each particle in each + fiber at each time. + ideal_positions + XYZ positions for 3 particles in an ideal chain (shape = 3 x 3). + ideal_vector_to_axis + Vector from the second ideal position to the axis of the fiber + (shape = 3). Returns ------- - axis_positions: List[List[np.ndarray (shape = n x 3)]] - List of lists of arrays containing the x,y,z positions - of the closest point on the fiber axis to the position - of each particle in each fiber at each time. 
- new_chain_ids: List[List[List[int]] - List of lists of lists of particle IDs - matching the axis_positions + axis_positions + Lists of lists of arrays (shape = n x 3) containing the x,y,z + positions of the closest point on the fiber axis to the position of + each particle in each fiber at each time. + new_chain_ids + List of lists of lists of particle IDs matching the axis_positions for each particle in each fiber at each time. """ - result: List[List[np.ndarray]] = [] - ids: List[List[List[int]]] = [] + result: list[list[np.ndarray]] = [] + ids: list[list[list[int]]] = [] for time_ix in range(len(fiber_chain_ids)): result.append([]) ids.append([]) @@ -299,7 +287,7 @@ def linear_fiber_axis_positions( break if pos_invalid: break - rotation = self._rotation(positions, ideal_positions) + rotation = self._rotation(np.array(positions), ideal_positions) if rotation is None: break vector_to_axis_local = np.squeeze( @@ -312,42 +300,40 @@ def linear_fiber_axis_positions( new_ids.append(particle_ix) if len(axis_positions) < 2: continue - result[time_ix].append(axis_positions) + result[time_ix].append(np.array(axis_positions)) ids[time_ix].append(new_ids) return result, ids def linear_fiber_normals( self, - fiber_chain_ids: List[List[List[int]]], - axis_positions: List[List[np.ndarray]], + fiber_chain_ids: list[list[list[int]]], + axis_positions: list[list[np.ndarray]], normal_length: float = 5, - ) -> List[List[np.ndarray]]: + ) -> list[list[np.ndarray]]: """ - Get XYZ positions defining start and end points for normals - for each particle in each fiber at each timestep. - + Get XYZ positions defining start and end points for normals for each + particle in each fiber at each timestep. Parameters ---------- - fiber_chain_ids: List[List[List[int]]] - List of lists of lists of particle IDs - for particles in each fiber at each time. - axis_positions: List[List[np.ndarray (shape = n x 3)]] - List of lists of arrays containing the x,y,z positions - of the closest point on the fiber axis to the position - of each particle in each fiber at each time. - normal_length: float (optional) - Length of the resulting normal vectors - in the trajectory's spatial units. - Default: 5 + fiber_chain_ids + List of lists of lists of particle IDs for particles in each fiber + at each time. + axis_positions + List of lists of arrays (shape = n x 3) containing the x,y,z + positions of the closest point on the fiber axis to the position of + each particle in each fiber at each time. + normal_length + Length of the resulting normal vectors in the trajectory's spatial + units. Returns ------- - normals: List[List[np.ndarray (shape = 2 x 3)]] - List of lists of arrays containing the x,y,z normals + : + List of lists of arrays (shape = 2 x 3) containing the x,y,z normals of each particle in each fiber at each time. """ - result: List[List[np.ndarray]] = [] + result: list[list[np.ndarray]] = [] for time_ix in range(len(fiber_chain_ids)): result.append([]) particles = self.trajectory[time_ix].particles @@ -367,33 +353,32 @@ def linear_fiber_normals( @staticmethod def linear_fiber_control_points( - axis_positions: List[List[np.ndarray]], + axis_positions: list[list[np.ndarray]], n_points: int, - ) -> List[List[np.ndarray]]: + ) -> list[list[np.ndarray]]: """ - Resample the fiber line defined by each array of axis positions - to get the requested number of points between XYZ control points - for each linear fiber at each timestep. 
- + Resample the fiber line defined by each array of axis positions to get + the requested number of points between XYZ control points for each + linear fiber at each timestep. Parameters ---------- - axis_positions: List[List[np.ndarray (shape = n x 3)]] - List of lists of arrays containing the x,y,z positions - of the closest point on the fiber axis to the position - of each particle in each fiber at each time. - n_points: int + axis_positions + List of lists of arrays (shape = n x 3) containing the x,y,z + positions of the closest point on the fiber axis to the position of + each particle in each fiber at each time. + n_points Number of control points (spaced evenly) on resulting fibers. Returns ------- - control_points: List[List[np.ndarray (shape = n x 3)]] - Array containing the x,y,z positions - of control points for each fiber at each time. + : + Array (shape = n x 3) containing the x,y,z positions of control + points for each fiber at each time. """ if n_points < 2: raise Exception("n_points must be > 1 to define a fiber.") - result: List[List[np.ndarray]] = [] + result: list[list[np.ndarray]] = [] for time_ix in tqdm(range(len(axis_positions))): result.append([]) contour_length = get_contour_length_from_trace(axis_positions[time_ix][0]) @@ -403,11 +388,13 @@ def linear_fiber_control_points( control_points = np.zeros((n_points, 3)) control_points[0] = positions[0] current_position = np.copy(positions[0]) - leftover_length = 0 + leftover_length: float = 0 for pos_ix in range(1, len(positions)): v_segment = positions[pos_ix] - positions[pos_ix - 1] direction = ReaddyPostProcessor._normalize(v_segment) - remaining_length = np.linalg.norm(v_segment) + leftover_length + remaining_length = ( + np.linalg.norm(v_segment).item() + leftover_length + ) while remaining_length >= segment_length: current_position += ( segment_length - leftover_length @@ -423,43 +410,40 @@ def linear_fiber_control_points( def fiber_bond_energies( self, - fiber_chain_ids: List[List[List[int]]], - ideal_lengths: Dict[int, float], - ks: Dict[int, float], + fiber_chain_ids: list[list[list[int]]], + ideal_lengths: dict[int, float], + ks: dict[int, float], stride: int = 1, - ) -> Tuple[Dict[int, np.ndarray], np.ndarray]: + ) -> tuple[dict[int, np.ndarray], np.ndarray]: """ - Get the strain energy using the harmonic spring equation - and the bond distance between particles - with a given polymer number offset. - + Get the strain energy using the harmonic spring equation and the bond + distance between particles with a given polymer number offset. Parameters ---------- - fiber_chain_ids: List[List[List[int]]] - List of lists of lists of particle IDs - for particles in each fiber at each time. - ideal_lengths: Dict[int,float] + fiber_chain_ids + List of lists of lists of particle IDs for particles in each fiber + at each time. + ideal_lengths Ideal bond length for each of the polymer number offsets. - ks: Dict[int,float] + ks Bond energy constant for each of the polymer number offsets. - stride: int (optional) + stride Calculate bond energy every stride timesteps. - Default: 1 Returns ------- - bond_energies: Dict[int,np.ndarray (shape = time x bonds)] - For each polymer number offset, an array of bond energy - for each bond at each time. - filament_positions: np.ndarray (shape = time x bonds) - Position in the filament from the starting end - for the first particle in each bond at each time. + bond_energies + Map of polymer number offset to array (shape = time x bonds) of bond + energy for each bond at each time. 
+ filament_positions + Array (shape = time x bonds) of position in the filament from the + starting end for the first particle in each bond at each time. """ - energies: Dict[int, List[List[float]]] = {} + energies: dict[int, list[list[float]]] = {} for offset in ideal_lengths: energies[offset] = [] - filament_positions: List[List[int]] = [] + filament_positions: list[list[int]] = [] for time_ix in range(0, len(self.trajectory), stride): for offset in ideal_lengths: energies[offset].append([]) @@ -480,7 +464,7 @@ def fiber_bond_energies( particle.position, offset_particle.position ) bond_stretch = ( - np.linalg.norm(offset_pos - particle.position) + np.linalg.norm(offset_pos - particle.position).item() - ideal_lengths[offset] ) energy = 0.5 * ks[offset] * bond_stretch * bond_stretch @@ -493,13 +477,13 @@ def fiber_bond_energies( np.array(filament_positions), ) - def edge_positions(self) -> List[List[np.ndarray]]: + def edge_positions(self) -> list[list[np.ndarray]]: """ Get the edges between particles as start and end positions. Returns ------- - particle_edges: List[List[np.ndarray]] + : List of list of edges as position of each of the two particles connected by the edge for each edge at each time. """ From 5190623e313a3409eb33a72682d7e1768ad2cc67 Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Tue, 2 Jul 2024 17:28:51 -0400 Subject: [PATCH 18/63] Fix readdy no compression simulation series name --- .../readdy/_run_readdy_no_compression_batch_simulations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subcell_pipeline/simulation/readdy/_run_readdy_no_compression_batch_simulations.py b/subcell_pipeline/simulation/readdy/_run_readdy_no_compression_batch_simulations.py index b43a295..4672dc3 100644 --- a/subcell_pipeline/simulation/readdy/_run_readdy_no_compression_batch_simulations.py +++ b/subcell_pipeline/simulation/readdy/_run_readdy_no_compression_batch_simulations.py @@ -35,7 +35,7 @@ # %% # Name of the simulation series -series_name: str = "ACTIN_COMPRESSION_VELOCITY" +series_name: str = "ACTIN_NO_COMPRESSION" # Template for simulation output files source_template: str = "outputs/actin_compression_baseline_%d.h5" From f6e7376e72d92fd78aa112efa1847eab739192af Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Tue, 2 Jul 2024 18:06:49 -0400 Subject: [PATCH 19/63] Update readdy simulation workflows --- .../simulation/readdy/__init__.py | 7 +- ..._process_readdy_compression_simulations.py | 29 +- ...ocess_readdy_no_compression_simulations.py | 25 +- subcell_pipeline/simulation/readdy/loader.py | 4 +- subcell_pipeline/simulation/readdy/parser.py | 294 ++++++++++-------- .../simulation/readdy/post_processor.py | 47 ++- 6 files changed, 258 insertions(+), 148 deletions(-) diff --git a/subcell_pipeline/simulation/readdy/__init__.py b/subcell_pipeline/simulation/readdy/__init__.py index 5582fab..9dfa45e 100644 --- a/subcell_pipeline/simulation/readdy/__init__.py +++ b/subcell_pipeline/simulation/readdy/__init__.py @@ -1,6 +1 @@ -"""readdy package for subcell_analysis.""" - -from .data_structures import FrameData, TopologyData, ParticleData # noqa: F401 -from .loader import ReaddyLoader # noqa: F401 -from .post_processor import ReaddyPostProcessor # noqa: F401 -from .parser import load_readdy_fiber_points # noqa: F401 \ No newline at end of file +"""Simulation methods and notebooks for ReaDDy.""" diff --git 
a/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py b/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py index 366330b..523d9f1 100644 --- a/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py +++ b/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py @@ -1,5 +1,5 @@ # %% [markdown] -# # Process ReaDDy simulations +# # Process ReaDDy compression simulations # %% [markdown] """ @@ -20,6 +20,8 @@ raise ImportError("This module is a notebook and is not meant to be imported") # %% +from pathlib import Path + from subcell_pipeline.simulation.readdy.parser import parse_readdy_simulation_data # %% [markdown] @@ -50,6 +52,18 @@ # Number of monomer points per fiber n_monomer_points = 200 +# Total number of steps for each condition +total_steps: dict[str, int] = { + "0047": int(3.2e8), + "0150": int(1e8), + "0470": int(3.2e7), + "1500": int(1e7), +} + +# Temporary path to save downloaded trajectories +temp_path: Path = Path(__file__).parents[3] / "aws_downloads" +temp_path.mkdir(parents=True, exist_ok=True) + # %% [markdown] """ ## Parse simulation data @@ -59,11 +73,18 @@ condition key and replicate already exists, parsing is skipped. - Input: `(series_name)/outputs/(series_name)_(condition_key)_(index + 1).h5` -- Output: `(series_name)/data/(series_name)_(condition_key)_(index + 1).csv` and - `(series_name)/data/(series_name)_(condition_key)_(index + 1).pkl` +- Input: `(series_name)/data/(series_name)_(condition_key)_(index + 1).pkl` +- Output: `(series_name)/samples/(series_name)_(condition_key)_(index + 1).csv` """ # %% parse_readdy_simulation_data( - bucket, series_name, condition_keys, n_replicates, n_timepoints, n_monomer_points + bucket, + series_name, + condition_keys, + n_replicates, + n_timepoints, + n_monomer_points, + total_steps, + str(temp_path), ) diff --git a/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py b/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py index 318fa18..85e1fe0 100644 --- a/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py +++ b/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py @@ -1,5 +1,5 @@ # %% [markdown] -# # Process ReaDDy simulations +# # Process ReaDDy no compression simulations # %% [markdown] """ @@ -20,6 +20,8 @@ raise ImportError("This module is a notebook and is not meant to be imported") # %% +from pathlib import Path + from subcell_pipeline.simulation.readdy.parser import parse_readdy_simulation_data # %% [markdown] @@ -46,6 +48,13 @@ # Number of monomer points per fiber n_monomer_points = 200 +# Total number of steps for each condition +total_steps: dict[str, int] = {"": int(1e7)} + +# Temporary path to save downloaded trajectories +temp_path: Path = Path(__file__).parents[3] / "aws_downloads" +temp_path.mkdir(parents=True, exist_ok=True) + # %% [markdown] """ ## Parse simulation data @@ -54,11 +63,19 @@ into a tidy data format. If the parsed file for a given replicate already exists, parsing is skipped. 
-- Input: `(series_name)/outputs/(series_name)_(index+1).h5` -- Output: `(series_name)/data/(series_name)_(index+1).csv` +- Input: `(series_name)/outputs/(series_name)_(index + 1).h5` +- Input: `(series_name)/data/(series_name)_(index + 1).pkl` +- Output: `(series_name)/samples/(series_name)_(index + 1).csv` """ # %% parse_readdy_simulation_data( - bucket, series_name, [""], n_replicates, n_timepoints, n_monomer_points + bucket, + series_name, + [""], + n_replicates, + n_timepoints, + n_monomer_points, + total_steps, + str(temp_path), ) diff --git a/subcell_pipeline/simulation/readdy/loader.py b/subcell_pipeline/simulation/readdy/loader.py index 7f1a983..b45939a 100644 --- a/subcell_pipeline/simulation/readdy/loader.py +++ b/subcell_pipeline/simulation/readdy/loader.py @@ -116,7 +116,7 @@ def _shape_trajectory_data(self) -> list[FrameData]: if ( time_ix < self.min_time_ix or (self.max_time_ix >= 0 and time_ix > self.max_time_ix) - or time_ix % self.time_inc != 0 + or times[time_ix] % self.time_inc != 0 ): continue frame = FrameData(time=self.timestep * time_ix) @@ -171,7 +171,7 @@ def trajectory(self) -> list[FrameData]: if self.pickle_location is not None and self.pickle_key is not None: if check_key(self.pickle_location, self.pickle_key): - print(f"Loading pickle file for ReaDDy data from {self.h5_file_path}") + print(f"Loading pickle file for ReaDDy data from {self.pickle_key}") self._trajectory = load_pickle(self.pickle_location, self.pickle_key) else: print(f"Loading ReaDDy data from h5 file {self.h5_file_path}") diff --git a/subcell_pipeline/simulation/readdy/parser.py b/subcell_pipeline/simulation/readdy/parser.py index 78c7cd7..327edc5 100644 --- a/subcell_pipeline/simulation/readdy/parser.py +++ b/subcell_pipeline/simulation/readdy/parser.py @@ -1,40 +1,91 @@ """Methods for parsing ReaDDy simulations.""" import os -from typing import List, Union, Tuple +from typing import Optional, Union +import boto3 import numpy as np import pandas as pd +from botocore.exceptions import ClientError from io_collection.keys.check_key import check_key from io_collection.save.save_dataframe import save_dataframe -from .loader import ReaddyLoader -from .post_processor import ReaddyPostProcessor -from ...constants import ( - COLUMN_NAMES, - COLUMN_DTYPES, - BOX_SIZE, - READDY_TIMESTEP, - READDY_TOTAL_STEPS, - ACTIN_START_PARTICLE_PHRASE, - ACTIN_PARTICLE_TYPES, - IDEAL_ACTIN_POSITIONS, - IDEAL_ACTIN_VECTOR_TO_AXIS, - WORKING_DIR_PATH, -) -from ...temporary_file_io import download_readdy_hdf5 - - -def readdy_post_processor( +from subcell_pipeline.simulation.readdy.loader import ReaddyLoader +from subcell_pipeline.simulation.readdy.post_processor import ReaddyPostProcessor + +COLUMN_NAMES: list[str] = [ + "fiber_id", + "xpos", + "ypos", + "zpos", + "xforce", + "yforce", + "zforce", + "segment_curvature", + "time", + "fiber_point", +] +"""Parsed tidy data column names.""" + +COLUMN_DTYPES: dict[str, Union[type[float], type[int]]] = { + "fiber_id": int, + "xpos": float, + "ypos": float, + "zpos": float, + "xforce": float, + "yforce": float, + "zforce": float, + "segment_curvature": float, + "time": float, + "fiber_point": int, +} +"""Parsed tidy data column data types.""" + +READDY_TIMESTEP: float = 0.1 +"""Simulation timestep (in ns).""" + +BOX_SIZE: np.ndarray = np.array(3 * [600.0]) +"""Default simulation volume dimensions (x, y, z).""" + + +def _download_s3_file(bucket: str, key: str, dest_path: str) -> Optional[str]: + """ + Download file from S3 to local path. 
+ + Parameters + ---------- + bucket + Name of S3 bucket. + key + Source key. + dest_path + Target local path. + """ + + s3_client = boto3.client("s3") + + if os.path.isfile(dest_path): + return dest_path + try: + s3_client.download_file(bucket, key, dest_path) + print(f"Downloaded [ {key} ] to [ {dest_path} ].") + return dest_path + except ClientError: + print(f"!!! Failed to download {key}") + return None + + +def download_readdy_hdf5( bucket: str, - series_name: str, - series_key: str, + series_name: str, + series_key: str, rep_ix: int, - n_timepoints: int, -) -> ReaddyPostProcessor: + download_path: str, +) -> Optional[str]: """ - Load a ReaddyPostProcessor from the specified ReaDDy trajectory. - (Load from a pickle if it exists.) + Download ReaDDy h5 files from S3 to local path. + + The ReaDDy Python package currently requires a local file path. Parameters ---------- @@ -43,128 +94,105 @@ def readdy_post_processor( series_name Name of simulation series. series_key - Name of simulation series plus condition_key if applicable. + Combination of series and condition names. rep_ix Replicate index. - n_timepoints - Number of timepoints to visualize. + download_path + Path for downloading temporary h5 files. """ - h5_file_path = os.path.join(WORKING_DIR_PATH, f"{series_key}_{rep_ix}.h5") - rep_id = rep_ix + 1 - pickle_key = f"{series_name}/data/{series_key}_{rep_id:06d}.pkl" - time_inc = READDY_TOTAL_STEPS[series_key] / n_timepoints - readdy_loader = ReaddyLoader( - h5_file_path=str(h5_file_path), - time_inc=time_inc, - timestep=READDY_TIMESTEP, - pickle_location=bucket, - pickle_key=pickle_key, - ) - return ReaddyPostProcessor( - readdy_loader.trajectory(), # this will load from a pickle if it exists - box_size=BOX_SIZE, - ) + if bucket.startswith("s3://"): + bucket = bucket.replace("s3://", "") + + aws_h5_key = f"{series_name}/outputs/{series_key}_{rep_ix}.h5" + local_h5_path = os.path.join(download_path, f"{series_key}_{rep_ix}.h5") + return _download_s3_file(bucket, aws_h5_key, local_h5_path) -def load_readdy_fiber_points( + +def parse_readdy_simulation_single_fiber_trajectory( bucket: str, - series_name: str, - series_key: str, + series_name: str, + series_key: str, rep_ix: int, n_timepoints: int, n_monomer_points: int, -) -> Tuple[List[List[List[int]]], List[List[np.ndarray]], np.ndarray, np.ndarray]: + total_steps: int, + temp_path: str, + timestep: float = READDY_TIMESTEP, +) -> pd.DataFrame: """ - Load a ReaDDy trajectory, calculate the polymer trace from - the monomer particle positions (using measurements from x-ray crystallography), - and resample to get the requested number of points - along each linear fiber at each timestep. + Parse ReaDDy trajectory data into tidy data format. + + Note that this methods assumes there is only one fiber in the simulation. Parameters ---------- bucket Name of S3 bucket for input and output files. series_name - Name of simulation series. + Name of simulation. series_key - Name of simulation series plus condition_key if applicable. + Series key. rep_ix Replicate index. n_timepoints - Number of timepoints to visualize. + Number of equally spaced timepoints to sample. n_monomer_points - Number of control points for each polymer trace. - - Returns - ------- - readdy_post_processor: ReaddyPostProcessor - The ReaddyPostProcessor loaded with this trajectory - in case it is needed for additional analysis. - fiber_chain_ids: List[List[List[int]]] - Particle IDs for particles in each linear fiber at each timestep - that match the axis_positions list. 
- axis_positions: List[List[np.ndarray (shape = n x 3)]] - List of lists of arrays containing the x,y,z positions - of the closest point on the fiber axis to the position - of each particle in each fiber at each time. - fiber_points: np.ndarray (shape = n_timepoints x n_fibers (1) x n x 3) - Array containing the x,y,z positions - of control points for each fiber at each time. - times: np.ndarray (shape = n_timepoints) - Simulation time at each timestep. + Number of equally spaced monomer points to sample. + total_steps + Total number of steps for each given simulation. + temp_path + Path for saving temporary h5 files. + timestep + Simulation timestep (in ns). """ - readdy_post_processor = readdy_post_processor( - bucket, series_name, series_key, rep_ix, n_timepoints - ) - fiber_chain_ids = readdy_post_processor.linear_fiber_chain_ids( - start_particle_phrases=[ACTIN_START_PARTICLE_PHRASE], - other_particle_types=ACTIN_PARTICLE_TYPES, - polymer_number_range=5, + + h5_file_path = download_readdy_hdf5( + bucket, series_name, series_key, rep_ix, temp_path ) - axis_positions, fiber_chain_ids = readdy_post_processor.linear_fiber_axis_positions( - fiber_chain_ids=fiber_chain_ids, - ideal_positions=IDEAL_ACTIN_POSITIONS, - ideal_vector_to_axis=IDEAL_ACTIN_VECTOR_TO_AXIS, + + assert isinstance(h5_file_path, str) + + rep_id = rep_ix + 1 + pickle_key = f"{series_name}/data/{series_key}_{rep_id:06d}.pkl" + time_inc = total_steps // n_timepoints + + readdy_loader = ReaddyLoader( + h5_file_path=h5_file_path, + time_inc=time_inc, + timestep=timestep, + pickle_location=bucket, + pickle_key=pickle_key, ) - fiber_points = readdy_post_processor.linear_fiber_control_points( + + post_processor = ReaddyPostProcessor(readdy_loader.trajectory(), box_size=BOX_SIZE) + + times = post_processor.times() + fiber_chain_ids = post_processor.linear_fiber_chain_ids(polymer_number_range=5) + axis_positions, _ = post_processor.linear_fiber_axis_positions(fiber_chain_ids) + + fiber_points = post_processor.linear_fiber_control_points( axis_positions=axis_positions, n_points=n_monomer_points, ) - times = readdy_post_processor.times() - return readdy_post_processor, fiber_chain_ids, axis_positions, np.array(fiber_points), times - -def _parse_readdy_simulation_trajectory( - bucket: str, - series_name: str, - series_key: str, - rep_ix: int, - n_timepoints: int, - n_monomer_points: int, -) -> pd.DataFrame: - """ - Parse ReaDDy trajectory data into tidy data format. - (Assume one fiber) - """ - _, _, _, fiber_points, times = load_readdy_fiber_points( - bucket, series_name, series_key, rep_ix, n_timepoints, n_monomer_points - ) - point_data: list[list[Union[str, int, float]]] = [] - for time_ix in range(fiber_points.shape[0]): - for pos_ix in range(fiber_points.shape[2]): - point_data.append([ - 1, # fiber_id - fiber_points[time_ix][0][pos_ix][0], # xpos - fiber_points[time_ix][0][pos_ix][1], # ypos - fiber_points[time_ix][0][pos_ix][2], # zpos - 0.0, # xforce - 0.0, # yforce - 0.0, # zforce - 0.0, # segment_curvature - times[time_ix], # time - pos_ix, # fiber_point - ]) + for time_ix in range(len(fiber_points)): + for pos_ix in range(fiber_points[0][0].shape[0]): + point_data.append( + [ + 1, # fiber_id + fiber_points[time_ix][0][pos_ix][0], # xpos + fiber_points[time_ix][0][pos_ix][1], # ypos + fiber_points[time_ix][0][pos_ix][2], # zpos + 0.0, # xforce + 0.0, # yforce + 0.0, # zforce + 0.0, # segment_curvature + times[time_ix], # time + pos_ix, # fiber_point + ] + ) # Combine all data into dataframe and update data types. 
 dataframe = pd.DataFrame(point_data, columns=COLUMN_NAMES)
@@ -182,8 +210,10 @@ def parse_readdy_simulation_data(
     series_name: str,
     condition_keys: list[str],
     n_replicates: int,
-    n_timepoints: int,
+    n_timepoints: int, 
     n_monomer_points: int,
+    total_steps: dict[str, int],
+    temp_path: str,
 ) -> None:
     """
     Parse ReaDDy simulation data for select conditions and replicates.
@@ -198,13 +228,22 @@
         List of condition keys.
     n_replicates
         Number of simulation replicates.
+    n_timepoints
+        Number of equally spaced timepoints to sample.
+    n_monomer_points
+        Number of equally spaced monomer points to sample.
+    total_steps
+        Total number of steps for each simulation key.
+    temp_path
+        Path for saving temporary h5 files.
     """
+
     for condition_key in condition_keys:
         series_key = f"{series_name}_{condition_key}" if condition_key else series_name
 
         for rep_ix in range(n_replicates):
            rep_id = rep_ix + 1
-            dataframe_key = f"{series_name}/data/{series_key}_{rep_id:06d}.csv"
+            dataframe_key = f"{series_name}/samples/{series_key}_{rep_id:06d}.csv"
 
             # Skip if dataframe file already exists.
             if check_key(bucket, dataframe_key):
@@ -212,11 +251,16 @@
                 continue
 
             print(f"Parsing data for [ {condition_key} ] replicate [ {rep_ix} ]")
-
-            download_readdy_hdf5(bucket, series_name, series_key, rep_ix)
-
-            data = _parse_readdy_simulation_trajectory(
-                bucket, series_name, series_key, rep_ix, n_timepoints, n_monomer_points
+
+            data = parse_readdy_simulation_single_fiber_trajectory(
+                bucket,
+                series_name,
+                series_key,
+                rep_ix,
+                n_timepoints,
+                n_monomer_points,
+                total_steps[condition_key],
+                temp_path,
             )
 
             save_dataframe(bucket, dataframe_key, data, index=False)
diff --git a/subcell_pipeline/simulation/readdy/post_processor.py b/subcell_pipeline/simulation/readdy/post_processor.py
index 8990ff4..faca8ff 100644
--- a/subcell_pipeline/simulation/readdy/post_processor.py
+++ b/subcell_pipeline/simulation/readdy/post_processor.py
@@ -9,6 +9,39 @@
 )
 from subcell_pipeline.simulation.readdy.data_structures import FrameData
 
+ACTIN_START_PARTICLE_PHRASE: list[str] = ["pointed"]
+"""Phrases indicating actin start particle."""
+
+ACTIN_PARTICLE_TYPES: list[str] = [
+    "actin#",
+    "actin#ATP_",
+    "actin#mid_",
+    "actin#mid_ATP_",
+    "actin#fixed_",
+    "actin#fixed_ATP_",
+    "actin#mid_fixed_",
+    "actin#mid_fixed_ATP_",
+    "actin#barbed_",
+    "actin#barbed_ATP_",
+    "actin#fixed_barbed_",
+    "actin#fixed_barbed_ATP_",
+]
+"""Actin particle types from simularium/readdy-models."""
+
+IDEAL_ACTIN_POSITIONS: np.ndarray = np.array(
+    [
+        [24.738, 20.881, 26.671],
+        [27.609, 24.061, 27.598],
+        [30.382, 21.190, 25.725],
+    ]
+)
+"""Ideal actin positions measured from crystal structure."""
+
+IDEAL_ACTIN_VECTOR_TO_AXIS: np.ndarray = np.array(
+    [-0.01056751, -1.47785105, -0.65833209]
+)
+"""Ideal actin vector to axis."""
+
 
 class ReaddyPostProcessor:
     """Get different views of ReaDDy trajectory for different analysis purposes."""
@@ -178,24 +211,24 @@ def _rotation(
 
     def linear_fiber_chain_ids(
         self,
-        start_particle_phrases: list[str],
-        other_particle_types: list[str],
         polymer_number_range: int,
+        start_particle_phrases: list[str] = ACTIN_START_PARTICLE_PHRASE,
+        other_particle_types: list[str] = ACTIN_PARTICLE_TYPES,
     ) -> list[list[list[int]]]:
         """
         Get particle IDs for particles in each linear fiber at each timestep.
 
         Parameters
         ----------
+        polymer_number_range
+            Number of values used to represent the relative identity of
+            particles in the chain.
start_particle_phrases List of phrases in particle type names for the first particles in the linear chain. other_particle_types List of particle type names (without polymer numbers at the end) for the particles other than the start particles. - polymer_number_range - How many numbers are used to represent the relative identity of - particles in the chain? Returns ------- @@ -236,8 +269,8 @@ def linear_fiber_chain_ids( def linear_fiber_axis_positions( self, fiber_chain_ids: list[list[list[int]]], - ideal_positions: np.ndarray, - ideal_vector_to_axis: np.ndarray, + ideal_positions: np.ndarray = IDEAL_ACTIN_POSITIONS, + ideal_vector_to_axis: np.ndarray = IDEAL_ACTIN_VECTOR_TO_AXIS, ) -> tuple[list[list[np.ndarray]], list[list[list[int]]]]: """ Get XYZ axis positions for each particle in each linear fiber at each From 548f3aa69b3e9aa1a72cf1075b8ae11acf482ceb Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Tue, 2 Jul 2024 18:13:28 -0400 Subject: [PATCH 20/63] Fix autodoc for readdy simulation module --- docs/conf.py | 2 +- subcell_pipeline/simulation/readdy/data_structures.py | 2 ++ subcell_pipeline/simulation/readdy/loader.py | 2 ++ subcell_pipeline/simulation/readdy/post_processor.py | 2 ++ 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 9177489..651c26c 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -58,7 +58,7 @@ # List of modules to be mocked up. Useful when some external dependencies are # not met at build time and break the building process. -autodoc_mock_imports = [] +autodoc_mock_imports = ["readdy"] # Controls how to represent typehints. autodoc_typehints = "signature" diff --git a/subcell_pipeline/simulation/readdy/data_structures.py b/subcell_pipeline/simulation/readdy/data_structures.py index d955a92..a1873d6 100644 --- a/subcell_pipeline/simulation/readdy/data_structures.py +++ b/subcell_pipeline/simulation/readdy/data_structures.py @@ -1,3 +1,5 @@ +"""Data structures for ReaDDy simulations.""" + from typing import Optional import numpy as np diff --git a/subcell_pipeline/simulation/readdy/loader.py b/subcell_pipeline/simulation/readdy/loader.py index b45939a..e0f2366 100644 --- a/subcell_pipeline/simulation/readdy/loader.py +++ b/subcell_pipeline/simulation/readdy/loader.py @@ -1,3 +1,5 @@ +"""Class for loading and shaping ReaDDy trajectories.""" + from typing import Any, Optional import numpy as np diff --git a/subcell_pipeline/simulation/readdy/post_processor.py b/subcell_pipeline/simulation/readdy/post_processor.py index faca8ff..2b5e625 100644 --- a/subcell_pipeline/simulation/readdy/post_processor.py +++ b/subcell_pipeline/simulation/readdy/post_processor.py @@ -1,3 +1,5 @@ +"""Class for post processing ReaDDy trajectories.""" + import math from typing import Optional From 5497614d5ce02bd2c2c7ca7a755b954e3adcc848 Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Wed, 3 Jul 2024 11:25:35 -0400 Subject: [PATCH 21/63] Fix floating point error when calculating control points --- subcell_pipeline/simulation/readdy/post_processor.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/subcell_pipeline/simulation/readdy/post_processor.py b/subcell_pipeline/simulation/readdy/post_processor.py index 2b5e625..baa5739 100644 --- a/subcell_pipeline/simulation/readdy/post_processor.py +++ b/subcell_pipeline/simulation/readdy/post_processor.py @@ -430,7 +430,10 @@ def linear_fiber_control_points( remaining_length = ( 
                        np.linalg.norm(v_segment).item() + leftover_length
                     )
-                    while remaining_length >= segment_length:
+                    # Rounding to 9 decimal places to avoid floating point error
+                    # where the remaining length is very close to the segment
+                    # length, causing the final control point to be skipped.
+                    while round(remaining_length, 9) >= round(segment_length, 9):
                         current_position += (
                             segment_length - leftover_length
                         ) * direction

From 9ca451b7219642e5292b336b6b2965d921d27ae9 Mon Sep 17 00:00:00 2001
From: jessicasyu <15913767+jessicasyu@users.noreply.github.com>
Date: Wed, 3 Jul 2024 11:31:05 -0400
Subject: [PATCH 22/63] Fix readdy series name in dim reduction analysis

---
 .../_run_pacmap_on_compression_simulations.py | 4 +++-
 .../_run_pca_on_compression_simulations.py    | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/subcell_pipeline/analysis/dimensionality_reduction/_run_pacmap_on_compression_simulations.py b/subcell_pipeline/analysis/dimensionality_reduction/_run_pacmap_on_compression_simulations.py
index ecf8f5a..205d9de 100644
--- a/subcell_pipeline/analysis/dimensionality_reduction/_run_pacmap_on_compression_simulations.py
+++ b/subcell_pipeline/analysis/dimensionality_reduction/_run_pacmap_on_compression_simulations.py
@@ -67,7 +67,9 @@
 """

 # %%
-readdy_data = get_merged_data(readdy_bucket, series_name, condition_keys, random_seeds)
+readdy_data = get_merged_data(
+    readdy_bucket, f"ACTIN_{series_name}", condition_keys, random_seeds
+)
 readdy_data["simulator"] = "readdy"

 # %%
diff --git a/subcell_pipeline/analysis/dimensionality_reduction/_run_pca_on_compression_simulations.py b/subcell_pipeline/analysis/dimensionality_reduction/_run_pca_on_compression_simulations.py
index bdcd38c..e7e5bd3 100644
--- a/subcell_pipeline/analysis/dimensionality_reduction/_run_pca_on_compression_simulations.py
+++ b/subcell_pipeline/analysis/dimensionality_reduction/_run_pca_on_compression_simulations.py
@@ -81,7 +81,9 @@
 """

 # %%
-readdy_data = get_merged_data(readdy_bucket, f"ACTIN_{series_name}", condition_keys, random_seeds)
+readdy_data = get_merged_data(
+    readdy_bucket, f"ACTIN_{series_name}", condition_keys, random_seeds
+)
 readdy_data["simulator"] = "readdy"

 # %%

From 36065d0735b4c273bb5f0df203072851e1aadc0b Mon Sep 17 00:00:00 2001
From: jessicasyu <15913767+jessicasyu@users.noreply.github.com>
Date: Wed, 3 Jul 2024 16:27:49 -0400
Subject: [PATCH 23/63] Add compression metric descriptions and bounds to enum

---
 .../compression_metrics/compression_metric.py | 52 +++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/subcell_pipeline/analysis/compression_metrics/compression_metric.py b/subcell_pipeline/analysis/compression_metrics/compression_metric.py
index 8de9d64..260b746 100644
--- a/subcell_pipeline/analysis/compression_metrics/compression_metric.py
+++ b/subcell_pipeline/analysis/compression_metrics/compression_metric.py
@@ -61,6 +61,58 @@ def label(self: Enum) -> str:
         }
         return labels.get(self.value, "")

+    def description(self: Enum) -> str:
+        """
+        Return the description for the compression metric.
+
+        Parameters
+        ----------
+        self
+            the CompressionMetric object
+
+        Returns
+        -------
+        :
+            The description (and units) for the compression metric.
+        """
+        units = {
+            CompressionMetric.NON_COPLANARITY.value: "3rd component variance from PCA",
+            CompressionMetric.PEAK_ASYMMETRY.value: "normalized peak distance",
+            CompressionMetric.SUM_BENDING_ENERGY.value: "sum of bending energy",
+            CompressionMetric.AVERAGE_PERP_DISTANCE.value: "distance (nm)",
+            CompressionMetric.TOTAL_FIBER_TWIST.value: "total fiber twist",
+            CompressionMetric.CALC_BENDING_ENERGY.value: "energy",
+            CompressionMetric.CONTOUR_LENGTH.value: "filament contour length (nm)",
+            CompressionMetric.COMPRESSION_RATIO.value: "compression ratio",
+        }
+        return units.get(self.value, "")
+
+    def bounds(self: Enum) -> tuple[float, float]:
+        """
+        Return the default bounds for the compression metric.
+
+        Parameters
+        ----------
+        self
+            the CompressionMetric object
+
+        Returns
+        -------
+        :
+            The default bounds for the compression metric.
+        """
+        bounds = {
+            CompressionMetric.NON_COPLANARITY.value: (0, 0.03),
+            CompressionMetric.PEAK_ASYMMETRY.value: (0, 0.5),
+            CompressionMetric.SUM_BENDING_ENERGY.value: (0, 0),  # TODO
+            CompressionMetric.AVERAGE_PERP_DISTANCE.value: (0, 85.0),
+            CompressionMetric.TOTAL_FIBER_TWIST.value: (0, 0),  # TODO
+            CompressionMetric.CALC_BENDING_ENERGY.value: (0, 10),
+            CompressionMetric.CONTOUR_LENGTH.value: (480, 505),
+            CompressionMetric.COMPRESSION_RATIO.value: (0, 0),  # TODO
+        }
+        return bounds.get(self.value, (0, 0))
+
     def calculate_metric(
         self, polymer_trace: np.ndarray, **options: dict[str, Any]
     ) -> Union[float, np.floating[Any]]:

From 6f7b3d5ca2c744ae057b70fbb4b775940d5d18ec Mon Sep 17 00:00:00 2001
From: jessicasyu <15913767+jessicasyu@users.noreply.github.com>
Date: Wed, 3 Jul 2024 16:28:24 -0400
Subject: [PATCH 24/63] Update compression workflow to include non compression
 cases

---
 .../analysis/compression_metrics/README.md    |  4 +-
 .../_compare_compression_metrics.py           | 45 ++++++++++++++-----
 2 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/subcell_pipeline/analysis/compression_metrics/README.md b/subcell_pipeline/analysis/compression_metrics/README.md
index f5705dd..b595d76 100644
--- a/subcell_pipeline/analysis/compression_metrics/README.md
+++ b/subcell_pipeline/analysis/compression_metrics/README.md
@@ -2,6 +2,6 @@

 ## Metrics for comparing traces of compressed fibers

-Analysis combines compression simulations from Cytosim and Readdy and calculates various metrics to compare the compressed fibers.
+Analysis combines compression simulations from Cytosim and Readdy and calculates various compression metrics to compare fibers.
-- **Calculate compression metrics** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/analysis/compression_metrics/_compare_compression_metrics.html)) +- **Compare compression metrics between simulators** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/analysis/compression_metrics/_compare_compression_metrics.html)) diff --git a/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py b/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py index e0d4d83..989029e 100644 --- a/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py +++ b/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py @@ -1,5 +1,5 @@ # %% [markdown] -# # Compare metrics across simulators +# # Compare compression metrics between simulators # %% [markdown] """ @@ -39,12 +39,14 @@ Defines the `COMPRESSION_VELOCITY` simulation series, which compresses a single 500 nm actin fiber at four different velocities (4.7, 15, 47, and 150 μm/s) with -five replicates each (random seeds 1, 2, 3, 4, and 5). +five replicates each and the baseline `NO_COMPRESSION` simulation series, which +simulates a single actin fiber with a free barbed end across five replicates. """ # %% # Name of the simulation series -series_name: str = "COMPRESSION_VELOCITY" +compression_series_name: str = "COMPRESSION_VELOCITY" +no_compression_series_name: str = "NO_COMPRESSION" # S3 bucket Cytosim for input and output files cytosim_bucket: str = "s3://cytosim-working-bucket" @@ -89,15 +91,26 @@ """ # %% -cytosim_metrics = get_compression_metric_data( +cytosim_metrics_compression = get_compression_metric_data( bucket=cytosim_bucket, - series_name=series_name, + series_name=compression_series_name, condition_keys=condition_keys, random_seeds=random_seeds, metrics=metrics, recalculate=recalculate, ) -cytosim_metrics["simulator"] = "cytosim" +cytosim_metrics_compression["simulator"] = "cytosim" + +# %% +cytosim_metrics_no_compression = get_compression_metric_data( + bucket=cytosim_bucket, + series_name=no_compression_series_name, + condition_keys=[""], + random_seeds=random_seeds, + metrics=metrics, + recalculate=recalculate, +) +cytosim_metrics_no_compression["simulator"] = "cytosim" # %% [markdown] """ @@ -105,15 +118,26 @@ """ # %% -readdy_metrics = get_compression_metric_data( +readdy_metrics_compression = get_compression_metric_data( bucket=readdy_bucket, - series_name=f"ACTIN_{series_name}", + series_name=f"ACTIN_{compression_series_name}", condition_keys=condition_keys, random_seeds=random_seeds, metrics=metrics, recalculate=recalculate, ) -readdy_metrics["simulator"] = "readdy" +readdy_metrics_compression["simulator"] = "readdy" + +# %% +readdy_metrics_no_compression = get_compression_metric_data( + bucket=readdy_bucket, + series_name=f"ACTIN_{no_compression_series_name}", + condition_keys=[""], + random_seeds=random_seeds, + metrics=metrics, + recalculate=recalculate, +) +readdy_metrics_no_compression["simulator"] = "readdy" # %% [markdown] """ @@ -121,7 +145,7 @@ """ # %% -combined_metrics = pd.concat([cytosim_metrics, readdy_metrics]) +combined_metrics = pd.concat([cytosim_metrics_compression, readdy_metrics_compression]) 
combined_metrics["repeat"] = combined_metrics["seed"] - 1 combined_metrics["velocity"] = combined_metrics["key"].astype("int") / 10 @@ -135,7 +159,6 @@ combined_metrics, str(save_location), "actin_compression_combined_metrics.csv" ) - # %% [markdown] """ ## Plot metrics vs time From e89b6a0f5f2e631341de99ceb506894568c66cce Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Wed, 3 Jul 2024 16:29:04 -0400 Subject: [PATCH 25/63] Fix readdy post processor normal calculation to skip start and end --- subcell_pipeline/simulation/readdy/post_processor.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/subcell_pipeline/simulation/readdy/post_processor.py b/subcell_pipeline/simulation/readdy/post_processor.py index baa5739..9dbe1db 100644 --- a/subcell_pipeline/simulation/readdy/post_processor.py +++ b/subcell_pipeline/simulation/readdy/post_processor.py @@ -373,11 +373,15 @@ def linear_fiber_normals( result.append([]) particles = self.trajectory[time_ix].particles for chain_ix in range(len(fiber_chain_ids[time_ix])): + n_particles = len(fiber_chain_ids[time_ix][chain_ix]) for particle_ix, particle_id in enumerate( fiber_chain_ids[time_ix][chain_ix] ): + # Skip first and last particle + if particle_ix == 0 or particle_ix == n_particles - 1: + continue position = particles[particle_id].position - axis_position = axis_positions[time_ix][chain_ix][particle_ix] + axis_position = axis_positions[time_ix][chain_ix][particle_ix - 1] direction = ReaddyPostProcessor._normalize(position - axis_position) result[time_ix].append( np.array( From e9921d2ec193c8d75566a1331a5ffd919a7cc059 Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Wed, 3 Jul 2024 16:30:09 -0400 Subject: [PATCH 26/63] Remove outdated visualization files --- .../visualization/add_readdy_plots.py | 116 ---- .../create_simularium_outputs.ipynb | 344 ----------- .../create_simularium_outputs.py | 580 ------------------ .../create_simularium_outputs_fix_readdy.py | 250 -------- 4 files changed, 1290 deletions(-) delete mode 100644 subcell_pipeline/visualization/add_readdy_plots.py delete mode 100644 subcell_pipeline/visualization/create_simularium_outputs.ipynb delete mode 100644 subcell_pipeline/visualization/create_simularium_outputs.py delete mode 100644 subcell_pipeline/visualization/create_simularium_outputs_fix_readdy.py diff --git a/subcell_pipeline/visualization/add_readdy_plots.py b/subcell_pipeline/visualization/add_readdy_plots.py deleted file mode 100644 index 77b82ac..0000000 --- a/subcell_pipeline/visualization/add_readdy_plots.py +++ /dev/null @@ -1,116 +0,0 @@ -import os - -import boto3 -import numpy as np -from botocore.exceptions import ClientError -from simularium_readdy_models.visualization import ActinVisualization - -BUCKET_NAME = "readdy-working-bucket" - -s3_client = boto3.client("s3") - - -def download_h5_file(file_name): - """ - Download files (skip files that already exist) - """ - if os.path.isfile(f"data/aws_downloads/{file_name}.h5"): - return - try: - s3_client.download_file( - BUCKET_NAME, - f"outputs/{file_name}.h5", - f"data/aws_downloads/{file_name}.h5", - ) - print(f"Downloaded {file_name}") - except ClientError: - print(f"!!! 
Failed to download {file_name}") - - -def download_data(conditions, num_repeats): - if not os.path.isdir("data"): - os.makedirs("data") - if not os.path.isdir("data/aws_downloads"): - os.makedirs("data/aws_downloads") - for repeat in range(num_repeats): - download_h5_file(f"actin_compression_baseline_{repeat}_0") - for condition in conditions: - download_h5_file(f"actin_compression_velocity={condition}_{repeat}") - - -def add_plots(parameters, total_steps, conditions, num_repeats): - """ - Re-visualize the trajectories to add plots - """ - for repeat in range(num_repeats): - ActinVisualization.analyze_and_visualize_trajectory( - f"data/aws_downloads/actin_compression_baseline_{repeat}_0", - total_steps["baseline"], - parameters, - ) - for condition in conditions: - ActinVisualization.analyze_and_visualize_trajectory( - f"data/aws_downloads/actin_compression_velocity={condition}_{repeat}", - total_steps[condition], - parameters, - ) - - -def upload_simularium_file(file_name): - """ - Upload files (warning for files that fail) - """ - if not os.path.isfile(f"data/aws_downloads/{file_name}.h5.simularium"): - print(f"!!! Not found, could not upload {file_name}") - return - try: - s3_client.upload_file( - f"data/aws_downloads/{file_name}.h5.simularium", - BUCKET_NAME, - f"outputs/{file_name}.h5.simularium", - ) - print(f"Uploaded {file_name}") - except ClientError as e: - print(f"!!! Failed to upload {file_name}") - - -def upload_to_s3(conditions, num_repeats): - for repeat in range(num_repeats): - upload_simularium_file(f"actin_compression_baseline_{repeat}_0") - for condition in conditions: - upload_simularium_file(f"actin_compression_velocity={condition}_{repeat}") - - -def main(): - num_repeats = 3 - conditions = [ - "4.7", - "15", - "47", - "150", - ] - total_steps = { - "4.7": 3.2e8, - "15": 1e8, - "47": 3.2e7, - "150": 1e7, - "baseline": 1e7, - } - parameters = { - "box_size": np.array([600.0, 600.0, 600.0]), - "internal_timestep": 0.1, - "longitudinal_bonds": True, - "periodic_boundary": False, - "plot_actin_structure": True, - "plot_actin_compression": True, - "visualize_edges": True, - "visualize_normals": True, - "visualize_control_pts": True, - } - # download_data(conditions, num_repeats) - # add_plots(parameters, total_steps, conditions, num_repeats) - upload_to_s3(conditions, num_repeats) - - -if __name__ == "__main__": - main() diff --git a/subcell_pipeline/visualization/create_simularium_outputs.ipynb b/subcell_pipeline/visualization/create_simularium_outputs.ipynb deleted file mode 100644 index 898574c..0000000 --- a/subcell_pipeline/visualization/create_simularium_outputs.ipynb +++ /dev/null @@ -1,344 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Generate Simularium Outputs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import boto3\n", - "import pandas as pd\n", - "import numpy as np\n", - "from subcell_analysis.cytosim.post_process_cytosim import cytosim_to_simularium\n", - "from subcell_analysis.compression_analysis import COMPRESSIONMETRIC" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from simulariumio.cytosim import CytosimConverter\n", - "from simulariumio import ScatterPlotData, TrajectoryConverter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "num_repeats = 5\n", - "config_id = 4" - ] - }, - { - "cell_type": 
"markdown", - "metadata": {}, - "source": [ - "Download files (only needs to be done once)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "s3_client = boto3.client(\"s3\")\n", - "for repeat in range(num_repeats):\n", - " s3_client.download_file(\"cytosim-working-bucket\", f\"vary_compress_rate0006/outputs/{repeat}/fiber_segment_curvature.txt\", f\"data/fiber_segment_curvature_{repeat}.txt\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process single repeat" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "repeat = 0\n", - "input_file_path = f\"data/fiber_segment_curvature_{repeat}.txt\"\n", - "\n", - "box_size = 3.0\n", - "scale_factor = 100\n", - "fiber_data = cytosim_to_simularium(input_file_path, box_size=box_size, scale_factor=scale_factor)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create cytosim converter object" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cytosim_converter = CytosimConverter(fiber_data)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Read metric data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_path = f\"dataframes/actin_forces{config_id}_{repeat}_compression_metrics.csv\"\n", - "df = pd.read_csv(df_path)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Add metric plots" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_metrics = [COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE, COMPRESSIONMETRIC.TOTAL_FIBER_TWIST, COMPRESSIONMETRIC.SUM_BENDING_ENERGY, COMPRESSIONMETRIC.PEAK_ASYMMETRY, COMPRESSIONMETRIC.NON_COPLANARITY]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for metric in plot_metrics:\n", - " metric_by_time = df.groupby([\"time\"])[metric.value].mean()\n", - " cytosim_converter.add_plot(\n", - " ScatterPlotData(\n", - " title=f\"{metric} over time\",\n", - " xaxis_title=\"Time\",\n", - " yaxis_title=metric.value,\n", - " xtrace=np.arange(len(metric_by_time))*1E-5,\n", - " ytraces={\n", - " f\"repeat {repeat}\": metric_by_time,\n", - " },\n", - " )\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Save converted data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cytosim_converter.save(f\"outputs/vary_compress_rate_0006_repeat_{repeat}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process multiple repeats" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "box_size = 3.0\n", - "scale_factor = 100\n", - "colors = [\"#F0F0F0\", \"#0000FF\", \"#FF0000\", \"#00FF00\", \"#FF00FF\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create initial trajectory data object" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "input_file_path = f\"data/fiber_segment_curvature_0.txt\"\n", - "fiber_data = cytosim_to_simularium(input_file_path, box_size=box_size, scale_factor=scale_factor, color=colors[0], actin_number=0)\n", - "cytosim_converter = 
CytosimConverter(fiber_data)\n", - "\n", - "trajectory_data = cytosim_converter._data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Append additional repeats to trajectory data object" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for repeat in range(1, num_repeats):\n", - " input_file_path = f\"data/fiber_segment_curvature_{repeat}.txt\"\n", - " fiber_data = cytosim_to_simularium(input_file_path, box_size=box_size, scale_factor=scale_factor, color=colors[repeat], actin_number=repeat)\n", - " cytosim_converter = CytosimConverter(fiber_data)\n", - " new_agent_data = cytosim_converter._data.agent_data\n", - "\n", - " trajectory_data.append_agents(new_agent_data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "all_repeats_converter = TrajectoryConverter(trajectory_data)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Add plots for all repeats" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_metrics = [COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE, COMPRESSIONMETRIC.TOTAL_FIBER_TWIST, COMPRESSIONMETRIC.SUM_BENDING_ENERGY, COMPRESSIONMETRIC.PEAK_ASYMMETRY, COMPRESSIONMETRIC.NON_COPLANARITY]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Get metrics for all repeats" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_list = []\n", - "for repeat in range(num_repeats):\n", - " df_path = f\"dataframes/actin_forces{config_id}_{repeat}_compression_metrics.csv\"\n", - " df = pd.read_csv(df_path) \n", - " df[\"repeat\"] = repeat\n", - " df_list.append(df)\n", - "df_all = pd.concat(df_list)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Add plots to converter object" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for metric in plot_metrics:\n", - " ytraces = {}\n", - " for repeat, df_repeat in df_all.groupby(\"repeat\"):\n", - " ytraces[f\"repeat {repeat}\"] = df_repeat.groupby([\"time\"])[metric.value].mean()\n", - "\n", - " all_repeats_converter.add_plot(\n", - " ScatterPlotData(\n", - " title=f\"{metric.value} over time\",\n", - " xaxis_title=\"Time\",\n", - " yaxis_title=metric.value,\n", - " xtrace=np.arange(metric_by_time.shape[0])*1E-5,\n", - " ytraces=ytraces,\n", - " render_mode=\"lines\",\n", - " )\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Save converted data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "all_repeats_converter.save(f\"outputs/vary_compress_rate_0006_all_repeats\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "subcell_analysis", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.10" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/subcell_pipeline/visualization/create_simularium_outputs.py b/subcell_pipeline/visualization/create_simularium_outputs.py deleted file mode 100644 index 81b624d..0000000 --- 
a/subcell_pipeline/visualization/create_simularium_outputs.py +++ /dev/null @@ -1,580 +0,0 @@ -import argparse -import math -import os -import sys -from typing import Dict, Tuple - -import boto3 -import numpy as np -import pandas as pd -from botocore.exceptions import ClientError -from pint import UnitRegistry -from scipy.spatial.transform import Rotation -from simulariumio import ( - DISPLAY_TYPE, - AgentData, - CameraData, - DisplayData, - FileConverter, - InputFileData, - MetaData, - ScatterPlotData, - TrajectoryConverter, - TrajectoryData, - UnitData, -) -from simulariumio.filters import EveryNthTimestepFilter -from subcell_analysis.compression_analysis import ( - COMPRESSIONMETRIC, - get_asymmetry_of_peak, - get_average_distance_from_end_to_end_axis, - get_bending_energy_from_trace, - get_contour_length_from_trace, - get_third_component_variance, -) -from subcell_analysis.compression_workflow_runner import compression_metrics_workflow -from subcell_analysis.cytosim.post_process_cytosim import cytosim_to_simularium - -CYTOSIM_CONDITIONS = { - "0001": 0.48, - "0002": 1.5, - "0003": 4.7, - "0004": 15, - "0005": 47, - "0006": 150, -} -READDY_CONDITIONS = [ - 4.7, - 15, - 47, - 150, -] -NUM_REPEATS = 5 -TOTAL_STEPS = 200 -POINTS_PER_FIBER = 200 -BENDING_ENERGY_SCALE_FACTOR = 1000.0 -CYTOSIM_SCALE_FACTOR = 1000.0 -BOX_SIZE = 600.0 - - -def parse_args(): - parser = argparse.ArgumentParser( - description="Visualizes ReaDDy and Cytosim actin simulations" - ) - parser.add_argument("--combined", action=argparse.BooleanOptionalAction) - parser.set_defaults(combined=False) - parser.add_argument("--cytosim", action=argparse.BooleanOptionalAction) - parser.set_defaults(cytosim=False) - parser.add_argument("--upload", action=argparse.BooleanOptionalAction) - parser.set_defaults(upload=False) - return parser.parse_args() - - -s3_client = boto3.client("s3") - - -def download_s3_file(bucket_name, s3_path, dest_path) -> bool: - """ - Download files (skip files that already exist) - """ - if os.path.isfile(dest_path): - # already downloaded - return False - try: - s3_client.download_file( - bucket_name, - s3_path, - dest_path, - ) - print(f"Downloaded {dest_path}") - return True - except ClientError: - print(f"!!! Failed to download {s3_path}") - return False - - -def upload_file_to_s3(bucket_name, src_path, s3_path) -> bool: - """ - Upload a file to an S3 bucket - """ - if not os.path.isfile(src_path): - print(f"!!! File does not exist to upload {src_path}") - return False - try: - s3_client.upload_file(src_path, bucket_name, s3_path) - print(f"Uploaded to {s3_path}") - return True - except ClientError: - print(f"!!! 
Failed to upload {src_path}") - return False - - -def make_download_dirs(): - if not os.path.isdir("data"): - os.makedirs("data") - if not os.path.isdir("data/aws_downloads"): - os.makedirs("data/aws_downloads") - - -def download_combined_csv_data(): - make_download_dirs() - # combined csv is in ReaDDy bucket for now - download_s3_file( - bucket_name="readdy-working-bucket", - s3_path=f"outputs/{COMBINED_CSV_PATH}", - dest_path=f"data/aws_downloads/{COMBINED_CSV_PATH}", - ) - - -def download_cytosim_trajectory_data(): - make_download_dirs() - for condition in CYTOSIM_CONDITIONS.keys(): - for repeat_ix in range(NUM_REPEATS): - download_s3_file( - bucket_name="cytosim-working-bucket", - s3_path=f"vary_compress_rate{condition}/outputs/{repeat_ix}/fiber_points.txt", - dest_path=f"data/aws_downloads/fiber_points_{condition}_{repeat_ix}.txt", - ) - download_s3_file( - bucket_name="cytosim-working-bucket", - s3_path=f"vary_compress_rate{condition}/outputs/{repeat_ix}/singles.txt", - dest_path=f"data/aws_downloads/singles_{condition}_{repeat_ix}.txt", - ) - # baseline trajectories - for repeat_ix in range(NUM_REPEATS): - download_s3_file( - bucket_name="cytosim-working-bucket", - s3_path=f"free_barbed_end_final/outputs/{repeat_ix}/fiber_points.txt", - dest_path=f"data/aws_downloads/fiber_points_baseline_{repeat_ix}.txt", - ) - download_s3_file( - bucket_name="cytosim-working-bucket", - s3_path=f"free_barbed_end_final/outputs/{repeat_ix}/singles.txt", - dest_path=f"data/aws_downloads/singles_baseline_{repeat_ix}.txt", - ) - - -def empty_scatter_plots( - total_steps: int = -1, - times: np.ndarray = None, - time_units: str = None, -) -> Dict[COMPRESSIONMETRIC, ScatterPlotData]: - if total_steps < 0 and times is None: - raise Exception("Either total_steps or times array is required for plots") - elif times is None: - # use normalized time - xlabel = "T (normalized)" - xtrace = (1 / float(total_steps)) * np.arange(total_steps) - else: - # use actual time - xlabel = f"T ({time_units})" - xtrace = times - total_steps = times.shape[0] - return { - COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE: ScatterPlotData( - title="Average Perpendicular Distance", - xaxis_title=xlabel, - yaxis_title="distance (nm)", - xtrace=xtrace, - ytraces={ - "<<<": np.zeros(total_steps), - ">>>": 85.0 * np.ones(total_steps), - }, - render_mode="lines", - ), - COMPRESSIONMETRIC.CALC_BENDING_ENERGY: ScatterPlotData( - title="Bending Energy", - xaxis_title=xlabel, - yaxis_title="energy", - xtrace=xtrace, - ytraces={ - "<<<": np.zeros(total_steps), - ">>>": 10.0 * np.ones(total_steps), - }, - render_mode="lines", - ), - COMPRESSIONMETRIC.NON_COPLANARITY: ScatterPlotData( - title="Non-coplanarity", - xaxis_title=xlabel, - yaxis_title="3rd component variance from PCA", - xtrace=xtrace, - ytraces={ - "<<<": np.zeros(total_steps), - ">>>": 0.03 * np.ones(total_steps), - }, - render_mode="lines", - ), - COMPRESSIONMETRIC.PEAK_ASYMMETRY: ScatterPlotData( - title="Peak Asymmetry", - xaxis_title=xlabel, - yaxis_title="normalized peak distance", - xtrace=xtrace, - ytraces={ - "<<<": np.zeros(total_steps), - ">>>": 0.5 * np.ones(total_steps), - }, - render_mode="lines", - ), - COMPRESSIONMETRIC.CONTOUR_LENGTH: ScatterPlotData( - title="Contour Length", - xaxis_title=xlabel, - yaxis_title="filament contour length (nm)", - xtrace=xtrace, - ytraces={ - "<<<": 480 * np.ones(total_steps), - ">>>": 505 * np.ones(total_steps), - }, - render_mode="lines", - ), - } - - -def rmsd(vec1: np.ndarray, vec2: np.ndarray) -> np.ndarray: - return np.sqrt(((((vec1 - 
vec2) ** 2)) * 3).mean()) - - -def align(fibers: np.ndarray) -> np.ndarray: - """ - Rotationally align the given fibers around the x-axis. - - Parameters - ---------- - fiber_points: np.ndarray (shape = time x fiber x (3 * points_per_fiber)) - Array containing the flattened x,y,z positions of control points - for each fiber at each time. - - Returns - ---------- - aligned_data: np.ndarray - The given data aligned. - """ - # get angle to align each fiber at the last time point - align_by = [] - points_per_fiber = int(fibers.shape[2] / 3) - ref = fibers[-1][0].copy().reshape((points_per_fiber, 3)) - for fiber_ix in range(len(fibers[-1])): - best_rmsd = math.inf - for angle in np.linspace(0, 2 * np.pi, 1000): - rot = Rotation.from_rotvec(angle * np.array([1, 0, 0])) - new_vec = Rotation.apply( - rot, fibers[-1][fiber_ix].copy().reshape((points_per_fiber, 3)) - ) - test_rmsd = rmsd(new_vec, ref) - if test_rmsd < best_rmsd: - best_angle = angle - best_rmsd = test_rmsd - align_by.append(best_angle) - # align all the fibers to ref across all time points - aligned = np.zeros_like(fibers) - for fiber_ix in range(fibers.shape[1]): - rot = Rotation.from_rotvec(align_by[fiber_ix] * np.array([1, 0, 0])) - for time_ix in range(fibers.shape[0]): - fiber = fibers[time_ix][fiber_ix].copy().reshape((points_per_fiber, 3)) - new_fiber = Rotation.apply(rot, fiber) - aligned[time_ix][fiber_ix] = new_fiber.flatten() - return aligned - - -def save_combined_simularium(): - df = pd.read_csv(f"data/aws_downloads/{COMBINED_CSV_PATH}") - simulators = ["cytosim", "readdy"] - colors = { - "cytosim": [ - "#4DFE8A", - "#c1fe4d", - "#fee34d", - "#fe8b4d", - ], - "readdy": [ - "#94dbfc", - "#627EFB", - "#b594fc", - "#e994fc", - ], - } - total_conditions = NUM_REPEATS * len(simulators) * len(CYTOSIM_CONDITIONS.keys()) - subpoints = np.zeros((TOTAL_STEPS, total_conditions, 3 * POINTS_PER_FIBER)) - type_names = [] - display_data = {} - scatter_plots = empty_scatter_plots(total_steps=TOTAL_STEPS) - # these metrics need to be multiplied by 1000 in cytosim because of different units - cytosim_metrics_to_scale = [ - COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE, - COMPRESSIONMETRIC.CONTOUR_LENGTH, - ] - for sim_ix, simulator in enumerate(simulators): - sim_df = df.loc[df["simulator"] == simulator] - sim_df.sort_values( - by=["repeat", "simulator", "velocity", "time", "monomer_ids"] - ) - for condition_ix, condition in enumerate(READDY_CONDITIONS): - condition_df = sim_df.loc[sim_df["velocity"] == condition] - for repeat_ix in range(NUM_REPEATS): - rep_df = condition_df.loc[condition_df["repeat"] == repeat_ix] - for time_ix in range(TOTAL_STEPS): - ix = ( - (sim_ix * len(READDY_CONDITIONS) * NUM_REPEATS) - + (condition_ix * NUM_REPEATS) - + repeat_ix - ) - subpoints[time_ix][ix] = ( - CYTOSIM_SCALE_FACTOR if simulator == "cytosim" else 1 - ) * np.array( - rep_df[time_ix * TOTAL_STEPS : (time_ix + 1) * TOTAL_STEPS][ - ["xpos", "ypos", "zpos"] - ] - ).flatten() - type_names.append(f"{simulator}#{condition} um/s {repeat_ix}") - display_data[type_names[-1]] = DisplayData( - name=type_names[-1], - display_type=DISPLAY_TYPE.FIBER, - color=colors[simulator][condition_ix], - ) - metrics_df = compression_metrics_workflow( - rep_df.copy(), list(scatter_plots.keys()) - ) - metrics_df = metrics_df[metrics_df["monomer_ids"] == 0] - for metric in scatter_plots: - scale_factor = ( - CYTOSIM_SCALE_FACTOR - if ( - ( - simulator == "cytosim" - and metric in cytosim_metrics_to_scale - ) - or metric == COMPRESSIONMETRIC.CALC_BENDING_ENERGY - ) - else 1.0 - 
) - scatter_plots[metric].ytraces[type_names[-1]] = ( - scale_factor * np.array(metrics_df[metric.value]) - ) - traj_data = TrajectoryData( - meta_data=MetaData( - box_size=np.array([BOX_SIZE, BOX_SIZE, BOX_SIZE]), - camera_defaults=CameraData( - position=np.array([10.0, 0.0, 200.0]), - look_at_position=np.array([10.0, 0.0, 0.0]), - fov_degrees=60.0, - ), - trajectory_title="Actin compression in Cytosim and Readdy", - ), - agent_data=AgentData( - times=np.arange(TOTAL_STEPS), - n_agents=total_conditions * np.ones((TOTAL_STEPS)), - viz_types=1001 - * np.ones((TOTAL_STEPS, total_conditions)), # fiber viz type = 1001 - unique_ids=np.array(TOTAL_STEPS * [list(range(total_conditions))]), - types=TOTAL_STEPS * [type_names], - positions=np.zeros((TOTAL_STEPS, total_conditions, 3)), - radii=np.ones((TOTAL_STEPS, total_conditions)), - n_subpoints=3 * POINTS_PER_FIBER * np.ones((TOTAL_STEPS, total_conditions)), - subpoints=align(subpoints), - display_data=display_data, - ), - time_units=UnitData("count"), # frames - spatial_units=UnitData("nm"), # nanometer - ) - converter = TrajectoryConverter(traj_data) - for metric, plot in scatter_plots.items(): - converter.add_plot(plot, "scatter") - converter.save(f"data/actin_compression") - - -def time_increment(raw_total_steps): - """ - Find a time increment to get the total steps close to 1000 - """ - if raw_total_steps < 2000: - return 1 - magnitude = math.floor(math.log(raw_total_steps, 10)) - amount = raw_total_steps / 10**magnitude - if amount > 5: - return 5 * 10 ** (magnitude - 3) - return 10 ** (magnitude - 3) - - -ureg = UnitRegistry() - - -def find_time_units(raw_time: float, units: str = "s") -> Tuple[str, float]: - """ - Get the compact time units and a multiplier to put the times in those units - """ - time = ureg.Quantity(raw_time, units) - time = time.to_compact() - return "{:~}".format(time.units), time.magnitude / raw_time - - -def generate_plot_data(subpoints): - n_points = int(subpoints.shape[2] / 3.0) - result = { - COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE: [], - COMPRESSIONMETRIC.CALC_BENDING_ENERGY: [], - COMPRESSIONMETRIC.NON_COPLANARITY: [], - COMPRESSIONMETRIC.PEAK_ASYMMETRY: [], - COMPRESSIONMETRIC.CONTOUR_LENGTH: [], - } - total_steps = subpoints.shape[0] - for time_ix in range(total_steps): - points = subpoints[time_ix][0].reshape((n_points, 3)) - result[COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE].append( - get_average_distance_from_end_to_end_axis( - polymer_trace=points, - ) - ) - result[COMPRESSIONMETRIC.CALC_BENDING_ENERGY].append( - BENDING_ENERGY_SCALE_FACTOR - * get_bending_energy_from_trace( - polymer_trace=points, - ) - ) - result[COMPRESSIONMETRIC.NON_COPLANARITY].append( - get_third_component_variance( - polymer_trace=points, - ) - ) - result[COMPRESSIONMETRIC.PEAK_ASYMMETRY].append( - get_asymmetry_of_peak( - polymer_trace=points, - ) - ) - result[COMPRESSIONMETRIC.CONTOUR_LENGTH].append( - get_contour_length_from_trace( - polymer_trace=points, - ) - ) - return result - - -def filter_time(converter) -> TrajectoryConverter: - """ - Use Simulariumio time filter - """ - time_inc = int(converter._data.agent_data.times.shape[0] / 1000.0) - if time_inc < 2: - return converter - converter._data = converter.filter_data( - [ - EveryNthTimestepFilter( - n=time_inc, - ), - ] - ) - return converter - - -def generate_cytosim_simularium(condition, repeat_ix) -> Tuple[TrajectoryData, str]: - is_baseline = condition == "baseline" - velocity = CYTOSIM_CONDITIONS[condition] if not is_baseline else 0.0 - condition_name = 
f"velocity={velocity}" if not is_baseline else condition - fiber_points_path = f"data/aws_downloads/fiber_points_{condition}_{repeat_ix}.txt" - singles_path = f"data/aws_downloads/singles_{condition}_{repeat_ix}.txt" - output_path = f"data/cytosim_outputs/actin_compression_{condition_name}_{repeat_ix}" - if os.path.isfile(f"{output_path}.simularium"): - print(f"Skipping v={velocity} #{repeat_ix}, output file already exists") - return None, "" - if not os.path.isfile(fiber_points_path): - raise Exception(f"fiber_points_{condition}_{repeat_ix}.txt not found") - if not os.path.isfile(singles_path): - singles_path = None - print(f"Converting Cytosim {condition_name} #{repeat_ix}") - short_condition_name = f"v={velocity}" if not is_baseline else condition - traj_data = cytosim_to_simularium( - title=f"Actin Compression {short_condition_name} {repeat_ix}", - fiber_points_path=fiber_points_path, - singles_path=singles_path, - scale_factor=CYTOSIM_SCALE_FACTOR, - ) - converter = filter_time(TrajectoryConverter(traj_data)) - time_units, time_multiplier = find_time_units(converter._data.agent_data.times[-1]) - converter._data.agent_data.times *= time_multiplier - converter._data.time_units = UnitData(time_units) - # plots - plot_data = generate_plot_data(converter._data.agent_data.subpoints) - scatter_plots = empty_scatter_plots( - times=converter._data.agent_data.times, - time_units=time_units, - ) - for metric, plot in scatter_plots.items(): - plot.ytraces["filament"] = np.array(plot_data[metric]) - - try: - converter.add_plot(plot, "scatter") - except: - import ipdb - - ipdb.set_trace() - - return converter._data, f"{condition_name}_{repeat_ix}" - - -def load_all_cytosim_simularium(baseline: bool = True) -> Dict[str, TrajectoryData]: - result = {} - for condition in CYTOSIM_CONDITIONS.keys(): - for repeat_ix in range(NUM_REPEATS): - traj_data, condition_name = generate_cytosim_simularium( - condition, repeat_ix - ) - if traj_data is not None: - result[condition_name] = traj_data - if not baseline: - return result - for repeat_ix in range(NUM_REPEATS): - traj_data, condition_name = generate_cytosim_simularium("baseline", repeat_ix) - if traj_data is not None: - result[condition_name] = traj_data - return result - - -def save_cytosim_trajectories(cytosim_traj_data: Dict[str, TrajectoryData]): - if not os.path.isdir("data/cytosim_outputs"): - os.makedirs("data/cytosim_outputs") - for condition_name, traj_data in cytosim_traj_data.items(): - TrajectoryConverter(traj_data).save( - f"data/cytosim_outputs/actin_compression_{condition_name}" - ) - - -def upload_cytosim_trajectories(): - for condition in CYTOSIM_CONDITIONS.keys(): - velocity = CYTOSIM_CONDITIONS[condition] - for repeat in range(NUM_REPEATS): - upload_file_to_s3( - bucket_name="cytosim-working-bucket", - src_path=f"data/cytosim_outputs/actin_compression_velocity={velocity}_{repeat}.simularium", - s3_path=f"simularium/actin_compression_velocity={velocity}_{repeat}.simularium", - ) - for repeat in range(NUM_REPEATS): - upload_file_to_s3( - bucket_name="cytosim-working-bucket", - src_path=f"data/cytosim_outputs/actin_compression_baseline_{repeat}.simularium", - s3_path=f"simularium/actin_compression_baseline_{repeat}.simularium", - ) - - -def main(): - args = parse_args() - if not (args.combined or args.cytosim): - print("Please specify either --combined or --cytosim arguments") - if args.combined: - # save one simularium file with all cytosim and readdy trajectories - download_combined_csv_data() - save_combined_simularium() - if 
args.upload: - upload_file_to_s3( - bucket_name="readdy-working-bucket", - src_path=f"data/actin_compression.simularium", - s3_path=f"outputs/actin_compression_cytosim_readdy.simularium", - ) - elif args.cytosim: - # save an individual simularium file for each cytosim trajectory - download_cytosim_trajectory_data() - cytosim_traj_data = load_all_cytosim_simularium() - save_cytosim_trajectories(cytosim_traj_data) - if args.upload: - upload_cytosim_trajectories() - - -if __name__ == "__main__": - main() diff --git a/subcell_pipeline/visualization/create_simularium_outputs_fix_readdy.py b/subcell_pipeline/visualization/create_simularium_outputs_fix_readdy.py deleted file mode 100644 index d41d9f8..0000000 --- a/subcell_pipeline/visualization/create_simularium_outputs_fix_readdy.py +++ /dev/null @@ -1,250 +0,0 @@ -import argparse -import math -import os -import sys -from typing import Dict, Tuple - -import boto3 -import numpy as np -import pandas as pd -from botocore.exceptions import ClientError -from pint import UnitRegistry -from scipy.spatial.transform import Rotation -from simulariumio import ( - DISPLAY_TYPE, - AgentData, - CameraData, - DisplayData, - FileConverter, - InputFileData, - MetaData, - ScatterPlotData, - TrajectoryConverter, - TrajectoryData, - UnitData, -) -from simulariumio.filters import EveryNthTimestepFilter -from subcell_analysis.compression_analysis import ( - COMPRESSIONMETRIC, - get_asymmetry_of_peak, - get_average_distance_from_end_to_end_axis, - get_bending_energy_from_trace, - get_contour_length_from_trace, - get_third_component_variance, -) -from subcell_analysis.compression_workflow_runner import compression_metrics_workflow -from subcell_analysis.cytosim.post_process_cytosim import cytosim_to_simularium - -CYTOSIM_CONDITIONS = { - "0001": 0.48, - "0002": 1.5, - "0003": 4.7, - "0004": 15, - "0005": 47, - "0006": 150, -} -READDY_CONDITIONS = [ - 4.7, - 15, - 47, - 150, -] -NUM_REPEATS = 5 -TOTAL_STEPS = 200 -POINTS_PER_FIBER = 200 -BENDING_ENERGY_SCALE_FACTOR = 1000.0 -CYTOSIM_SCALE_FACTOR = 1000.0 -BOX_SIZE = 600.0 - - -def parse_args(): - parser = argparse.ArgumentParser( - description="Visualizes ReaDDy and Cytosim actin simulations" - ) - parser.add_argument("--combined", action=argparse.BooleanOptionalAction) - parser.set_defaults(combined=False) - parser.add_argument("--cytosim", action=argparse.BooleanOptionalAction) - parser.set_defaults(cytosim=False) - parser.add_argument("--upload", action=argparse.BooleanOptionalAction) - parser.set_defaults(upload=False) - return parser.parse_args() - - -s3_client = boto3.client("s3") -for repeat in range(num_repeats): - s3_client.download_file( - "cytosim-working-bucket", - f"vary_compress_rate0003/outputs/{repeat}/fiber_segment_curvature.txt", - f"../data/fiber_segment_curvature_{repeat}.txt", - ) - -# %% [markdown] -# ### Process single repeat - -# %% -repeat = 0 -input_file_path = f"../data/fiber_segment_curvature_{repeat}.txt" - -box_size = 3.0 -scale_factor = 100 -fiber_data = cytosim_to_simularium( - input_file_path, box_size=box_size, scale_factor=scale_factor -) - -# %% [markdown] -# Create cytosim converter object - -# %% -cytosim_converter = CytosimConverter(fiber_data) - -# %% [markdown] -# Read metric data - -# # %% -# df_path = f"dataframes/actin_forces{config_id}_{repeat}_compression_metrics.csv" -# df = pd.read_csv(df_path) - -# %% [markdown] -# Add metric plots - -# %% -plot_metrics = [ - COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE, - COMPRESSIONMETRIC.TOTAL_FIBER_TWIST, - 
COMPRESSIONMETRIC.SUM_BENDING_ENERGY, - COMPRESSIONMETRIC.PEAK_ASYMMETRY, - COMPRESSIONMETRIC.NON_COPLANARITY, -] - -# # %% -# for metric in plot_metrics: -# metric_by_time = df.groupby(["time"])[metric.value].mean() -# cytosim_converter.add_plot( -# ScatterPlotData( -# title=f"{metric} over time", -# xaxis_title="Time", -# yaxis_title=metric.value, -# xtrace=np.arange(len(metric_by_time)) * 1e-5, -# ytraces={ -# f"repeat {repeat}": metric_by_time, -# }, -# ) -# ) - -# %% [markdown] -# Save converted data - -# %% -cytosim_converter.save(f"outputs/free_barbed_end_final{repeat}") - -# %% [markdown] -# ### Process multiple repeats - -# %% -box_size = 3.0 -scale_factor = 100 -colors = ["#F0F0F0", "#0000FF", "#FF0000", "#00FF00", "#FF00FF"] - -# %% [markdown] -# Create initial trajectory data object - -# # %% -# input_file_path = f"data/fiber_segment_curvature_0.txt" -# fiber_data = cytosim_to_simularium( -# input_file_path, -# box_size=box_size, -# scale_factor=scale_factor, -# color=colors[0], -# actin_number=0, -# ) -# cytosim_converter = CytosimConverter(fiber_data) - -# trajectory_data = cytosim_converter._data - -# %% [markdown] -# Append additional repeats to trajectory data object - -# # %% -# for repeat in range(1, num_repeats): -# input_file_path = f"data/fiber_segment_curvature_{repeat}.txt" -# fiber_data = cytosim_to_simularium( -# input_file_path, -# box_size=box_size, -# scale_factor=scale_factor, -# color=colors[repeat], -# actin_number=repeat, -# ) -# cytosim_converter = CytosimConverter(fiber_data) -# new_agent_data = cytosim_converter._data.agent_data - -# trajectory_data.append_agents(new_agent_data) - -# # %% -# all_repeats_converter = TrajectoryConverter(trajectory_data) - -# %% [markdown] -# ### Add plots for all repeats - -# %% -plot_metrics = [ - COMPRESSIONMETRIC.AVERAGE_PERP_DISTANCE, - COMPRESSIONMETRIC.TOTAL_FIBER_TWIST, - COMPRESSIONMETRIC.SUM_BENDING_ENERGY, - COMPRESSIONMETRIC.PEAK_ASYMMETRY, - COMPRESSIONMETRIC.NON_COPLANARITY, -] - -# %% [markdown] -# Get metrics for all repeats - -# # %% -# df_list = [] -# for repeat in range(num_repeats): -# df_path = f"dataframes/actin_forces{config_id}_{repeat}_compression_metrics.csv" -# df = pd.read_csv(df_path) -# df["repeat"] = repeat -# df_list.append(df) -# df_all = pd.concat(df_list) - -# %% [markdown] -# Add plots to converter object - -# # %% -# for metric in plot_metrics: -# ytraces = {} -# for repeat, df_repeat in df_all.groupby("repeat"): -# ytraces[f"repeat {repeat}"] = df_repeat.groupby(["time"])[metric.value].mean() - -# all_repeats_converter.add_plot( -# ScatterPlotData( -# title=f"{metric.value} over time", -# xaxis_title="Time", -# yaxis_title=metric.value, -# xtrace=np.arange(metric_by_time.shape[0]) * 1e-5, -# ytraces=ytraces, -# render_mode="lines", -# ) -# ) - - -def upload_cytosim_trajectories(): - for condition in CYTOSIM_CONDITIONS.keys(): - velocity = CYTOSIM_CONDITIONS[condition] - for repeat in range(NUM_REPEATS): - upload_file_to_s3( - bucket_name="cytosim-working-bucket", - src_path=f"data/cytosim_outputs/actin_compression_velocity={velocity}_{repeat}.simularium", - s3_path=f"simularium/actin_compression_velocity={velocity}_{repeat}.simularium", - ) - for repeat in range(NUM_REPEATS): - upload_file_to_s3( - bucket_name="cytosim-working-bucket", - src_path=f"data/cytosim_outputs/actin_compression_baseline_{repeat}.simularium", - s3_path=f"simularium/actin_compression_baseline_{repeat}.simularium", - ) - - -# %% -cytosim_converter.save(f"outputs/vary_compress_rate_0003_all_repeats") - -# %% From 
881125bf7b3292033e6aaa8d65585966f70ea565 Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Wed, 3 Jul 2024 17:27:05 -0400 Subject: [PATCH 27/63] Update individual simulator visualization workflows --- .../_visualize_cytosim_trajectories.py | 128 +++++ .../_visualize_individual_trajectories.py | 120 ---- .../_visualize_readdy_trajectories.py | 146 +++++ .../visualization/display_data.py | 110 ++++ .../visualization/individual_trajectory.py | 526 ++++++++++++++++++ .../visualization/spatial_annotator.py | 8 +- 6 files changed, 914 insertions(+), 124 deletions(-) create mode 100644 subcell_pipeline/visualization/_visualize_cytosim_trajectories.py delete mode 100644 subcell_pipeline/visualization/_visualize_individual_trajectories.py create mode 100644 subcell_pipeline/visualization/_visualize_readdy_trajectories.py create mode 100644 subcell_pipeline/visualization/display_data.py create mode 100644 subcell_pipeline/visualization/individual_trajectory.py diff --git a/subcell_pipeline/visualization/_visualize_cytosim_trajectories.py b/subcell_pipeline/visualization/_visualize_cytosim_trajectories.py new file mode 100644 index 0000000..0cbc7b3 --- /dev/null +++ b/subcell_pipeline/visualization/_visualize_cytosim_trajectories.py @@ -0,0 +1,128 @@ +# %% [markdown] +# # Visualize Cytosim simulation trajectories + +# %% [markdown] +""" +Notebook contains steps for visualizing Cytosim simulations of a single actin +fiber using [Simularium](https://simularium.allencell.org/). + +- [Define visualization settings](#define-visualization-settings) +- [Visualize compression simulations](#visualize-compression-simulations) +- [Visualize no compression simulations](#visualize-no-compression-simulations) +""" + +# %% +if __name__ != "__main__": + raise ImportError("This module is a notebook and is not meant to be imported") + +# %% +from pathlib import Path + +from subcell_pipeline.analysis.compression_metrics.compression_metric import ( + CompressionMetric, +) +from subcell_pipeline.visualization.individual_trajectory import ( + visualize_individual_cytosim_trajectories, +) + +# %% [markdown] +""" +## Define visualization settings + +Define simulation and visualization settings that are shared between different +simulation series. +""" + +# %% +# S3 bucket for input and output files +bucket: str = "s3://cytosim-working-bucket" + +# Number of timepoints +n_timepoints = 200 + +# Specify whether the visualization should be recalculated. Set this to true if +# you make changes to any visualization functions. +recalculate: bool = True + +# Random seeds for simulations +random_seeds: list[int] = [1, 2, 3, 4, 5] + +# Temporary path to save visualization files +temp_path: Path = Path(__file__).parents[2] / "viz_outputs" +temp_path.mkdir(parents=True, exist_ok=True) + +# List of compression metrics to include +metrics = [ + CompressionMetric.NON_COPLANARITY, + CompressionMetric.PEAK_ASYMMETRY, + CompressionMetric.AVERAGE_PERP_DISTANCE, + CompressionMetric.CALC_BENDING_ENERGY, + CompressionMetric.CONTOUR_LENGTH, + CompressionMetric.COMPRESSION_RATIO, +] + +# %% [markdown] +""" +## Visualize compression simulations + +The `COMPRESSION_VELOCITY` simulation series compresses a single 500 nm actin +fiber at four different velocities (4.7, 15, 47, and 150 μm/s) with five +replicates each. + +Iterate through all condition keys and replicates to load simulation output +files and visualize them. 
If the visualization file for a given condition key +and replicate already exists and recalculate is False, visualization is skipped. + +- Input: `(series_name)/outputs/(series_name)_(condition_key)_(index)/` +- Output: `(series_name)/viz/(series_name)_(condition_key)_(seed).simularium` +""" + +# %% +# Name of the simulation series +compression_series_name: str = "COMPRESSION_VELOCITY" + +# List of condition file keys for each velocity +compression_condition_keys: list[str] = ["0047", "0150", "0470", "1500"] + +# %% +visualize_individual_cytosim_trajectories( + bucket, + compression_series_name, + compression_condition_keys, + random_seeds, + n_timepoints, + str(temp_path), + metrics=metrics, + recalculate=recalculate, +) + +# %% [markdown] +""" +## Visualize no compression simulations + +The `NO_COMPRESSION` simulation series simulates a single actin fiber with a +free barbed end across five replicates. + +Iterate through all replicates to load simulation output files and visualize +them. If the visualization file for a given replicate already exists and +recalculate is False, visualization is skipped. + +- Input: `(series_name)/outputs/(series_name)_(index)/` +- Output: `(series_name)/viz/(series_name)_(seed).simularium` +""" + +# %% +# Name of the simulation series +no_compression_series_name: str = "NO_COMPRESSION" + +# %% +visualize_individual_cytosim_trajectories( + bucket, + no_compression_series_name, + [""], + random_seeds, + n_timepoints, + str(temp_path), + metrics=metrics, + recalculate=recalculate, +) diff --git a/subcell_pipeline/visualization/_visualize_individual_trajectories.py b/subcell_pipeline/visualization/_visualize_individual_trajectories.py deleted file mode 100644 index e137d7f..0000000 --- a/subcell_pipeline/visualization/_visualize_individual_trajectories.py +++ /dev/null @@ -1,120 +0,0 @@ -# %% [markdown] -# # Process ReaDDy simulations - -# %% [markdown] -""" - -Notebook contains steps for visualizing ReaDDy and Cytosim -simulations of a single actin fiber. - -- [Visualize ReaDDy](#visualize-readdy) -- [Visualize Cytosim](#visualize-cytosim) -""" - -# %% -if __name__ != "__main__": - raise ImportError("This module is a notebook and is not meant to be imported") - -# %% [markdown] -""" -## Visualize ReaDDy - -Iterate through all condition keys and replicates to load simulation -output files and visualize them. If the visualization file for a given -condition key and replicate already exists and recalculate is False, -parsing is skipped. 
- -- Input: `(series_name)/outputs/(series_name)_(condition_key)_(index+1).h5` -- Output: `(series_name)/viz/(series_name)_(condition_key)_(index+1).simularium` -""" - -# %% -from subcell_pipeline.visualization.visualizer import ( - visualize_individual_readdy_trajectories, -) -# %% -# Name of the simulation series -series_name: str = "ACTIN_COMPRESSION_VELOCITY" - -# S3 bucket for input and output files -bucket: str = "s3://readdy-working-bucket" - -# Number of simulation replicates -n_replicates: int = 5 - -# List of condition file keys for each velocity -condition_keys: list[str] = ["0047", "0150", "0470", "1500"] - -# Number of timepoints -n_timepoints = 200 - -# Number of monomer points per fiber -n_monomer_points = 200 - -visualize_individual_readdy_trajectories( - bucket, - "ACTIN_NO_COMPRESSION", - [""], - n_replicates, - n_timepoints, - n_monomer_points, - recalculate=True, -) - -visualize_individual_readdy_trajectories( - bucket, - "ACTIN_COMPRESSION_VELOCITY", - condition_keys, - n_replicates, - n_timepoints, - n_monomer_points, - recalculate=True, -) - -# %% [markdown] -""" -## Visualize Cytosim - -Iterate through all condition keys and random seeds to load simulation output -dataframes and visualize them. If the visualization file for a given -condition key and random seed already exists and recalculate is False, -parsing is skipped. - -- Input: `(series_name)/samples/(series_name)_(condition_key)_(seed)/` -- Output: `(series_name)/viz/(series_name)_(condition_key)_(seed).simularium` -""" - -# %% -from subcell_pipeline.visualization.visualizer import ( - visualize_individual_cytosim_trajectories, -) -# %% -# S3 bucket for input and output files -bucket: str = "s3://cytosim-working-bucket" - -# Random seeds for simulations -random_seeds: list[int] = [1, 2, 3, 4, 5] - -# List of condition file keys for each velocity -condition_keys: list[str] = ["0047", "0150", "0470", "1500"] - -# Number of timepoints -n_timepoints = 200 - -visualize_individual_cytosim_trajectories( - bucket, - "NO_COMPRESSION", - [""], - random_seeds, - n_timepoints, - recalculate=True, -) - -visualize_individual_cytosim_trajectories( - bucket, - "COMPRESSION_VELOCITY", - condition_keys, - random_seeds, - n_timepoints, - recalculate=True, -) diff --git a/subcell_pipeline/visualization/_visualize_readdy_trajectories.py b/subcell_pipeline/visualization/_visualize_readdy_trajectories.py new file mode 100644 index 0000000..dd1ccac --- /dev/null +++ b/subcell_pipeline/visualization/_visualize_readdy_trajectories.py @@ -0,0 +1,146 @@ +# %% [markdown] +# # Visualize ReaDDy simulation trajectories + +# %% [markdown] +""" +Notebook contains steps for visualizing ReaDDy simulations of a single actin +fiber using [Simularium](https://simularium.allencell.org/). + +- [Define visualization settings](#define-visualization-settings) +- [Visualize compression simulations](#visualize-compression-simulations) +- [Visualize no compression simulations](#visualize-no-compression-simulations) +""" + +# %% +if __name__ != "__main__": + raise ImportError("This module is a notebook and is not meant to be imported") + +# %% +from pathlib import Path + +from subcell_pipeline.analysis.compression_metrics.compression_metric import ( + CompressionMetric, +) +from subcell_pipeline.visualization.individual_trajectory import ( + visualize_individual_readdy_trajectories, +) + +# %% [markdown] +""" +## Define visualization settings + +Define simulation and visualization settings that are shared between different +simulation series. 
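+
+As a point of reference, here is a minimal sketch (not part of the pipeline
+itself) of how these settings set the stride between visualized frames, using
+the `time_inc = total_steps // n_timepoints` logic from
+`visualize_individual_readdy_trajectory` below:
+
+```python
+# Hypothetical condition: total_steps as in ACTIN_NO_COMPRESSION below.
+total_steps = int(1e7)
+n_timepoints = 200
+
+time_inc = total_steps // n_timepoints  # stride passed to ReaddyLoader
+print(time_inc)  # 50000 simulation steps between visualized timepoints
+```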
+""" + +# %% +# S3 bucket for input and output files +bucket: str = "s3://readdy-working-bucket" + +# Number of simulation replicates +n_replicates: int = 5 + +# Number of timepoints +n_timepoints = 200 + +# Number of monomer points per fiber +n_monomer_points = 200 + +# Specify whether the visualization should be recalculated. Set this to true if +# you make changes to any visualization functions. +recalculate: bool = True + +# Temporary path to save downloaded trajectories +temp_path: Path = Path(__file__).parents[2] / "aws_downloads" +temp_path.mkdir(parents=True, exist_ok=True) + +# List of compression metrics to include +metrics = [ + CompressionMetric.NON_COPLANARITY, + CompressionMetric.PEAK_ASYMMETRY, + CompressionMetric.AVERAGE_PERP_DISTANCE, + CompressionMetric.CALC_BENDING_ENERGY, + CompressionMetric.CONTOUR_LENGTH, + CompressionMetric.COMPRESSION_RATIO, +] + +# %% [markdown] +""" +## Visualize compression simulations + +The `ACTIN_COMPRESSION_VELOCITY` simulation series compresses a single 500 nm +actin fiber at four different velocities (4.7, 15, 47, and 150 μm/s) with five +replicates each. + +Iterate through all condition keys and replicates to load simulation output +files and visualize them. If the visualization file for a given condition key +and replicate already exists and recalculate is False, visualization is skipped. + +- Input: `(series_name)/outputs/(series_name)_(condition_key)_(index + 1).h5` +- Output: `(series_name)/viz/(series_name)_(condition_key)_(index + 1).simularium` +""" + +# %% +# Name of the simulation series +compression_series_name: str = "ACTIN_COMPRESSION_VELOCITY" + +# List of condition file keys for each velocity +compression_condition_keys: list[str] = ["0047", "0150", "0470", "1500"] + +# Total number of steps for each condition +compression_total_steps: dict[str, int] = { + "0047": int(3.2e8), + "0150": int(1e8), + "0470": int(3.2e7), + "1500": int(1e7), +} + +# %% +visualize_individual_readdy_trajectories( + bucket, + compression_series_name, + compression_condition_keys, + n_replicates, + n_timepoints, + n_monomer_points, + compression_total_steps, + str(temp_path), + metrics=metrics, + recalculate=recalculate, +) + +# %% [markdown] +""" +## Visualize no compression simulations + +The `ACTIN_NO_COMPRESSION` simulation series simulates a single actin fiber with +a free barbed end across five replicates. + +Iterate through all replicates to load simulation output files and visualize +them. If the visualization file for a given replicate already exists and +recalculate is False, visualization is skipped. 
+ +- Input: `(series_name)/outputs/(series_name)_(index + 1).h5` +- Output: `(series_name)/viz/(series_name)_(index + 1).simularium` +""" + +# %% +# Name of the simulation series +no_compression_series_name: str = "ACTIN_NO_COMPRESSION" + +# Total number of steps for each condition +no_compression_total_steps: dict[str, int] = {"": int(1e7)} + +# %% +visualize_individual_readdy_trajectories( + bucket, + no_compression_series_name, + [""], + n_replicates, + n_timepoints, + n_monomer_points, + no_compression_total_steps, + str(temp_path), + metrics=metrics, + recalculate=recalculate, +) diff --git a/subcell_pipeline/visualization/display_data.py b/subcell_pipeline/visualization/display_data.py new file mode 100644 index 0000000..9c44635 --- /dev/null +++ b/subcell_pipeline/visualization/display_data.py @@ -0,0 +1,110 @@ +from simulariumio import DISPLAY_TYPE, DisplayData + + +def get_readdy_display_data() -> dict[str, DisplayData]: + extra_radius = 1.5 + actin_radius = 2.0 + extra_radius + n_polymer_numbers = 5 + result = {} + for i in range(1, n_polymer_numbers + 1): + result.update( + { + f"actin#{i}": DisplayData( + name="actin", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#bf9b30", + ), + f"actin#mid_{i}": DisplayData( + name="actin#mid", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#bf9b30", + ), + f"actin#fixed_{i}": DisplayData( + name="actin#fixed", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#bf9b30", + ), + f"actin#mid_fixed_{i}": DisplayData( + name="actin#mid_fixed", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#bf9b30", + ), + f"actin#ATP_{i}": DisplayData( + name="actin#ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffbf00", + ), + f"actin#mid_ATP_{i}": DisplayData( + name="actin#mid_ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffbf00", + ), + f"actin#fixed_ATP_{i}": DisplayData( + name="actin#fixed_ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffbf00", + ), + f"actin#mid_fixed_ATP_{i}": DisplayData( + name="actin#mid_fixed_ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffbf00", + ), + f"actin#barbed_{i}": DisplayData( + name="actin#barbed", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffdc73", + ), + f"actin#barbed_ATP_{i}": DisplayData( + name="actin#barbed_ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffdc73", + ), + f"actin#fixed_barbed_{i}": DisplayData( + name="actin#fixed_barbed", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffdc73", + ), + f"actin#fixed_barbed_ATP_{i}": DisplayData( + name="actin#fixed_barbed_ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#ffdc73", + ), + f"actin#pointed_{i}": DisplayData( + name="actin#pointed", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#a67c00", + ), + f"actin#pointed_ATP_{i}": DisplayData( + name="actin#pointed_ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#a67c00", + ), + f"actin#pointed_fixed_{i}": DisplayData( + name="actin#pointed_fixed", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#a67c00", + ), + f"actin#pointed_fixed_ATP_{i}": DisplayData( + name="actin#pointed_fixed_ATP", + display_type=DISPLAY_TYPE.SPHERE, + radius=actin_radius, + color="#a67c00", + ), + }, + ) + return result diff --git 
a/subcell_pipeline/visualization/individual_trajectory.py b/subcell_pipeline/visualization/individual_trajectory.py new file mode 100644 index 0000000..a4d400f --- /dev/null +++ b/subcell_pipeline/visualization/individual_trajectory.py @@ -0,0 +1,526 @@ +"""Visualization methods for individual simulators.""" + +from typing import Optional + +import numpy as np +import pandas as pd +from io_collection.keys.check_key import check_key +from io_collection.load.load_buffer import load_buffer +from io_collection.load.load_text import load_text +from io_collection.save.save_buffer import save_buffer +from pint import UnitRegistry +from simulariumio import ( + DISPLAY_TYPE, + CameraData, + DisplayData, + InputFileData, + MetaData, + ScatterPlotData, + TrajectoryConverter, + UnitData, +) +from simulariumio.cytosim import CytosimConverter, CytosimData, CytosimObjectInfo +from simulariumio.filters import EveryNthTimestepFilter +from simulariumio.readdy import ReaddyConverter, ReaddyData + +from subcell_pipeline.analysis.compression_metrics.compression_analysis import ( + get_compression_metric_data, +) +from subcell_pipeline.analysis.compression_metrics.compression_metric import ( + CompressionMetric, +) +from subcell_pipeline.simulation.cytosim.post_processing import CYTOSIM_SCALE_FACTOR +from subcell_pipeline.simulation.readdy.loader import ReaddyLoader +from subcell_pipeline.simulation.readdy.parser import BOX_SIZE as READDY_BOX_SIZE +from subcell_pipeline.simulation.readdy.parser import ( + READDY_TIMESTEP, + download_readdy_hdf5, +) +from subcell_pipeline.simulation.readdy.post_processor import ReaddyPostProcessor +from subcell_pipeline.visualization.display_data import get_readdy_display_data +from subcell_pipeline.visualization.spatial_annotator import SpatialAnnotator + +READDY_SAVED_FRAMES: int = 1000 + +BOX_SIZE: np.ndarray = np.array(3 * [600.0]) + +UNIT_REGISTRY = UnitRegistry() + + +def _empty_scatter_plots( + metrics: list[CompressionMetric], + total_steps: int = -1, + times: Optional[np.ndarray] = None, + time_units: Optional[str] = None, +) -> dict[CompressionMetric, ScatterPlotData]: + """Create empty scatter plot placeholders for list of metrics.""" + + if total_steps < 0 and times is None: + raise Exception("Either total_steps or times array is required for plots") + elif times is None: + # use normalized time + xlabel = "T (normalized)" + xtrace = (1 / float(total_steps)) * np.arange(total_steps) + else: + # use actual time + xlabel = f"T ({time_units})" + xtrace = times + total_steps = times.shape[0] + + plots = {} + + for metric in metrics: + lower_bound, upper_bound = metric.bounds() + plots[metric] = ScatterPlotData( + title=metric.label(), + xaxis_title=xlabel, + yaxis_title=metric.description(), + xtrace=xtrace, + ytraces={ + "<<<": lower_bound * np.ones(total_steps), + ">>>": upper_bound * np.ones(total_steps), + }, + render_mode="lines", + ) + + return plots + + +def _add_individual_plots( + converter: TrajectoryConverter, + metrics: list[CompressionMetric], + metrics_data: pd.DataFrame, +) -> None: + """Add plots to individual trajectory with calculated metrics.""" + times = metrics_data["time"].values + scatter_plots = _empty_scatter_plots(metrics, times=times) + for metric, plot in scatter_plots.items(): + plot.ytraces["filament"] = np.array(metrics_data[metric.value]) + converter.add_plot(plot, "scatter") + + +def _add_readdy_spatial_annotations( + converter: TrajectoryConverter, + post_processor: ReaddyPostProcessor, + n_monomer_points: int, +) -> None: + """ + Add 
visualizations of edges, normals, and control points to the ReaDDy + Simularium data. + """ + # edges + edges = post_processor.edge_positions() + converter._data = SpatialAnnotator.add_fiber_agents( + converter._data, + fiber_points=edges, + type_name="edge", + fiber_width=0.5, + color="#eaeaea", + ) + + fiber_chain_ids = post_processor.linear_fiber_chain_ids(polymer_number_range=5) + axis_positions, _ = post_processor.linear_fiber_axis_positions(fiber_chain_ids) + fiber_points = post_processor.linear_fiber_control_points( + axis_positions=axis_positions, + n_points=n_monomer_points, + ) + + # normals + normals = post_processor.linear_fiber_normals( + fiber_chain_ids=fiber_chain_ids, + axis_positions=axis_positions, + normal_length=10.0, + ) + converter._data = SpatialAnnotator.add_fiber_agents( + converter._data, + fiber_points=normals, + type_name="normal", + fiber_width=0.5, + color="#685bf3", + ) + + # control points + sphere_positions = [] + for time_ix in range(len(fiber_points)): + sphere_positions.append(fiber_points[time_ix][0]) + converter._data = SpatialAnnotator.add_sphere_agents( + converter._data, + sphere_positions, + type_name="fiber point", + radius=0.8, + color="#eaeaea", + ) + + +def get_readdy_simularium_converter( + path_to_readdy_h5: str, total_steps: int +) -> TrajectoryConverter: + """ + Load from ReaDDy outputs and generate a TrajectoryConverter to visualize an + actin trajectory in Simularium. + """ + return ReaddyConverter( + ReaddyData( + timestep=1e-6 * (READDY_TIMESTEP * total_steps / READDY_SAVED_FRAMES), + path_to_readdy_h5=path_to_readdy_h5, + meta_data=MetaData( + box_size=READDY_BOX_SIZE, + camera_defaults=CameraData( + position=np.array([0.0, 0.0, 300.0]), + look_at_position=np.zeros(3), + up_vector=np.array([0.0, 1.0, 0.0]), + fov_degrees=120.0, + ), + scale_factor=1.0, + ), + display_data=get_readdy_display_data(), + time_units=UnitData("ms"), + spatial_units=UnitData("nm"), + ) + ) + + +def visualize_individual_readdy_trajectory( + bucket: str, + series_name: str, + series_key: str, + rep_ix: int, + n_timepoints: int, + n_monomer_points: int, + total_steps: int, + temp_path: str, + metrics: list[CompressionMetric], + metrics_data: pd.DataFrame, +) -> None: + """ + Save a Simularium file for a single ReaDDy trajectory with plots and spatial + annotations. + """ + + h5_file_path = download_readdy_hdf5( + bucket, series_name, series_key, rep_ix, temp_path + ) + + assert isinstance(h5_file_path, str) + + converter = get_readdy_simularium_converter(h5_file_path, total_steps) + + if metrics: + _add_individual_plots(converter, metrics, metrics_data) + + assert isinstance(h5_file_path, str) + + # TODO: fix temporal scaling? it looks like the actual data, metrics, and + # the annotations are drawing at different time scales + + rep_id = rep_ix + 1 + pickle_key = f"{series_name}/data/{series_key}_{rep_id:06d}.pkl" + time_inc = total_steps // n_timepoints + + readdy_loader = ReaddyLoader( + h5_file_path=h5_file_path, + time_inc=time_inc, + timestep=READDY_TIMESTEP, + pickle_location=bucket, + pickle_key=pickle_key, + ) + + post_processor = ReaddyPostProcessor( + readdy_loader.trajectory(), box_size=READDY_BOX_SIZE + ) + + _add_readdy_spatial_annotations(converter, post_processor, n_monomer_points) + + # Save simularium file. Turn off validate IDs for performance. 
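+    # NOTE: converter.save() appends the ".simularium" extension to the given
+    # output path, so saving to the downloaded "<series_key>_<rep_ix>.h5" file
+    # yields "<series_key>_<rep_ix>.h5.simularium", which matches the temp_key
+    # uploaded to S3 after this function returns.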
+    converter.save(output_path=h5_file_path, validate_ids=False)
+
+
+def visualize_individual_readdy_trajectories(
+    bucket: str,
+    series_name: str,
+    condition_keys: list[str],
+    n_replicates: int,
+    n_timepoints: int,
+    n_monomer_points: int,
+    total_steps: dict[str, int],
+    temp_path: str,
+    metrics: Optional[list[CompressionMetric]] = None,
+    recalculate: bool = True,
+) -> None:
+    """
+    Visualize individual ReaDDy simulations for select conditions and
+    replicates.
+
+    Parameters
+    ----------
+    bucket
+        Name of S3 bucket for input and output files.
+    series_name
+        Name of simulation series.
+    condition_keys
+        List of condition keys.
+    n_replicates
+        Number of simulation replicates.
+    n_timepoints
+        Number of equally spaced timepoints to visualize.
+    n_monomer_points
+        Number of equally spaced monomer points to visualize.
+    total_steps
+        Total number of steps for each simulation key.
+    temp_path
+        Path for saving temporary h5 files.
+    metrics
+        List of metrics to include in visualization plots.
+    recalculate
+        True to recalculate visualization files, False otherwise.
+    """
+
+    if metrics is not None:
+        print(bucket, series_name, condition_keys)
+        all_metrics_data = get_compression_metric_data(
+            bucket,
+            series_name,
+            condition_keys,
+            list(range(1, n_replicates + 1)),
+            metrics,
+            recalculate=False,
+        )
+    else:
+        metrics = []
+        all_metrics_data = pd.DataFrame(columns=["key", "seed"])
+
+    for condition_key in condition_keys:
+        series_key = f"{series_name}_{condition_key}" if condition_key else series_name
+
+        for rep_ix in range(n_replicates):
+            rep_id = rep_ix + 1
+            output_key = f"{series_name}/viz/{series_key}_{rep_id:06d}.simularium"
+
+            # Skip if output file already exists.
+            if not recalculate and check_key(bucket, output_key):
+                print(
+                    f"Simularium file for [ { output_key } ] already exists. Skipping."
+                )
+                continue
+
+            print(f"Visualizing data for [ {condition_key} ] replicate [ {rep_ix} ]")
+
+            # Filter metrics data for specific condition and replicate.
+            if condition_key:
+                metrics_data = all_metrics_data[
+                    (all_metrics_data["key"] == condition_key)
+                    & (all_metrics_data["seed"] == rep_id)
+                ]
+            else:
+                metrics_data = all_metrics_data[(all_metrics_data["seed"] == rep_id)]
+
+            visualize_individual_readdy_trajectory(
+                bucket,
+                series_name,
+                series_key,
+                rep_ix,
+                n_timepoints,
+                n_monomer_points,
+                total_steps[condition_key],
+                temp_path,
+                metrics,
+                metrics_data,
+            )
+
+            # Upload saved file to S3.
+            temp_key = f"{series_key}_{rep_ix}.h5.simularium"
+            save_buffer(bucket, output_key, load_buffer(temp_path, temp_key))
+
+
+def _find_time_units(raw_time: float, units: str = "s") -> tuple[str, float]:
+    """Get compact time units and a multiplier to put the times in those units."""
+    time = UNIT_REGISTRY.Quantity(raw_time, units)
+    time_compact = time.to_compact()
+    return f"{time_compact.units:~}", time_compact.magnitude / raw_time
+
+
+def _filter_time(
+    converter: TrajectoryConverter, n_timepoints: int
+) -> TrajectoryConverter:
+    """Filter times using simulariumio time filter."""
+    time_inc = int(converter._data.agent_data.times.shape[0] / n_timepoints)
+    if time_inc < 2:
+        return converter
+    converter._data = converter.filter_data([EveryNthTimestepFilter(n=time_inc)])
+    return converter
+
+
+def get_cytosim_simularium_converter(
+    fiber_points_data: str,
+    singles_data: str,
+    n_timepoints: int,
+) -> TrajectoryConverter:
+    """
+    Load from Cytosim outputs and generate a TrajectoryConverter to visualize an
+    actin trajectory in Simularium.
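+
+    Times are first subsampled to at most ``n_timepoints`` frames and then
+    rescaled into compact units via pint; for example, a final time of
+    0.00012 s would be reported as 120 us (illustrative values, not taken
+    from the pipeline).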
+ """ + + # TODO: fix converter not showing fiber, possible scaling issue + + singles_display_data = DisplayData( + name="linker", + radius=0.01, + display_type=DISPLAY_TYPE.SPHERE, + color="#fff", + ) + + converter = CytosimConverter( + CytosimData( + meta_data=MetaData( + box_size=BOX_SIZE, + scale_factor=CYTOSIM_SCALE_FACTOR, + ), + object_info={ + "fibers": CytosimObjectInfo( + cytosim_file=InputFileData( + file_contents=fiber_points_data, + ), + display_data={ + 1: DisplayData( + name="actin", + radius=0.02, + display_type=DISPLAY_TYPE.FIBER, + ) + }, + ), + "singles": CytosimObjectInfo( + cytosim_file=InputFileData( + file_contents=singles_data, + ), + display_data={ + 1: singles_display_data, + 2: singles_display_data, + 3: singles_display_data, + 4: singles_display_data, + }, + ), + }, + ) + ) + converter = _filter_time(converter, n_timepoints) + time_units, time_multiplier = _find_time_units(converter._data.agent_data.times[-1]) + converter._data.agent_data.times *= time_multiplier + converter._data.time_units = UnitData(time_units) + return converter + + +def visualize_individual_cytosim_trajectory( + bucket: str, + series_name: str, + series_key: str, + index: int, + n_timepoints: int, + temp_path: str, + metrics: list[CompressionMetric], + metrics_data: pd.DataFrame, +) -> None: + """Save a Simularium file for a single Cytosim trajectory with plots.""" + + output_key_template = f"{series_name}/outputs/{series_key}_{index}/%s" + fiber_points_data = load_text(bucket, output_key_template % "fiber_points.txt") + singles_data = load_text(bucket, output_key_template % "singles.txt") + + converter = get_cytosim_simularium_converter( + fiber_points_data, singles_data, n_timepoints + ) + + if metrics: + _add_individual_plots(converter, metrics, metrics_data) + + # Save simularium file. Turn off validate IDs for performance. + local_file_path = f"{temp_path}/{series_key}_{index}" + converter.save(output_path=local_file_path, validate_ids=False) + + +def visualize_individual_cytosim_trajectories( + bucket: str, + series_name: str, + condition_keys: list[str], + random_seeds: list[int], + n_timepoints: int, + temp_path: str, + metrics: Optional[list[CompressionMetric]] = None, + recalculate: bool = True, +) -> None: + """ + Visualize individual Cytosim simulations for select conditions and + replicates. + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + series_name + Name of simulation series. + condition_keys + List of condition keys. + random_seeds + Random seeds for simulations. + n_timepoints + Number of equally spaced timepoints to visualize. + temp_path + Local path for saving visualization output files. + metrics + List of metrics to include in visualization plots. + recalculate + True to recalculate visualization files, False otherwise. + """ + + if metrics is not None: + print(bucket, series_name, condition_keys) + all_metrics_data = get_compression_metric_data( + bucket, + series_name, + condition_keys, + random_seeds, + metrics, + recalculate=False, + ) + else: + metrics = [] + all_metrics_data = pd.DataFrame(columns=["key", "seed"]) + + for condition_key in condition_keys: + series_key = f"{series_name}_{condition_key}" if condition_key else series_name + + for index, seed in enumerate(random_seeds): + output_key = f"{series_name}/viz/{series_key}_{seed:06d}.simularium" + + # Skip if output file already exists. + if not recalculate and check_key(bucket, output_key): + print( + f"Simularium file for [ { output_key } ] already exists. 
Skipping." + ) + continue + + print(f"Visualizing data for [ {condition_key} ] seed [ {seed} ]") + + # Filter metrics data for specific conditon and replicate. + if condition_key: + metrics_data = all_metrics_data[ + (all_metrics_data["key"] == condition_key) + & (all_metrics_data["seed"] == seed) + ] + else: + metrics_data = all_metrics_data[(all_metrics_data["seed"] == seed)] + + visualize_individual_cytosim_trajectory( + bucket, + series_name, + series_key, + index, + n_timepoints, + temp_path, + metrics, + metrics_data, + ) + + temp_key = f"{series_key}_{index}.simularium" + save_buffer(bucket, output_key, load_buffer(temp_path, temp_key)) + break + break diff --git a/subcell_pipeline/visualization/spatial_annotator.py b/subcell_pipeline/visualization/spatial_annotator.py index 4ede56c..1392869 100644 --- a/subcell_pipeline/visualization/spatial_annotator.py +++ b/subcell_pipeline/visualization/spatial_annotator.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python - from typing import List import numpy as np @@ -172,7 +170,9 @@ def add_sphere_agents( new_agent_data.viz_types[time_ix][start_ix:end_ix] = n_spheres * [ VIZ_TYPE.DEFAULT ] - new_agent_data.types[time_ix] += [f"{type_name} {ix}" for ix in range(n_spheres)] + new_agent_data.types[time_ix] += [ + f"{type_name} {ix}" for ix in range(n_spheres) + ] new_agent_data.positions[time_ix][start_ix:end_ix] = sphere_positions[ time_ix ][:n_spheres] @@ -182,7 +182,7 @@ def add_sphere_agents( colors = ["#0000ff", "#00ff00", "#ffff00", "#ff0000", "#ff00ff"] for ix in range(max_spheres): - tn = f"{type_name} {ix}" + tn = f"{type_name} {ix}" new_agent_data.display_data[tn] = DisplayData( name=tn, display_type=DISPLAY_TYPE.SPHERE, From b0ff9e6c210296ce1a3e6ac2a16fba1991d87e84 Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Mon, 8 Jul 2024 11:58:04 -0400 Subject: [PATCH 28/63] Adjust number of sampled timepoints for Cytosim to match ReaDDy --- subcell_pipeline/simulation/post_processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subcell_pipeline/simulation/post_processing.py b/subcell_pipeline/simulation/post_processing.py index bb296d2..5f72de4 100644 --- a/subcell_pipeline/simulation/post_processing.py +++ b/subcell_pipeline/simulation/post_processing.py @@ -93,7 +93,7 @@ def sample_simulation_data_points( time_indices = np.rint( np.interp( - np.linspace(0, 1, n_timepoints), + np.linspace(0, 1, n_timepoints + 1), np.linspace(0, 1, n_unique_timepoints), np.arange(n_unique_timepoints), ) From fba0a71fb29916725e3888aa020e816372ba7593 Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Mon, 8 Jul 2024 15:08:40 -0400 Subject: [PATCH 29/63] Refactor combined trajectory visualization --- .../simulation/post_processing.py | 2 +- .../_visualize_all_trajectories_together.py | 66 ----- .../_visualize_combined_trajectories.py | 97 +++++++ .../visualization/combined_trajectory.py | 242 ++++++++++++++++++ .../visualization/individual_trajectory.py | 104 ++++---- .../visualization/scatter_plots.py | 65 +++++ 6 files changed, 458 insertions(+), 118 deletions(-) delete mode 100644 subcell_pipeline/visualization/_visualize_all_trajectories_together.py create mode 100644 subcell_pipeline/visualization/_visualize_combined_trajectories.py create mode 100644 subcell_pipeline/visualization/combined_trajectory.py create mode 100644 subcell_pipeline/visualization/scatter_plots.py diff --git a/subcell_pipeline/simulation/post_processing.py 
b/subcell_pipeline/simulation/post_processing.py index 5f72de4..f7fd82d 100644 --- a/subcell_pipeline/simulation/post_processing.py +++ b/subcell_pipeline/simulation/post_processing.py @@ -103,7 +103,7 @@ def sample_simulation_data_points( for time, group in time_data.groupby("time"): sampled_points = pd.DataFrame() - sampled_points["monomer_ids"] = np.arange(n_monomer_points) + sampled_points["fiber_point"] = np.arange(n_monomer_points) sampled_points["time"] = time for column in sampled_columns: diff --git a/subcell_pipeline/visualization/_visualize_all_trajectories_together.py b/subcell_pipeline/visualization/_visualize_all_trajectories_together.py deleted file mode 100644 index 4f2a062..0000000 --- a/subcell_pipeline/visualization/_visualize_all_trajectories_together.py +++ /dev/null @@ -1,66 +0,0 @@ -# %% [markdown] -# # Process ReaDDy simulations - -# %% [markdown] -""" - -Notebook contains steps for visualizing ReaDDy and Cytosim -simulations of a single actin fiber. - -- [Visualize Combined](#visualize-combined) -""" - -# %% -if __name__ != "__main__": - raise ImportError("This module is a notebook and is not meant to be imported") - -# %% [markdown] -""" -## Visualize Combined - -Visualize all simulations with compression from ReaDDy and Cytosim together in Simularium. - -- Input: `(readdy_series_name)/data/(readdy_series_name)_(condition_key)_(index+1).csv` - and `(cytosim_series_name)/samples/(cytosim_series_name)_(condition_key)_(seed).csv` -- Output: `actin_compression_cytosim_readdy.simularium` -""" - -# %% -from subcell_pipeline.visualization.visualizer import ( - visualize_all_compressed_trajectories_together, -) -# %% -# S3 bucket for combined input and output files -subcell_bucket: str = "s3://subcell-working-bucket" - -# S3 bucket for ReaDDy input and output files -readdy_bucket: str = "s3://readdy-working-bucket" - -# Name of the ReaDDy simulation series -readdy_series_name: str = "ACTIN_COMPRESSION_VELOCITY" - -# S3 bucket for input and output files -cytosim_bucket: str = "s3://cytosim-working-bucket" - -# Name of the simulation series -cytosim_series_name: str = "COMPRESSION_VELOCITY" - -# List of condition file keys for each velocity -condition_keys: list[str] = ["0047", "0150", "0470", "1500"] - -# Number of simulation replicates -n_replicates: int = 5 - -# Number of timepoints -n_timepoints = 200 - -visualize_all_compressed_trajectories_together( - subcell_bucket, - readdy_bucket, - readdy_series_name, - cytosim_bucket, - cytosim_series_name, - condition_keys, - n_replicates, - n_timepoints, -) diff --git a/subcell_pipeline/visualization/_visualize_combined_trajectories.py b/subcell_pipeline/visualization/_visualize_combined_trajectories.py new file mode 100644 index 0000000..e2e8c22 --- /dev/null +++ b/subcell_pipeline/visualization/_visualize_combined_trajectories.py @@ -0,0 +1,97 @@ +# %% [markdown] +# # Visualize combined ReaDDy and Cytosim simulation trajectories + +# %% [markdown] +""" +Notebook contains steps for visualizing ReaDDy and Cytosim simulations of a +single actin fiber using [Simularium](https://simularium.allencell.org/). 
+ +- [Define visualization settings](#define-visualization-settings) +- [Visualize combined trajectories](#visualize-combined-trajectories) +""" + +# %% +if __name__ != "__main__": + raise ImportError("This module is a notebook and is not meant to be imported") + +# %% +from pathlib import Path + +from subcell_pipeline.analysis.compression_metrics.compression_metric import ( + CompressionMetric, +) +from subcell_pipeline.visualization.combined_trajectory import ( + visualize_combined_trajectories, +) + +# %% [markdown] +""" +## Define visualization settings +""" + +# %% +# S3 buckets for simulation and visualization input and output files +buckets: dict[str, str] = { + "combined": "s3://subcell-working-bucket", + "readdy": "s3://readdy-working-bucket", + "cytosim": "s3://cytosim-working-bucket", +} + +# Names of the simulation series for each simulator +series_names: dict[str, str] = { + "readdy": "ACTIN_COMPRESSION_VELOCITY", + "cytosim": "COMPRESSION_VELOCITY", +} + +# List of condition file keys for each velocity +condition_keys: list[str] = ["0047", "0150", "0470", "1500"] + +# Replicate ids for simulations +replicates: list[int] = [1, 2, 3, 4, 5] + +# Number of timepoints +n_timepoints = 201 + +# List of simulators and colors +simulator_colors = { + "cytosim": "#1cbfa4", + "readdy": "#ffae52", +} + +# Temporary path to save visualization files +temp_path: Path = Path(__file__).parents[2] / "viz_outputs" +temp_path.mkdir(parents=True, exist_ok=True) + +# List of compression metrics to include +metrics = [ + CompressionMetric.NON_COPLANARITY, + CompressionMetric.PEAK_ASYMMETRY, + CompressionMetric.AVERAGE_PERP_DISTANCE, + CompressionMetric.CALC_BENDING_ENERGY, + CompressionMetric.CONTOUR_LENGTH, + CompressionMetric.COMPRESSION_RATIO, +] + + +# %% [markdown] +""" +## Visualize combined trajectories + +Visualize all compression simulations from ReaDDy and Cytosim together in +Simularium. 
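+
+The four-digit condition keys encode the compression velocity in units of
+0.1 μm/s. A minimal sketch of the decoding (a hypothetical helper that mirrors
+the conversion used when building agent type names in this workflow):
+
+```python
+def condition_key_to_velocity(condition_key: str) -> float:
+    """Convert a zero-padded key like "0047" to a velocity in um/s."""
+    velocity = int(condition_key) / 10
+    return round(velocity) if condition_key[-1] == "0" else velocity
+
+
+assert condition_key_to_velocity("0047") == 4.7
+assert condition_key_to_velocity("1500") == 150
+```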
+ +- Input: `(series_name)/samples/(series_name)_(condition_key)_(replicate).csv` +- Output: `actin_compression_cytosim_readdy.simularium` +""" + +# %% +visualize_combined_trajectories( + buckets, + series_names, + condition_keys, + replicates, + n_timepoints, + simulator_colors, + str(temp_path), + metrics=metrics, +) diff --git a/subcell_pipeline/visualization/combined_trajectory.py b/subcell_pipeline/visualization/combined_trajectory.py new file mode 100644 index 0000000..d19b60a --- /dev/null +++ b/subcell_pipeline/visualization/combined_trajectory.py @@ -0,0 +1,242 @@ +import os +from typing import Optional + +import numpy as np +import pandas as pd +from io_collection.keys.check_key import check_key +from io_collection.load.load_buffer import load_buffer +from io_collection.load.load_dataframe import load_dataframe +from io_collection.save.save_buffer import save_buffer +from simulariumio import ( + DISPLAY_TYPE, + AgentData, + CameraData, + DisplayData, + MetaData, + TrajectoryConverter, + TrajectoryData, + UnitData, +) + +from subcell_pipeline.analysis.compression_metrics.compression_analysis import ( + get_compression_metric_data, +) +from subcell_pipeline.analysis.compression_metrics.compression_metric import ( + CompressionMetric, +) +from subcell_pipeline.analysis.dimensionality_reduction.fiber_data import align_fibers +from subcell_pipeline.visualization.scatter_plots import make_empty_scatter_plots + +BOX_SIZE: np.ndarray = np.array(3 * [600.0]) + + +def _load_fiber_points_from_dataframe( + dataframe: pd.DataFrame, n_timepoints: int +) -> np.ndarray: + """ + Load and reshape fiber points from sampled dataframe. + + Sampled dataframe is in the shape (n_timepoints x n_fiber_points, 3); method + returns the dataframe reshaped to (n_timepoints, n_fiber_points x 3). If the + sampled dataframe does not have the expected number of timepoints, method + will raise an exception. + """ + + dataframe.sort_values(by=["time", "fiber_point"]) + total_steps = dataframe.time.unique().shape[0] + + if total_steps != n_timepoints: + raise Exception( + f"Requested number of timesteps [ {n_timepoints} ] does not match " + f"number of timesteps in dataset [ {total_steps} ]." + ) + + align_fibers(dataframe) + + fiber_points = [] + for _, group in dataframe.groupby("time"): + fiber_points.append(group[["xpos", "ypos", "zpos"]].values.flatten()) + + return np.array(fiber_points) + + +def get_combined_trajectory_converter( + fiber_points: list[np.ndarray], + type_names: list[str], + display_data: dict[str, DisplayData], +) -> TrajectoryConverter: + """ + Generate a TrajectoryConverter to visualize simulations from ReaDDy and + Cytosim together. 
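+
+    Each entry of ``fiber_points`` is expected to have shape
+    (n_timepoints, 3 * n_points); each condition and replicate becomes a
+    single fiber agent (viz type 1001) whose subpoints are that frame's
+    flattened xyz coordinates.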
+    """
+
+    total_conditions = len(fiber_points)
+    total_steps = fiber_points[0].shape[0]
+    total_subpoints = fiber_points[0].shape[1]
+
+    traj_data = TrajectoryData(
+        meta_data=MetaData(
+            box_size=BOX_SIZE,
+            camera_defaults=CameraData(
+                position=np.array([10.0, 0.0, 200.0]),
+                look_at_position=np.array([10.0, 0.0, 0.0]),
+                fov_degrees=60.0,
+            ),
+            trajectory_title="Actin compression in Cytosim and Readdy",
+        ),
+        agent_data=AgentData(
+            times=np.arange(total_steps),
+            n_agents=total_conditions * np.ones(total_steps),
+            viz_types=1001
+            * np.ones((total_steps, total_conditions)),  # fiber viz type = 1001
+            unique_ids=np.array(total_steps * [list(range(total_conditions))]),
+            types=total_steps * [type_names],
+            positions=np.zeros((total_steps, total_conditions, 3)),
+            radii=np.ones((total_steps, total_conditions)),
+            n_subpoints=total_subpoints * np.ones((total_steps, total_conditions)),
+            subpoints=np.moveaxis(np.array(fiber_points), [0, 1], [1, 0]),
+            display_data=display_data,
+        ),
+        time_units=UnitData("count"),  # frames
+        spatial_units=UnitData("nm"),  # nanometer
+    )
+    return TrajectoryConverter(traj_data)
+
+
+def _add_combined_plots(
+    converter: TrajectoryConverter,
+    metrics: list[CompressionMetric],
+    metrics_data: dict[str, pd.DataFrame],
+    n_timepoints: int,
+    plot_names: list[tuple[str, str, int]],
+    type_names: list[str],
+) -> None:
+    """Add plots for combined trajectories with calculated metrics."""
+    scatter_plots = make_empty_scatter_plots(metrics, total_steps=n_timepoints)
+
+    for metric, plot in scatter_plots.items():
+        for plot_name, type_name in zip(plot_names, type_names):
+            simulator, key, seed = plot_name
+            simulator_data = metrics_data[simulator]
+            data = simulator_data[
+                (simulator_data["key"] == key) & (simulator_data["seed"] == seed)
+            ]
+            plot.ytraces[type_name] = np.array(data[metric.value])
+        converter.add_plot(plot, "scatter")
+
+
+def visualize_combined_trajectories(
+    buckets: dict[str, str],
+    series_names: dict[str, str],
+    condition_keys: list[str],
+    replicates: list[int],
+    n_timepoints: int,
+    simulator_colors: dict[str, str],
+    temp_path: str,
+    metrics: Optional[list[CompressionMetric]] = None,
+) -> None:
+    """
+    Visualize combined simulations from ReaDDy and Cytosim for select conditions
+    and number of replicates.
+
+    Parameters
+    ----------
+    buckets
+        Names of S3 buckets for input and output files for each simulator and
+        visualization.
+    series_names
+        Names of simulation series for each simulator.
+    condition_keys
+        List of condition keys.
+    replicates
+        Simulation replicate ids.
+    n_timepoints
+        Number of equally spaced timepoints to visualize.
+    simulator_colors
+        Map of simulator name to color.
+    temp_path
+        Local path for saving visualization output files.
+    metrics
+        List of metrics to include in visualization plots.
+    """
+
+    fiber_points = []
+    type_names = []
+    plot_names = []
+    display_data = {}
+    all_metrics_data = {}
+
+    for simulator, color in simulator_colors.items():
+        bucket = buckets[simulator]
+        series_name = series_names[simulator]
+
+        # Load calculated compression metric data.
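+        # If metrics were not requested, an empty dataframe with "key" and
+        # "seed" columns keeps the per-condition filtering below uniform.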
+ if metrics is not None: + all_metrics_data[simulator] = get_compression_metric_data( + bucket, + series_name, + condition_keys, + replicates, + metrics, + recalculate=False, + ) + else: + metrics = [] + all_metrics_data[simulator] = pd.DataFrame(columns=["key", "seed"]) + + for condition_key in condition_keys: + series_key = ( + f"{series_name}_{condition_key}" if condition_key else series_name + ) + + for replicate in replicates: + dataframe_key = ( + f"{series_name}/samples/{series_key}_{replicate:06d}.csv" + ) + + # Skip if input dataframe does not exist. + if not check_key(bucket, dataframe_key): + print( + f"Dataframe not available for {simulator} " + f"[ { dataframe_key } ]. Skipping." + ) + continue + + print( + f"Loading data for [ {simulator} ] " + f"condition [ { dataframe_key } ] " + f"replicate [ {replicate} ]" + ) + + dataframe = load_dataframe(bucket, dataframe_key) + fiber_points.append( + _load_fiber_points_from_dataframe(dataframe, n_timepoints) + ) + + condition = int(condition_key) / 10 + condition = round(condition) if condition_key[-1] == "0" else condition + + type_names.append(f"{simulator}#{condition} um/s {replicate}") + plot_names.append((simulator, condition_key, replicate)) + display_data[type_names[-1]] = DisplayData( + name=type_names[-1], + display_type=DISPLAY_TYPE.FIBER, + color=color, + ) + + converter = get_combined_trajectory_converter( + fiber_points, type_names, display_data + ) + + if metrics: + _add_combined_plots( + converter, metrics, all_metrics_data, n_timepoints, plot_names, type_names + ) + + output_key = "actin_compression_cytosim_readdy.simularium" + local_file_path = os.path.join(temp_path, output_key) + converter.save(output_path=local_file_path.replace(".simularium", "")) + output_bucket = buckets["combined"] + save_buffer(output_bucket, output_key, load_buffer(temp_path, output_key)) diff --git a/subcell_pipeline/visualization/individual_trajectory.py b/subcell_pipeline/visualization/individual_trajectory.py index a4d400f..b9cfccf 100644 --- a/subcell_pipeline/visualization/individual_trajectory.py +++ b/subcell_pipeline/visualization/individual_trajectory.py @@ -15,7 +15,6 @@ DisplayData, InputFileData, MetaData, - ScatterPlotData, TrajectoryConverter, UnitData, ) @@ -38,6 +37,7 @@ ) from subcell_pipeline.simulation.readdy.post_processor import ReaddyPostProcessor from subcell_pipeline.visualization.display_data import get_readdy_display_data +from subcell_pipeline.visualization.scatter_plots import make_empty_scatter_plots from subcell_pipeline.visualization.spatial_annotator import SpatialAnnotator READDY_SAVED_FRAMES: int = 1000 @@ -47,45 +47,6 @@ UNIT_REGISTRY = UnitRegistry() -def _empty_scatter_plots( - metrics: list[CompressionMetric], - total_steps: int = -1, - times: Optional[np.ndarray] = None, - time_units: Optional[str] = None, -) -> dict[CompressionMetric, ScatterPlotData]: - """Create empty scatter plot placeholders for list of metrics.""" - - if total_steps < 0 and times is None: - raise Exception("Either total_steps or times array is required for plots") - elif times is None: - # use normalized time - xlabel = "T (normalized)" - xtrace = (1 / float(total_steps)) * np.arange(total_steps) - else: - # use actual time - xlabel = f"T ({time_units})" - xtrace = times - total_steps = times.shape[0] - - plots = {} - - for metric in metrics: - lower_bound, upper_bound = metric.bounds() - plots[metric] = ScatterPlotData( - title=metric.label(), - xaxis_title=xlabel, - yaxis_title=metric.description(), - xtrace=xtrace, - ytraces={ 
- "<<<": lower_bound * np.ones(total_steps), - ">>>": upper_bound * np.ones(total_steps), - }, - render_mode="lines", - ) - - return plots - - def _add_individual_plots( converter: TrajectoryConverter, metrics: list[CompressionMetric], @@ -93,7 +54,7 @@ def _add_individual_plots( ) -> None: """Add plots to individual trajectory with calculated metrics.""" times = metrics_data["time"].values - scatter_plots = _empty_scatter_plots(metrics, times=times) + scatter_plots = make_empty_scatter_plots(metrics, times=times) for metric, plot in scatter_plots.items(): plot.ytraces["filament"] = np.array(metrics_data[metric.value]) converter.add_plot(plot, "scatter") @@ -152,7 +113,7 @@ def _add_readdy_spatial_annotations( ) -def get_readdy_simularium_converter( +def _get_readdy_simularium_converter( path_to_readdy_h5: str, total_steps: int ) -> TrajectoryConverter: """ @@ -195,6 +156,29 @@ def visualize_individual_readdy_trajectory( """ Save a Simularium file for a single ReaDDy trajectory with plots and spatial annotations. + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + series_name + Name of simulation series. + series_key + Combination of series and condition names. + rep_ix + Replicate index. + n_timepoints + Number of equally spaced timepoints to visualize. + n_monomer_points + Number of equally spaced monomer points to visualize. + total_steps + Total number of steps for each simulation key. + temp_path + Local path for saving visualization output files. + metrics + List of metrics to include in visualization plots. + metrics_data + Calculated compression metrics data. """ h5_file_path = download_readdy_hdf5( @@ -203,7 +187,7 @@ def visualize_individual_readdy_trajectory( assert isinstance(h5_file_path, str) - converter = get_readdy_simularium_converter(h5_file_path, total_steps) + converter = _get_readdy_simularium_converter(h5_file_path, total_steps) if metrics: _add_individual_plots(converter, metrics, metrics_data) @@ -268,7 +252,7 @@ def visualize_individual_readdy_trajectories( total_steps Total number of steps for each simulation key. temp_path - Path for saving temporary h5 files. + Local path for saving visualization output files. metrics List of metrics to include in visualization plots. recalculate @@ -276,7 +260,6 @@ def visualize_individual_readdy_trajectories( """ if metrics is not None: - print(bucket, series_name, condition_keys) all_metrics_data = get_compression_metric_data( bucket, series_name, @@ -350,7 +333,7 @@ def _filter_time( return converter -def get_cytosim_simularium_converter( +def _get_cytosim_simularium_converter( fiber_points_data: str, singles_data: str, n_timepoints: int, @@ -419,13 +402,35 @@ def visualize_individual_cytosim_trajectory( metrics: list[CompressionMetric], metrics_data: pd.DataFrame, ) -> None: - """Save a Simularium file for a single Cytosim trajectory with plots.""" + """ + Save a Simularium file for a single Cytosim trajectory with plots and + spatial annotations. + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + series_name + Name of simulation series. + series_key + Combination of series and condition names. + index + Simulation replicate index. + n_timepoints + Number of equally spaced timepoints to visualize. + temp_path + Local path for saving visualization output files. + metrics + List of metrics to include in visualization plots. + metrics_data + Calculated compression metrics data. 
+    """
 
     output_key_template = f"{series_name}/outputs/{series_key}_{index}/%s"
     fiber_points_data = load_text(bucket, output_key_template % "fiber_points.txt")
     singles_data = load_text(bucket, output_key_template % "singles.txt")
 
-    converter = get_cytosim_simularium_converter(
+    converter = _get_cytosim_simularium_converter(
         fiber_points_data, singles_data, n_timepoints
     )
 
     if metrics:
         _add_individual_plots(converter, metrics, metrics_data)
@@ -472,7 +477,6 @@ def visualize_individual_cytosim_trajectories(
     """
 
     if metrics is not None:
-        print(bucket, series_name, condition_keys)
         all_metrics_data = get_compression_metric_data(
             bucket,
             series_name,
@@ -522,5 +526,3 @@ def visualize_individual_cytosim_trajectories(
 
             temp_key = f"{series_key}_{index}.simularium"
             save_buffer(bucket, output_key, load_buffer(temp_path, temp_key))
-            break
-        break
diff --git a/subcell_pipeline/visualization/scatter_plots.py b/subcell_pipeline/visualization/scatter_plots.py
new file mode 100644
index 0000000..b401877
--- /dev/null
+++ b/subcell_pipeline/visualization/scatter_plots.py
@@ -0,0 +1,65 @@
+from typing import Optional
+
+import numpy as np
+from simulariumio import ScatterPlotData
+
+from subcell_pipeline.analysis.compression_metrics.compression_metric import (
+    CompressionMetric,
+)
+
+
+def make_empty_scatter_plots(
+    metrics: list[CompressionMetric],
+    total_steps: int = -1,
+    times: Optional[np.ndarray] = None,
+    time_units: Optional[str] = None,
+) -> dict[CompressionMetric, ScatterPlotData]:
+    """
+    Create empty scatter plot placeholders for list of metrics.
+
+    Parameters
+    ----------
+    metrics
+        List of metrics.
+    total_steps
+        Total number of timesteps. Required if times is not given.
+    times
+        List of timepoints. Required if total_steps is not given.
+    time_units
+        Time units. Used only with times.
+
+    Returns
+    -------
+    :
+        Map of metric to empty scatter plot placeholder.
+    """
+
+    if total_steps < 0 and times is None:
+        raise Exception("Either total_steps or times array is required for plots")
+    elif times is None:
+        # use normalized time
+        xlabel = "T (normalized)"
+        xtrace = (1 / float(total_steps)) * np.arange(total_steps)
+    else:
+        # use actual time
+        xlabel = f"T ({time_units})"
+        xtrace = times
+        total_steps = times.shape[0]
+
+    plots = {}
+
+    for metric in metrics:
+        lower_bound, upper_bound = metric.bounds()
+        plots[metric] = ScatterPlotData(
+            title=metric.label(),
+            xaxis_title=xlabel,
+            yaxis_title=metric.description(),
+            xtrace=xtrace,
+            ytraces={
+                "<<<": lower_bound * np.ones(total_steps),
+                ">>>": upper_bound * np.ones(total_steps),
+            },
+            render_mode="lines",
+        )
+
+    return plots

From 11986b67036d1aa25a9093162e6a469eab997e27 Mon Sep 17 00:00:00 2001
From: jessicasyu <15913767+jessicasyu@users.noreply.github.com>
Date: Mon, 8 Jul 2024 15:23:51 -0400
Subject: [PATCH 30/63] Add visualization notebooks to docs

---
 docs/index.rst                           |  2 ++
 docs/visualization.rst                   |  5 +++++
 subcell_pipeline/visualization/README.md | 12 ++++++++++++
 3 files changed, 19 insertions(+)
 create mode 100644 docs/visualization.rst
 create mode 100644 subcell_pipeline/visualization/README.md

diff --git a/docs/index.rst b/docs/index.rst
index 4e90c35..d1fbdd2 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -16,6 +16,7 @@
 
     Simulations
     Analysis
+    Visualization
 
 .. autosummary::
    :toctree: _summary
@@ -25,6 +26,7 @@
 
    subcell_pipeline.simulation
    subcell_pipeline.analysis
+   subcell_pipeline.visualization
 
..
toctree:: :hidden: diff --git a/docs/visualization.rst b/docs/visualization.rst new file mode 100644 index 0000000..acc9f03 --- /dev/null +++ b/docs/visualization.rst @@ -0,0 +1,5 @@ +Visualization workflow notebooks +================================ + +.. include:: ../subcell_pipeline/visualization/README.md + :parser: myst_parser.sphinx_ diff --git a/subcell_pipeline/visualization/README.md b/subcell_pipeline/visualization/README.md new file mode 100644 index 0000000..fc3fc8f --- /dev/null +++ b/subcell_pipeline/visualization/README.md @@ -0,0 +1,12 @@ +# Visualization + +Visualization of simulation trajectories using [Simularium](https://simularium.allencell.org/). + +## Individual simulations + +- **Visualize ReaDDy simulation trajectories** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/visualization/_visualize_readdy_trajectories.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/visualization/_visualize_readdy_trajectories.html)) +- **Visualize Cytosim simulation trajectories** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/visualization/_visualize_cytosim_trajectories.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/visualization/_visualize_cytosim_trajectories.html)) + +## Combined simulations + +- **Visualize combined ReaDDy and Cytosim simulation trajectories** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/visualization/_visualize_combined_trajectories.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/visualization/_visualize_combined_trajectories.html)) From 07ca4ebe9517f64d4e1e0d0280365ad39f92fa6d Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Tue, 9 Jul 2024 10:52:22 -0400 Subject: [PATCH 31/63] Add normalized time to compression metrics workflow --- .../analysis/compression_metrics/compression_analysis.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/subcell_pipeline/analysis/compression_metrics/compression_analysis.py b/subcell_pipeline/analysis/compression_metrics/compression_analysis.py index 0527e17..11fad98 100644 --- a/subcell_pipeline/analysis/compression_metrics/compression_analysis.py +++ b/subcell_pipeline/analysis/compression_metrics/compression_analysis.py @@ -131,7 +131,10 @@ def calculate_compression_metrics( polymer_trace=polymer_trace, **options ) - return df_metrics.reset_index().rename(columns={"index": "time"}) + metrics = df_metrics.reset_index().rename(columns={"index": "time"}) + metrics["normalized_time"] = metrics["time"] / metrics["time"].max() + + return metrics def save_compression_metrics( From 1d378e25f80dc4e0b640e6bf7fb1115c36260ccc Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Tue, 9 Jul 2024 10:54:05 -0400 Subject: [PATCH 32/63] Update PCA transform points --- .../_run_pca_on_compression_simulations.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/subcell_pipeline/analysis/dimensionality_reduction/_run_pca_on_compression_simulations.py b/subcell_pipeline/analysis/dimensionality_reduction/_run_pca_on_compression_simulations.py index e7e5bd3..630ccc8 100644 --- a/subcell_pipeline/analysis/dimensionality_reduction/_run_pca_on_compression_simulations.py +++ b/subcell_pipeline/analysis/dimensionality_reduction/_run_pca_on_compression_simulations.py @@ -108,10 +108,10 @@ ("cytosim", "0150"): 0.01, ("cytosim", "0470"): 
0.00316, ("cytosim", "1500"): 0.001, - ("readdy", "0047"): 1000, - ("readdy", "0150"): 1000, - ("readdy", "0470"): 1000, - ("readdy", "1500"): 1000, + ("readdy", "0047"): 100, + ("readdy", "0150"): 100, + ("readdy", "0470"): 100, + ("readdy", "1500"): 100, } save_aligned_fibers( @@ -162,8 +162,8 @@ """ # %% points: list[list[float]] = [ - [-600, -300, 0, 300, 600, 900], - [-200, 0, 200, 400], + [-900, -600, -300, 0, 300, 600], + [-600, -400, -200, 0, 200], ] save_pca_transforms(pca, points, save_location, "actin_compression_pca_transforms.json") From a84eb7c7498bd9fcb133811a1241c7e60894d373 Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Tue, 9 Jul 2024 14:07:44 -0700 Subject: [PATCH 33/63] * refactor tomography visualization functions * add recalculate parameter for sampling * fix scale factor for tomography data * move constants to where used Co-authored-by: Jessica S. Yu --- .../_analyze_actin_cme_tomography_data.py | 41 +++- .../tomography_data/tomography_data.py | 17 +- subcell_pipeline/constants.py | 19 +- .../_visualize_tomography_data.py | 12 +- subcell_pipeline/visualization/tomography.py | 220 ++++++++++++++++++ 5 files changed, 274 insertions(+), 35 deletions(-) create mode 100644 subcell_pipeline/visualization/tomography.py diff --git a/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py b/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py index 8732556..92d867f 100644 --- a/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py +++ b/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py @@ -30,7 +30,9 @@ plot_tomography_data_by_dataset, sample_tomography_data, ) -from subcell_pipeline.constants import TOMOGRAPHY_SCALE_FACTOR + +# pixels to um +TOMOGRAPHY_SCALE_FACTOR: float = 0.0006 # %% [markdown] """ @@ -70,12 +72,19 @@ ] # %% -# TODO These datasets have different scales (see plots), which is correct? branched_df = get_branched_tomography_data( - bucket, name, repository, branched_datasets, TOMOGRAPHY_SCALE_FACTOR + bucket=bucket, + name=name, + repository=repository, + datasets=branched_datasets, + scale_factor=TOMOGRAPHY_SCALE_FACTOR, ) unbranched_df = get_unbranched_tomography_data( - bucket, name, repository, unbranched_datasets, TOMOGRAPHY_SCALE_FACTOR + bucket=bucket, + name=name, + repository=repository, + datasets=unbranched_datasets, + scale_factor=TOMOGRAPHY_SCALE_FACTOR, ) # %% [markdown] @@ -84,7 +93,9 @@ """ # %% -plot_tomography_data_by_dataset(branched_df, bucket, f"{name}/{name}_plots_branched.png") +plot_tomography_data_by_dataset( + branched_df, bucket, f"{name}/{name}_plots_branched.png" +) # %% [markdown] """ @@ -92,7 +103,9 @@ """ # %% -plot_tomography_data_by_dataset(unbranched_df, bucket, f"{name}/{name}_plots_unbranched.png") +plot_tomography_data_by_dataset( + unbranched_df, bucket, f"{name}/{name}_plots_unbranched.png" +) # %% [markdown] """ @@ -108,6 +121,8 @@ # Minimum number of points for valid fiber minimum_points = 3 +# True to recalculate the sampled tomography data, False otherwise. +recalculate = True # %% [markdown] """ @@ -120,10 +135,14 @@ # %% sampled_key = f"{name}/{name}_coordinates_sampled.csv" -# TODO scale properly before concat (or do we not want to analyze both datasets?) 
all_tomogram_df = pd.concat([branched_df, unbranched_df]) sampled_data = sample_tomography_data( - all_tomogram_df, bucket, sampled_key, n_monomer_points, minimum_points + all_tomogram_df, + bucket, + sampled_key, + n_monomer_points, + minimum_points, + recalculate=recalculate, ) # %% [markdown] @@ -132,4 +151,8 @@ """ # %% -plot_tomography_data_by_dataset(sampled_data, bucket, f"{name}/{name}_plots_all_sampled.png") +plot_tomography_data_by_dataset( + sampled_data, bucket, f"{name}/{name}_plots_all_sampled.png" +) + +# %% diff --git a/subcell_pipeline/analysis/tomography_data/tomography_data.py b/subcell_pipeline/analysis/tomography_data/tomography_data.py index b57d5a5..7f38918 100644 --- a/subcell_pipeline/analysis/tomography_data/tomography_data.py +++ b/subcell_pipeline/analysis/tomography_data/tomography_data.py @@ -191,6 +191,7 @@ def sample_tomography_data( n_monomer_points: int, minimum_points: int, sampled_columns: list[str] = TOMOGRAPHY_SAMPLE_COLUMNS, + recalculate: bool = False, ) -> pd.DataFrame: """ Sample selected columns from tomography data at given resolution. @@ -209,6 +210,8 @@ def sample_tomography_data( Minimum number of points for valid fiber. sampled_columns List of column names to sample. + recalculate + True to recalculate the sampled tomography data, False otherwise. Returns ------- @@ -216,21 +219,17 @@ def sample_tomography_data( Sampled tomography data. """ - if check_key(save_location, save_key): + if check_key(save_location, save_key) and not recalculate: print(f"Loading existing sampled tomogram data from [ { save_key } ]") return load_dataframe(save_location, save_key) else: all_sampled_points = [] - # TODO sort experimental samples in order along the fiber before resampling + # TODO sort experimental samples in order along the fiber before resampling # (see simularium visualization) - for fiber_id, group in data.groupby("id"): if len(group) < minimum_points: continue - - # TODO resample uniformly along the fiber length rather than - # uniformly between experimental samples sampled_points = pd.DataFrame() sampled_points["monomer_ids"] = np.arange(n_monomer_points) @@ -254,7 +253,9 @@ def sample_tomography_data( return all_sampled_df -def plot_tomography_data_by_dataset(data: pd.DataFrame, bucket: str, output_key: str) -> None: +def plot_tomography_data_by_dataset( + data: pd.DataFrame, bucket: str, output_key: str +) -> None: """ Plot tomography data for each dataset. 
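For orientation on the resampling step in `sample_tomography_data` above: each coordinate column is interpolated independently onto `n_monomer_points` evenly spaced parameter values with `np.interp`. A minimal sketch with made-up coordinates (note, per the remaining TODO, that this spaces samples uniformly between experimental points rather than along the fiber arc length):

```python
import numpy as np

# Toy x-coordinates for a fiber with 4 measured points (made-up values).
xpos = np.array([0.0, 1.0, 4.0, 9.0])
n_monomer_points = 7

# Resample onto 7 evenly spaced parameter values, mirroring the np.interp
# call in sample_tomography_data.
resampled = np.interp(
    np.linspace(0, 1, n_monomer_points),  # target parameter values
    np.linspace(0, 1, len(xpos)),         # parameter values of measured points
    xpos,
)
print(resampled)  # [0.  0.5 1.  2.5 4.  6.5 9. ]
```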
@@ -269,7 +270,7 @@ def plot_tomography_data_by_dataset(data: pd.DataFrame, bucket: str, output_key: """ make_working_directory() local_save_path = os.path.join(WORKING_DIR_PATH, os.path.basename(output_key)) - + for dataset, group in data.groupby("dataset"): _, ax = plt.subplots(1, 3, figsize=(6, 2)) diff --git a/subcell_pipeline/constants.py b/subcell_pipeline/constants.py index 75c0856..c679cad 100644 --- a/subcell_pipeline/constants.py +++ b/subcell_pipeline/constants.py @@ -3,9 +3,7 @@ from typing import Dict, List, Union import numpy as np - -from simulariumio import DisplayData, DISPLAY_TYPE - +from simulariumio import DISPLAY_TYPE, DisplayData WORKING_DIR_PATH: str = "data/" @@ -42,11 +40,11 @@ READDY_SAVED_FRAMES: int = 1000 READDY_TOTAL_STEPS: Dict[str, int] = { - "ACTIN_NO_COMPRESSION" : 1e7, - "ACTIN_COMPRESSION_VELOCITY_0047" : 3.2e8, - "ACTIN_COMPRESSION_VELOCITY_0150" : 1e8, - "ACTIN_COMPRESSION_VELOCITY_0470" : 3.2e7, - "ACTIN_COMPRESSION_VELOCITY_1500" : 1e7, + "ACTIN_NO_COMPRESSION": 1e7, + "ACTIN_COMPRESSION_VELOCITY_0047": 3.2e8, + "ACTIN_COMPRESSION_VELOCITY_0150": 1e8, + "ACTIN_COMPRESSION_VELOCITY_0470": 3.2e7, + "ACTIN_COMPRESSION_VELOCITY_1500": 1e7, } # particle types correspond to types from simularium/readdy-models @@ -199,10 +197,7 @@ def READDY_DISPLAY_DATA() -> Dict[str, DisplayData]: TOMOGRAPHY_SAMPLE_COLUMNS: list[str] = ["xpos", "ypos", "zpos"] -TOMOGRAPHY_VIZ_SCALE: float = 1000. +TOMOGRAPHY_VIZ_SCALE: float = 1000.0 TOMOGRAPHY_MIN_COMPRESSION: int = 2 - -# pixels to um -TOMOGRAPHY_SCALE_FACTOR: float = 0.00006 diff --git a/subcell_pipeline/visualization/_visualize_tomography_data.py b/subcell_pipeline/visualization/_visualize_tomography_data.py index e432405..cfce700 100644 --- a/subcell_pipeline/visualization/_visualize_tomography_data.py +++ b/subcell_pipeline/visualization/_visualize_tomography_data.py @@ -14,6 +14,9 @@ if __name__ != "__main__": raise ImportError("This module is a notebook and is not meant to be imported") +# %% +from subcell_pipeline.visualization.tomography import visualize_tomography + # %% [markdown] """ ## Visualize Tomography @@ -23,12 +26,6 @@ - Input: `(name)/(name)_coordinates_sampled.csv` - Output: `(name)/(name).simularium` """ - -# %% -from subcell_pipeline.visualization.visualizer import ( - visualize_tomography, -) - # %% # Dataset name name = "actin_cme_tomography" @@ -36,4 +33,7 @@ # S3 bucket for input and output files bucket = "s3://subcell-working-bucket" +# %% visualize_tomography(bucket, name) + +# %% diff --git a/subcell_pipeline/visualization/tomography.py b/subcell_pipeline/visualization/tomography.py new file mode 100644 index 0000000..db88f62 --- /dev/null +++ b/subcell_pipeline/visualization/tomography.py @@ -0,0 +1,220 @@ +import os +from typing import Dict, Tuple + +import numpy as np +import pandas as pd +from io_collection.load.load_dataframe import load_dataframe +from simulariumio import ( + DISPLAY_TYPE, + AgentData, + CameraData, + DisplayData, + HistogramPlotData, + MetaData, + TrajectoryConverter, + TrajectoryData, + UnitData, +) + +from subcell_pipeline.analysis.compression_metrics.compression_metric import ( + CompressionMetric, +) + +from ..constants import ( + TOMOGRAPHY_SAMPLE_COLUMNS, + TOMOGRAPHY_VIZ_SCALE, + WORKING_DIR_PATH, +) +from ..temporary_file_io import make_working_directory +from .spatial_annotator import SpatialAnnotator + + +def _save_and_upload_simularium_file( + converter: TrajectoryConverter, bucket: str, output_key: str +) -> None: + """ + Save a local simularium file and 
upload it to s3. + """ + local_key = os.path.splitext(os.path.basename(output_key))[0] + local_output_path = os.path.join(WORKING_DIR_PATH, local_key) + make_working_directory() + + converter.save(local_output_path) + + # upload_file_to_s3(bucket, f"{local_output_path}.simularium", output_key) TODO + + +def _generate_simularium_for_fiber_points( + fiber_points: list[np.ndarray], + type_names: list[str], + meta_data: MetaData, + display_data: Dict[str, DisplayData], + time_units: UnitData, + spatial_units: UnitData, +) -> TrajectoryConverter: + """ + Generate a TrajectoryConverter for the fiber_points + (list of fibers, each = timesteps X points X 3) + """ + # build subpoints array with correct dimensions + n_fibers = len(fiber_points) + total_steps = fiber_points[0].shape[0] + n_points = fiber_points[0].shape[1] + subpoints = np.zeros((total_steps, n_fibers, n_points, 3)) + for time_ix in range(total_steps): + for fiber_ix in range(n_fibers): + subpoints[time_ix][fiber_ix] = fiber_points[fiber_ix][time_ix] + subpoints = subpoints.reshape((total_steps, n_fibers, 3 * n_points)) + # convert to simularium + traj_data = TrajectoryData( + meta_data=meta_data, + agent_data=AgentData( + times=np.arange(total_steps), + n_agents=n_fibers * np.ones((total_steps)), + viz_types=1001 * np.ones((total_steps, n_fibers)), # fiber viz type = 1001 + unique_ids=np.array(total_steps * [list(range(n_fibers))]), + types=total_steps * [type_names], + positions=np.zeros((total_steps, n_fibers, 3)), + radii=0.5 * np.ones((total_steps, n_fibers)), + n_subpoints=3 * n_points * np.ones((total_steps, n_fibers)), + subpoints=subpoints, + display_data=display_data, + ), + time_units=time_units, + spatial_units=spatial_units, + ) + return TrajectoryConverter(traj_data) + + +def _empty_tomography_plots() -> Dict[CompressionMetric, HistogramPlotData]: + return { + CompressionMetric.CONTOUR_LENGTH: HistogramPlotData( + title="Contour Length", + xaxis_title="filament contour length (nm)", + traces={}, + ), + CompressionMetric.COMPRESSION_RATIO: HistogramPlotData( + title="Compression Percentage", + xaxis_title="percent (%)", + traces={}, + ), + CompressionMetric.AVERAGE_PERP_DISTANCE: HistogramPlotData( + title="Average Perpendicular Distance", + xaxis_title="distance (nm)", + traces={}, + ), + CompressionMetric.CALC_BENDING_ENERGY: HistogramPlotData( + title="Bending Energy", + xaxis_title="energy", + traces={}, + ), + CompressionMetric.NON_COPLANARITY: HistogramPlotData( + title="Non-coplanarity", + xaxis_title="3rd component variance from PCA", + traces={}, + ), + CompressionMetric.PEAK_ASYMMETRY: HistogramPlotData( + title="Peak Asymmetry", + xaxis_title="normalized peak distance", + traces={}, + ), + } + + +def _add_tomography_plots( + fiber_points: list[np.ndarray], converter: TrajectoryConverter +) -> None: + """ + Add plots to tomography data using pre-calculated metrics. + """ + plots = _empty_tomography_plots() + for metric in plots: + values = [] + for fiber in fiber_points: + values.append(metric.calculate_metric(polymer_trace=fiber)) + plots[metric].traces["actin"] = np.array(values) + if metric == CompressionMetric.COMPRESSION_RATIO: + plots[metric].traces["actin"] *= 100.0 + converter.add_plot(plots[metric], "histogram") + + +def _get_tomography_spatial_center_and_size( + tomo_df: pd.DataFrame, +) -> Tuple[np.ndarray, np.ndarray]: + """ + Get the center and size of the tomography dataset in 3D space. 
+ """ + ixs = [ + list(tomo_df.columns).index(TOMOGRAPHY_SAMPLE_COLUMNS[0]), + list(tomo_df.columns).index(TOMOGRAPHY_SAMPLE_COLUMNS[1]), + list(tomo_df.columns).index(TOMOGRAPHY_SAMPLE_COLUMNS[2]), + ] + unique_values = list(map(set, tomo_df.values.T)) + mins = [] + maxs = [] + for dim_ix in range(3): + d_values = np.array(list(unique_values[ixs[dim_ix]])) + mins.append(np.amin(d_values)) + maxs.append(np.amax(d_values)) + mins = np.array(mins) + maxs = np.array(maxs) + return mins + 0.5 * (maxs - mins), maxs - mins + + +def visualize_tomography(bucket: str, name: str) -> None: + """ + Visualize segmented tomography data for actin fibers. + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + name + Name of tomography dataset. + """ + tomo_key = f"{name}/{name}_coordinates_sampled.csv" + tomo_df = load_dataframe(bucket, tomo_key) + tomo_df = tomo_df.sort_values(by=["id", "monomer_ids"]) + tomo_df = tomo_df.reset_index(drop=True) + time_units = UnitData("count") + spatial_units = UnitData("um", 0.003) + names, ids = np.unique(np.array(list(tomo_df["id"])), return_index=True) + traj_ids = names[np.argsort(ids)] + for traj_id in traj_ids: + fiber_df = tomo_df.loc[tomo_df["id"] == traj_id] + center, box_size = _get_tomography_spatial_center_and_size(fiber_df) + fiber_points = TOMOGRAPHY_VIZ_SCALE * ( + np.array([fiber_df[["xpos", "ypos", "zpos"]]]) - center + ) + type_names = ["Raw data"] + display_data = { + "Raw data": DisplayData( + name="Raw data", + display_type=DISPLAY_TYPE.FIBER, + color="#888888", + ) + } + converter = _generate_simularium_for_fiber_points( + [fiber_points], + type_names, + MetaData( + box_size=TOMOGRAPHY_VIZ_SCALE * box_size, + camera_defaults=CameraData(position=np.array([0.0, 0.0, 70.0])), + ), + display_data, + time_units, + spatial_units, + ) + + # TODO remove after debugging fiber point order + converter._data = SpatialAnnotator.add_sphere_agents( + converter._data, + [fiber_points[0]], + type_name="point", + radius=0.8, + ) + + _add_tomography_plots([fiber_points[0]], converter) + _save_and_upload_simularium_file( + converter, bucket, f"{name}/{name}_{traj_id}.simularium" + ) From cf1ca60b3370e266a2d39ce21d9e19f8ed49d5e1 Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Tue, 9 Jul 2024 18:14:04 -0400 Subject: [PATCH 34/63] Update tomography visualization --- .../_visualize_tomography_data.py | 48 +++- .../visualization/histogram_plots.py | 34 +++ .../visualization/scatter_plots.py | 2 +- subcell_pipeline/visualization/tomography.py | 226 ++++++++---------- 4 files changed, 167 insertions(+), 143 deletions(-) create mode 100644 subcell_pipeline/visualization/histogram_plots.py diff --git a/subcell_pipeline/visualization/_visualize_tomography_data.py b/subcell_pipeline/visualization/_visualize_tomography_data.py index cfce700..4208991 100644 --- a/subcell_pipeline/visualization/_visualize_tomography_data.py +++ b/subcell_pipeline/visualization/_visualize_tomography_data.py @@ -3,11 +3,11 @@ # %% [markdown] """ +Notebook contains steps for visualizing segmented tomography data for actin +fibers using [Simularium](https://simularium.allencell.org/). -Notebook contains steps for visualizing segmented tomography data -for actin fibers. 
- -- [Visualize Tomography](#visualize-tomography) +- [Define visualization settings](#define-visualization-settings) +- [Visualize tomography data](#visualize-tomography-data) """ # %% @@ -15,25 +15,49 @@ raise ImportError("This module is a notebook and is not meant to be imported") # %% +from pathlib import Path + +from subcell_pipeline.analysis.compression_metrics.compression_metric import ( + CompressionMetric, +) from subcell_pipeline.visualization.tomography import visualize_tomography # %% [markdown] """ -## Visualize Tomography +## Define visualization settings -Visualize segmented tomography data for actin fibers. - -- Input: `(name)/(name)_coordinates_sampled.csv` -- Output: `(name)/(name).simularium` +Define simulation and visualization settings that are shared between different +simulation series. """ + # %% -# Dataset name +# Tomography dataset name name = "actin_cme_tomography" # S3 bucket for input and output files bucket = "s3://subcell-working-bucket" -# %% -visualize_tomography(bucket, name) +# Temporary path to save visualization files +temp_path: Path = Path(__file__).parents[2] / "viz_outputs" +temp_path.mkdir(parents=True, exist_ok=True) + +# List of compression metrics to include +metrics = [ + CompressionMetric.NON_COPLANARITY, + CompressionMetric.PEAK_ASYMMETRY, + CompressionMetric.AVERAGE_PERP_DISTANCE, + CompressionMetric.CALC_BENDING_ENERGY, + CompressionMetric.CONTOUR_LENGTH, + CompressionMetric.COMPRESSION_RATIO, +] + +# %% [markdown] """ +## Visualize tomography data + - Input: `(name)/(name)_coordinates_sampled.csv` - Output: `(name)/(name).simularium` """ # %% +visualize_tomography(bucket, name, str(temp_path), metrics) diff --git a/subcell_pipeline/visualization/histogram_plots.py b/subcell_pipeline/visualization/histogram_plots.py new file mode 100644 index 0000000..b7ed292 --- /dev/null +++ b/subcell_pipeline/visualization/histogram_plots.py @@ -0,0 +1,34 @@ +from simulariumio import HistogramPlotData + +from subcell_pipeline.analysis.compression_metrics.compression_metric import ( + CompressionMetric, +) + + +def make_empty_histogram_plots( + metrics: list[CompressionMetric], +) -> dict[CompressionMetric, HistogramPlotData]: + """ + Create empty histogram plot placeholders for list of metrics. + + Parameters + ---------- + metrics + List of metrics. + + Returns + ------- + : + Map of metric to empty histogram plot placeholder. + """ + + plots = {} + + for metric in metrics: + plots[metric] = HistogramPlotData( + title=metric.label(), + xaxis_title=metric.description(), + traces={}, + ) + + return plots diff --git a/subcell_pipeline/visualization/scatter_plots.py b/subcell_pipeline/visualization/scatter_plots.py index b401877..d55de5f 100644 --- a/subcell_pipeline/visualization/scatter_plots.py +++ b/subcell_pipeline/visualization/scatter_plots.py @@ -31,7 +31,7 @@ def make_empty_scatter_plots( Returns ------- : - Map of metric to mpty scatter plot placeholder. + Map of metric to empty scatter plot placeholder.
""" if total_steps < 0 and times is None: diff --git a/subcell_pipeline/visualization/tomography.py b/subcell_pipeline/visualization/tomography.py index db88f62..0d2269b 100644 --- a/subcell_pipeline/visualization/tomography.py +++ b/subcell_pipeline/visualization/tomography.py @@ -1,15 +1,15 @@ import os -from typing import Dict, Tuple +from typing import Optional import numpy as np import pandas as pd +from io_collection.load.load_buffer import load_buffer from io_collection.load.load_dataframe import load_dataframe +from io_collection.save.save_buffer import save_buffer from simulariumio import ( - DISPLAY_TYPE, AgentData, CameraData, DisplayData, - HistogramPlotData, MetaData, TrajectoryConverter, TrajectoryData, @@ -19,43 +19,29 @@ from subcell_pipeline.analysis.compression_metrics.compression_metric import ( CompressionMetric, ) +from subcell_pipeline.visualization.histogram_plots import make_empty_histogram_plots +from subcell_pipeline.visualization.spatial_annotator import SpatialAnnotator -from ..constants import ( - TOMOGRAPHY_SAMPLE_COLUMNS, - TOMOGRAPHY_VIZ_SCALE, - WORKING_DIR_PATH, -) -from ..temporary_file_io import make_working_directory -from .spatial_annotator import SpatialAnnotator - - -def _save_and_upload_simularium_file( - converter: TrajectoryConverter, bucket: str, output_key: str -) -> None: - """ - Save a local simularium file and upload it to s3. - """ - local_key = os.path.splitext(os.path.basename(output_key))[0] - local_output_path = os.path.join(WORKING_DIR_PATH, local_key) - make_working_directory() +TOMOGRAPHY_SAMPLE_COLUMNS: list[str] = ["xpos", "ypos", "zpos"] - converter.save(local_output_path) - - # upload_file_to_s3(bucket, f"{local_output_path}.simularium", output_key) TODO +TOMOGRAPHY_VIZ_SCALE: float = 100.0 def _generate_simularium_for_fiber_points( fiber_points: list[np.ndarray], type_names: list[str], meta_data: MetaData, - display_data: Dict[str, DisplayData], + display_data: dict[str, DisplayData], time_units: UnitData, spatial_units: UnitData, ) -> TrajectoryConverter: """ - Generate a TrajectoryConverter for the fiber_points - (list of fibers, each = timesteps X points X 3) + Generate a TrajectoryConverter for the given fiber points. + + Fiber points is a list of fibers, where each fiber has the shape (timesteps + x points x 3). 
""" + # build subpoints array with correct dimensions n_fibers = len(fiber_points) total_steps = fiber_points[0].shape[0] @@ -65,12 +51,13 @@ def _generate_simularium_for_fiber_points( for fiber_ix in range(n_fibers): subpoints[time_ix][fiber_ix] = fiber_points[fiber_ix][time_ix] subpoints = subpoints.reshape((total_steps, n_fibers, 3 * n_points)) + # convert to simularium traj_data = TrajectoryData( meta_data=meta_data, agent_data=AgentData( times=np.arange(total_steps), - n_agents=n_fibers * np.ones((total_steps)), + n_agents=n_fibers * np.ones(total_steps), viz_types=1001 * np.ones((total_steps, n_fibers)), # fiber viz type = 1001 unique_ids=np.array(total_steps * [list(range(n_fibers))]), types=total_steps * [type_names], @@ -86,82 +73,53 @@ def _generate_simularium_for_fiber_points( return TrajectoryConverter(traj_data) -def _empty_tomography_plots() -> Dict[CompressionMetric, HistogramPlotData]: - return { - CompressionMetric.CONTOUR_LENGTH: HistogramPlotData( - title="Contour Length", - xaxis_title="filament contour length (nm)", - traces={}, - ), - CompressionMetric.COMPRESSION_RATIO: HistogramPlotData( - title="Compression Percentage", - xaxis_title="percent (%)", - traces={}, - ), - CompressionMetric.AVERAGE_PERP_DISTANCE: HistogramPlotData( - title="Average Perpendicular Distance", - xaxis_title="distance (nm)", - traces={}, - ), - CompressionMetric.CALC_BENDING_ENERGY: HistogramPlotData( - title="Bending Energy", - xaxis_title="energy", - traces={}, - ), - CompressionMetric.NON_COPLANARITY: HistogramPlotData( - title="Non-coplanarity", - xaxis_title="3rd component variance from PCA", - traces={}, - ), - CompressionMetric.PEAK_ASYMMETRY: HistogramPlotData( - title="Peak Asymmetry", - xaxis_title="normalized peak distance", - traces={}, - ), - } - - def _add_tomography_plots( - fiber_points: list[np.ndarray], converter: TrajectoryConverter + converter: TrajectoryConverter, + metrics: list[CompressionMetric], + fiber_points: list[np.ndarray], ) -> None: - """ - Add plots to tomography data using pre-calculated metrics. - """ - plots = _empty_tomography_plots() - for metric in plots: - values = [] - for fiber in fiber_points: - values.append(metric.calculate_metric(polymer_trace=fiber)) - plots[metric].traces["actin"] = np.array(values) + """Add plots to tomography data with calculated metrics.""" + + histogram_plots = make_empty_histogram_plots(metrics) + + for metric, plot in histogram_plots.items(): + values = [ + metric.calculate_metric(polymer_trace=fiber[0, :, :]) + for fiber in fiber_points + ] + if metric == CompressionMetric.COMPRESSION_RATIO: - plots[metric].traces["actin"] *= 100.0 - converter.add_plot(plots[metric], "histogram") + plot.traces["actin"] = np.array(values) * 100 + else: + plot.traces["actin"] = np.array(values) + + converter.add_plot(plot, "histogram") def _get_tomography_spatial_center_and_size( tomo_df: pd.DataFrame, -) -> Tuple[np.ndarray, np.ndarray]: - """ - Get the center and size of the tomography dataset in 3D space. 
- """ - ixs = [ - list(tomo_df.columns).index(TOMOGRAPHY_SAMPLE_COLUMNS[0]), - list(tomo_df.columns).index(TOMOGRAPHY_SAMPLE_COLUMNS[1]), - list(tomo_df.columns).index(TOMOGRAPHY_SAMPLE_COLUMNS[2]), - ] - unique_values = list(map(set, tomo_df.values.T)) - mins = [] - maxs = [] - for dim_ix in range(3): - d_values = np.array(list(unique_values[ixs[dim_ix]])) - mins.append(np.amin(d_values)) - maxs.append(np.amax(d_values)) - mins = np.array(mins) - maxs = np.array(maxs) +) -> tuple[np.ndarray, np.ndarray]: + """Get the center and size of the tomography dataset in 3D space.""" + + all_mins = [] + all_maxs = [] + + for column in TOMOGRAPHY_SAMPLE_COLUMNS: + all_mins.append(tomo_df[column].min()) + all_maxs.append(tomo_df[column].max()) + + mins = np.array(all_mins) + maxs = np.array(all_maxs) + return mins + 0.5 * (maxs - mins), maxs - mins -def visualize_tomography(bucket: str, name: str) -> None: +def visualize_tomography( + bucket: str, + name: str, + temp_path: str, + metrics: Optional[list[CompressionMetric]] = None, +) -> None: """ Visualize segmented tomography data for actin fibers. @@ -171,50 +129,58 @@ def visualize_tomography(bucket: str, name: str) -> None: Name of S3 bucket for input and output files. name Name of tomography dataset. + temp_path + Local path for saving visualization output files. + metrics + List of metrics to include in visualization plots. """ + tomo_key = f"{name}/{name}_coordinates_sampled.csv" tomo_df = load_dataframe(bucket, tomo_key) tomo_df = tomo_df.sort_values(by=["id", "monomer_ids"]) tomo_df = tomo_df.reset_index(drop=True) + time_units = UnitData("count") spatial_units = UnitData("um", 0.003) - names, ids = np.unique(np.array(list(tomo_df["id"])), return_index=True) - traj_ids = names[np.argsort(ids)] - for traj_id in traj_ids: - fiber_df = tomo_df.loc[tomo_df["id"] == traj_id] - center, box_size = _get_tomography_spatial_center_and_size(fiber_df) + + center, box_size = _get_tomography_spatial_center_and_size(tomo_df) + + all_fiber_points = [] + all_type_names = [] + + for fiber_id, fiber_df in tomo_df.groupby("id"): + fiber_index, dataset = fiber_id.split("_", 1) fiber_points = TOMOGRAPHY_VIZ_SCALE * ( - np.array([fiber_df[["xpos", "ypos", "zpos"]]]) - center - ) - type_names = ["Raw data"] - display_data = { - "Raw data": DisplayData( - name="Raw data", - display_type=DISPLAY_TYPE.FIBER, - color="#888888", - ) - } - converter = _generate_simularium_for_fiber_points( - [fiber_points], - type_names, - MetaData( - box_size=TOMOGRAPHY_VIZ_SCALE * box_size, - camera_defaults=CameraData(position=np.array([0.0, 0.0, 70.0])), - ), - display_data, - time_units, - spatial_units, + np.array([fiber_df[TOMOGRAPHY_SAMPLE_COLUMNS]]) - center ) + all_fiber_points.append(fiber_points) + all_type_names.append(f"{dataset}#{fiber_index}") + + converter = _generate_simularium_for_fiber_points( + all_fiber_points, + all_type_names, + MetaData( + box_size=TOMOGRAPHY_VIZ_SCALE * box_size, + camera_defaults=CameraData(position=np.array([0.0, 0.0, 70.0])), + ), + {}, + time_units, + spatial_units, + ) - # TODO remove after debugging fiber point order - converter._data = SpatialAnnotator.add_sphere_agents( - converter._data, - [fiber_points[0]], - type_name="point", - radius=0.8, - ) + # TODO remove after debugging fiber point order + converter._data = SpatialAnnotator.add_sphere_agents( + converter._data, + fiber_points, + type_name="point", + radius=0.8, + ) - _add_tomography_plots([fiber_points[0]], converter) - _save_and_upload_simularium_file( - converter, bucket, 
f"{name}/{name}_{traj_id}.simularium" - ) + if metrics: + _add_tomography_plots(converter, metrics, all_fiber_points) + + # Save locally and copy to bucket. + local_file_path = os.path.join(temp_path, name) + converter.save(output_path=local_file_path) + output_key = f"{name}/{name}.simularium" + save_buffer(bucket, output_key, load_buffer(temp_path, f"{name}.simularium")) From e4a759714cf10f1d5af009801f1a3d6463e6ccc2 Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Wed, 10 Jul 2024 10:27:27 -0700 Subject: [PATCH 35/63] add column to check if files are ordered --- .../tomography_data/tomography_data.py | 30 ++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/subcell_pipeline/analysis/tomography_data/tomography_data.py b/subcell_pipeline/analysis/tomography_data/tomography_data.py index 7f38918..53c0cdd 100644 --- a/subcell_pipeline/analysis/tomography_data/tomography_data.py +++ b/subcell_pipeline/analysis/tomography_data/tomography_data.py @@ -11,6 +11,30 @@ from ...temporary_file_io import make_working_directory, upload_file_to_s3 +def test_consecutive_segment_angles(polymer_trace: np.ndarray) -> np.bool_: + """ + Test whether the angles between consecutive segments of a polymer + trace are less than 90 degrees. + + Parameters + ---------- + polymer_trace + A 2D array where each row is a point in 3D space. + + Returns + ------- + bool + True if all consecutive angles are less than 180 degrees. + """ + vectors = polymer_trace[1:] - polymer_trace[:-1] + + vectors /= np.linalg.norm(vectors, axis=1)[:, np.newaxis] + dot_products = np.dot(vectors[1:], vectors[:-1].T) + + # Check if any angle is greater than 90 degrees + return np.all(dot_products > 0) + + def read_tomography_data(file: str, label: str = "fil") -> pd.DataFrame: """ Read tomography data from file as dataframe. @@ -240,9 +264,13 @@ def sample_tomography_data( sampled_points[column] = np.interp( np.linspace(0, 1, n_monomer_points), np.linspace(0, 1, group.shape[0]), - group[column].values, + group[column].to_numpy(), ) + sampled_points["ordered"] = test_consecutive_segment_angles( + sampled_points[sampled_columns].to_numpy() + ) + all_sampled_points.append(sampled_points) all_sampled_df = pd.concat(all_sampled_points) From 120ad45cdf082a8a5f7d8f7ccc9f13b460566257 Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Wed, 10 Jul 2024 15:48:17 -0400 Subject: [PATCH 36/63] Formatting for compression metrics analysis --- .../analysis/compression_metrics/compression_metric.py | 8 ++++---- .../analysis/compression_metrics/polymer_trace.py | 8 +++++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/subcell_pipeline/analysis/compression_metrics/compression_metric.py b/subcell_pipeline/analysis/compression_metrics/compression_metric.py index 260b746..f2cf0f2 100644 --- a/subcell_pipeline/analysis/compression_metrics/compression_metric.py +++ b/subcell_pipeline/analysis/compression_metrics/compression_metric.py @@ -87,7 +87,7 @@ def description(self: Enum) -> str: } return units.get(self.value, "") - def bounds(self: Enum) -> tuple[float,float]: + def bounds(self: Enum) -> tuple[float, float]: """ Return the default bounds for the compression metric. 
@@ -104,12 +104,12 @@ def bounds(self: Enum) -> tuple[float,float]: bounds = { CompressionMetric.NON_COPLANARITY.value: (0, 0.03), CompressionMetric.PEAK_ASYMMETRY.value: (0, 0.5), - CompressionMetric.SUM_BENDING_ENERGY.value: (0, 0), # TODO + CompressionMetric.SUM_BENDING_ENERGY.value: (0, 0), # TODO CompressionMetric.AVERAGE_PERP_DISTANCE.value: (0, 85.0), - CompressionMetric.TOTAL_FIBER_TWIST.value: (0, 0), # TODO + CompressionMetric.TOTAL_FIBER_TWIST.value: (0, 0), # TODO CompressionMetric.CALC_BENDING_ENERGY.value: (0, 10), CompressionMetric.CONTOUR_LENGTH.value: (480, 505), - CompressionMetric.COMPRESSION_RATIO.value: (0, 0), # TODO + CompressionMetric.COMPRESSION_RATIO.value: (0, 0), # TODO } return bounds.get(self.value, (0, 0)) diff --git a/subcell_pipeline/analysis/compression_metrics/polymer_trace.py b/subcell_pipeline/analysis/compression_metrics/polymer_trace.py index f6f900d..6a376d0 100644 --- a/subcell_pipeline/analysis/compression_metrics/polymer_trace.py +++ b/subcell_pipeline/analysis/compression_metrics/polymer_trace.py @@ -207,12 +207,14 @@ def get_bending_energy_from_trace( vec1 = polymer_trace[ind + 1] - polymer_trace[ind] vec2 = polymer_trace[ind + 2] - polymer_trace[ind + 1] - if np.isclose(np.linalg.norm(vec1), 0.) or np.isclose(np.linalg.norm(vec2), 0.): + if np.isclose(np.linalg.norm(vec1), 0.0) or np.isclose( + np.linalg.norm(vec2), 0.0 + ): # TODO handle this differently? - cos_angle[ind] = 0. + cos_angle[ind] = 0.0 print("Warning: zero vector in bending energy calculation.") continue - + cos_angle[ind] = ( np.dot(vec1, vec2) / np.linalg.norm(vec1) / np.linalg.norm(vec2) ) From 6699c55b8af6e5fb947128fca6a71760327cee85 Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Wed, 10 Jul 2024 15:48:58 -0400 Subject: [PATCH 37/63] Update pca analysis workflow to save results to bucket --- .../_run_pca_on_compression_simulations.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/subcell_pipeline/analysis/dimensionality_reduction/_run_pca_on_compression_simulations.py b/subcell_pipeline/analysis/dimensionality_reduction/_run_pca_on_compression_simulations.py index 630ccc8..221feab 100644 --- a/subcell_pipeline/analysis/dimensionality_reduction/_run_pca_on_compression_simulations.py +++ b/subcell_pipeline/analysis/dimensionality_reduction/_run_pca_on_compression_simulations.py @@ -25,9 +25,8 @@ raise ImportError("This module is a notebook and is not meant to be imported") # %% -from pathlib import Path - import pandas as pd +from io_collection.save.save_pickle import save_pickle from subcell_pipeline.analysis.dimensionality_reduction.fiber_data import ( get_merged_data, @@ -69,7 +68,7 @@ condition_keys: list[str] = ["0047", "0150", "0470", "1500"] # Location to save analysis results (S3 bucket or local path) -save_location: str = str(Path(__file__).parents[3] / "analysis_outputs") +save_location: str = "s3://subcell-working-bucket" # %% [markdown] """ @@ -134,6 +133,14 @@ # %% pca_results, pca = run_pca(data) +# %% [markdown] +""" +## Save PCA object +""" + +# %% +save_pickle(save_location, "actin_compression_pca.pkl", pca) + # %% [markdown] """ ## Save PCA results @@ -151,6 +158,7 @@ """ ## Save PCA trajectories """ + # %% save_pca_trajectories( pca_results, save_location, "actin_compression_pca_trajectories.json" @@ -160,6 +168,7 @@ """ ## Save PCA transforms """ + # %% points: list[list[float]] = [ [-900, -600, -300, 0, 300, 600], From 18c54a52e9a1b43f41da37412757a24eecee1d6b Mon Sep 17 
00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Wed, 10 Jul 2024 15:49:18 -0400 Subject: [PATCH 38/63] Update visualization README --- subcell_pipeline/visualization/README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/subcell_pipeline/visualization/README.md b/subcell_pipeline/visualization/README.md index fc3fc8f..3ea3d5b 100644 --- a/subcell_pipeline/visualization/README.md +++ b/subcell_pipeline/visualization/README.md @@ -1,6 +1,6 @@ # Visualization -Visualization of simulation trajectories using [Simularium](https://simularium.allencell.org/). +Visualization of simulation trajectories and data using [Simularium](https://simularium.allencell.org/). ## Individual simulations @@ -10,3 +10,11 @@ Visualization of simulation trajectories using [Simularium](https://simularium.a ## Combined simulations - **Visualize combined ReaDDy and Cytosim simulation trajectories** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/visualization/_visualize_combined_trajectories.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/visualization/_visualize_combined_trajectories.html)) + +## Tomography data + +- **Visualize actin CME tomography data** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/visualization/_visualize_tomography_data.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/visualization/_visualize_tomography_data.html)) + +## Dimensionality reduction + +- **Visualize dimensionality reduction analysis of actin filaments** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/visualization/_visualize_dimensionality_reduction.html)) From 7e5d06a85435238634e3567a1c6e1d03f21b2037 Mon Sep 17 00:00:00 2001 From: jessicasyu <15913767+jessicasyu@users.noreply.github.com> Date: Wed, 10 Jul 2024 15:52:25 -0400 Subject: [PATCH 39/63] Refactor PCA visualization --- subcell_pipeline/constants.py | 203 ---- subcell_pipeline/temporary_file_io.py | 138 --- .../_visualize_dimensionality_reduction.py | 113 +-- .../visualization/dimensionality_reduction.py | 120 +++ .../visualization/fiber_points.py | 72 ++ .../visualization/spatial_annotator.py | 4 +- subcell_pipeline/visualization/tomography.py | 61 +- subcell_pipeline/visualization/visualizer.py | 904 ------------------ 8 files changed, 239 insertions(+), 1376 deletions(-) delete mode 100644 subcell_pipeline/constants.py delete mode 100644 subcell_pipeline/temporary_file_io.py create mode 100644 subcell_pipeline/visualization/dimensionality_reduction.py create mode 100644 subcell_pipeline/visualization/fiber_points.py delete mode 100644 subcell_pipeline/visualization/visualizer.py diff --git a/subcell_pipeline/constants.py b/subcell_pipeline/constants.py deleted file mode 100644 index c679cad..0000000 --- a/subcell_pipeline/constants.py +++ /dev/null @@ -1,203 +0,0 @@ -"""Constants for parsing simulations.""" - -from typing import Dict, List, Union - -import numpy as np -from simulariumio import DISPLAY_TYPE, DisplayData - -WORKING_DIR_PATH: str = "data/" - -COLUMN_NAMES: List[str] = [ - "fiber_id", - "xpos", - "ypos", - "zpos", - "xforce", - "yforce", - "zforce", - "segment_curvature", - "time", - "fiber_point", -] - -COLUMN_DTYPES: Dict[str, Union[float, int]] = { - "fiber_id": int, - "xpos": float, - "ypos": float, - "zpos": 
float, - "xforce": float, - "yforce": float, - "zforce": float, - "segment_curvature": float, - "time": float, - "fiber_point": int, -} - -BOX_SIZE: np.ndarray = np.array(3 * [600.0]) - -READDY_TIMESTEP: float = 0.1 # ns - -READDY_SAVED_FRAMES: int = 1000 - -READDY_TOTAL_STEPS: Dict[str, int] = { - "ACTIN_NO_COMPRESSION": 1e7, - "ACTIN_COMPRESSION_VELOCITY_0047": 3.2e8, - "ACTIN_COMPRESSION_VELOCITY_0150": 1e8, - "ACTIN_COMPRESSION_VELOCITY_0470": 3.2e7, - "ACTIN_COMPRESSION_VELOCITY_1500": 1e7, -} - -# particle types correspond to types from simularium/readdy-models -ACTIN_START_PARTICLE_PHRASE: str = "pointed" -ACTIN_PARTICLE_TYPES: List[str] = [ - "actin#", - "actin#ATP_", - "actin#mid_", - "actin#mid_ATP_", - "actin#fixed_", - "actin#fixed_ATP_", - "actin#mid_fixed_", - "actin#mid_fixed_ATP_", - "actin#barbed_", - "actin#barbed_ATP_", - "actin#fixed_barbed_", - "actin#fixed_barbed_ATP_", -] - -# measured from crystal structure -IDEAL_ACTIN_POSITIONS: np.ndarray = np.array( - [ - [24.738, 20.881, 26.671], - [27.609, 24.061, 27.598], - [30.382, 21.190, 25.725], - ] -) -IDEAL_ACTIN_VECTOR_TO_AXIS: np.ndarray = np.array( - [-0.01056751, -1.47785105, -0.65833209] -) - -CYTOSIM_SCALE_FACTOR: float = 1000.0 - - -def READDY_DISPLAY_DATA() -> Dict[str, DisplayData]: - extra_radius = 1.5 - actin_radius = 2.0 + extra_radius - n_polymer_numbers = 5 - result = {} - for i in range(1, n_polymer_numbers + 1): - result.update( - { - f"actin#{i}": DisplayData( - name="actin", - display_type=DISPLAY_TYPE.SPHERE, - radius=actin_radius, - color="#bf9b30", - ), - f"actin#mid_{i}": DisplayData( - name="actin#mid", - display_type=DISPLAY_TYPE.SPHERE, - radius=actin_radius, - color="#bf9b30", - ), - f"actin#fixed_{i}": DisplayData( - name="actin#fixed", - display_type=DISPLAY_TYPE.SPHERE, - radius=actin_radius, - color="#bf9b30", - ), - f"actin#mid_fixed_{i}": DisplayData( - name="actin#mid_fixed", - display_type=DISPLAY_TYPE.SPHERE, - radius=actin_radius, - color="#bf9b30", - ), - f"actin#ATP_{i}": DisplayData( - name="actin#ATP", - display_type=DISPLAY_TYPE.SPHERE, - radius=actin_radius, - color="#ffbf00", - ), - f"actin#mid_ATP_{i}": DisplayData( - name="actin#mid_ATP", - display_type=DISPLAY_TYPE.SPHERE, - radius=actin_radius, - color="#ffbf00", - ), - f"actin#fixed_ATP_{i}": DisplayData( - name="actin#fixed_ATP", - display_type=DISPLAY_TYPE.SPHERE, - radius=actin_radius, - color="#ffbf00", - ), - f"actin#mid_fixed_ATP_{i}": DisplayData( - name="actin#mid_fixed_ATP", - display_type=DISPLAY_TYPE.SPHERE, - radius=actin_radius, - color="#ffbf00", - ), - f"actin#barbed_{i}": DisplayData( - name="actin#barbed", - display_type=DISPLAY_TYPE.SPHERE, - radius=actin_radius, - color="#ffdc73", - ), - f"actin#barbed_ATP_{i}": DisplayData( - name="actin#barbed_ATP", - display_type=DISPLAY_TYPE.SPHERE, - radius=actin_radius, - color="#ffdc73", - ), - f"actin#fixed_barbed_{i}": DisplayData( - name="actin#fixed_barbed", - display_type=DISPLAY_TYPE.SPHERE, - radius=actin_radius, - color="#ffdc73", - ), - f"actin#fixed_barbed_ATP_{i}": DisplayData( - name="actin#fixed_barbed_ATP", - display_type=DISPLAY_TYPE.SPHERE, - radius=actin_radius, - color="#ffdc73", - ), - f"actin#pointed_{i}": DisplayData( - name="actin#pointed", - display_type=DISPLAY_TYPE.SPHERE, - radius=actin_radius, - color="#a67c00", - ), - f"actin#pointed_ATP_{i}": DisplayData( - name="actin#pointed_ATP", - display_type=DISPLAY_TYPE.SPHERE, - radius=actin_radius, - color="#a67c00", - ), - f"actin#pointed_fixed_{i}": DisplayData( - name="actin#pointed_fixed", 
- display_type=DISPLAY_TYPE.SPHERE, - radius=actin_radius, - color="#a67c00", - ), - f"actin#pointed_fixed_ATP_{i}": DisplayData( - name="actin#pointed_fixed_ATP", - display_type=DISPLAY_TYPE.SPHERE, - radius=actin_radius, - color="#a67c00", - ), - }, - ) - return result - - -SIMULATOR_COLORS: Dict[str, str] = { - "cytosim": "#1cbfa4", - "readdy": "#ffae52", -} - - -TOMOGRAPHY_SAMPLE_COLUMNS: list[str] = ["xpos", "ypos", "zpos"] - - -TOMOGRAPHY_VIZ_SCALE: float = 1000.0 - - -TOMOGRAPHY_MIN_COMPRESSION: int = 2 diff --git a/subcell_pipeline/temporary_file_io.py b/subcell_pipeline/temporary_file_io.py deleted file mode 100644 index f5fb515..0000000 --- a/subcell_pipeline/temporary_file_io.py +++ /dev/null @@ -1,138 +0,0 @@ - -"""Methods for parsing ReaDDy simulations.""" - -import os - -import boto3 -from botocore.exceptions import ClientError - -from .constants import WORKING_DIR_PATH - - -s3_client = boto3.client("s3") - - -def make_working_directory() -> None: - """ - Make a local working directory at the - WORKING_DIR_PATH. - """ - if not os.path.isdir(WORKING_DIR_PATH): - os.makedirs(WORKING_DIR_PATH) - - -def _download_s3_file( - bucket: str, - key: str, - dest_path: str, -) -> bool: - """ - Download files from S3 - """ - if os.path.isfile(dest_path): - # already downloaded - return False - try: - s3_client.download_file( - bucket, - key, - dest_path, - ) - print(f"Downloaded {dest_path}") - return True - except ClientError: - print(f"!!! Failed to download {key}") - return False - - -def download_readdy_hdf5( - bucket: str, - series_name: str, - series_key: str, - rep_ix: int, -) -> bool: - """ - Download files from S3 - (ReaDDy Python pkg currently requires a local file path) - - Parameters - ---------- - bucket - Name of S3 bucket for input and output files. - series_name - Name of simulation series. - series_key - Combination of series and condition names. - replicate_ix - Replicate index. - """ - make_working_directory() - aws_h5_key = f"{series_name}/outputs/{series_key}_{rep_ix}.h5" - local_h5_path = os.path.join(WORKING_DIR_PATH, f"{series_key}_{rep_ix}.h5") - return _download_s3_file(bucket, aws_h5_key, local_h5_path) - - -def download_all_readdy_outputs( - bucket: str, - series_name: str, - condition_keys: list[str], - n_replicates: int, -) -> None: - """ - Download ReaDDy simulation outputs for all conditions and replicates. - - Parameters - ---------- - bucket - Name of S3 bucket for input and output files. - series_name - Name of simulation series. - condition_keys - List of condition keys. - n_replicates - Number of simulation replicates. - """ - make_working_directory() - - for condition_key in condition_keys: - series_key = f"{series_name}_{condition_key}" if condition_key else series_name - - for rep_ix in range(n_replicates): - - local_h5_path = os.path.join(WORKING_DIR_PATH, f"{series_key}_{rep_ix}.h5") - - # Skip if file already exists. - if os.path.isfile(local_h5_path): - print(f"ReaDDy file [ { local_h5_path } ] already downloaded. Skipping.") - continue - - aws_h5_key = f"{series_name}/outputs/{series_key}_{rep_ix}.h5" - _download_s3_file(bucket, aws_h5_key, local_h5_path) - - print(f"Downloaded data for [ {condition_key} ] replicate [ {rep_ix} ]") - - -def upload_file_to_s3(bucket: str, src_path: str, s3_path: str) -> bool: - """ - Upload a file to an S3 bucket - - Parameters - ---------- - bucket - Name of S3 bucket for input and output files. 
- src_path - Local path to file to upload - s3_path - S3 key for where to save in the bucket - """ - if not os.path.isfile(src_path): - print(f"!!! File does not exist to upload {src_path}") - return False - try: - bucket = bucket.split("s3://")[-1] - s3_client.upload_file(src_path, bucket, s3_path) - print(f"Uploaded to {s3_path}") - return True - except ClientError: - print(f"!!! Failed to upload {src_path}") - return False diff --git a/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py b/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py index 9bd04f4..6f65130 100644 --- a/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py +++ b/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py @@ -3,85 +3,65 @@ # %% [markdown] """ - -Notebook contains steps for visualizing PCA space -for actin fibers. +Notebook contains steps for visualizing dimensionality reduction using PCA for +actin fibers. -- [Pre-process Inputs](#pre-process-inputs) -- [Visualize Inverse PCA](#visualize-inverse-pca) +- [Define visualization settings](#define-visualization-settings) +- [Visualize inverse PCA](#visualize-inverse-pca) """ # %% if __name__ != "__main__": raise ImportError("This module is a notebook and is not meant to be imported") +# %% +from pathlib import Path + +from subcell_pipeline.visualization.dimensionality_reduction import ( + visualize_dimensionality_reduction, +) # %% [markdown] """ -## Pre-process Inputs +## Define visualization settings -If more analysis outputs for PCA are saved in S3, this will no longer be necessary. - -- Input: `(series_name)/analysis/(series_name)_(align_key).csv` (for Cytosim and ReaDDy) -- Output: `actin_compression_pca_results.csv` and `actin_compression_pca.pkl` +Define simulation and visualization settings that are shared between different +simulation series.
""" # %% -import pandas as pd -from io_collection.save.save_dataframe import save_dataframe -from io_collection.save.save_pickle import save_pickle -from subcell_pipeline.analysis.dimensionality_reduction.fiber_data import get_merged_data -from subcell_pipeline.analysis.dimensionality_reduction.pca_dim_reduction import run_pca - -# Name of the simulation series -series_name: str = "COMPRESSION_VELOCITY" - # S3 bucket for input and output files bucket = "s3://subcell-working-bucket" -# S3 bucket Cytosim for input and output files -cytosim_bucket: str = "s3://cytosim-working-bucket" - -# S3 bucket ReaDDy for input and output files -readdy_bucket: str = "s3://readdy-working-bucket" - -# Random seeds for simulations -random_seeds: list[int] = [1, 2, 3, 4, 5] - -# List of condition file keys for each velocity -condition_keys: list[str] = ["0047", "0150", "0470", "1500"] +# File key for PCA results dataframe +pca_results_key = "actin_compression_pca_results.csv" -readdy_data = get_merged_data(readdy_bucket, f"ACTIN_{series_name}", condition_keys, random_seeds) -readdy_data["simulator"] = "readdy" +# File key for PCA object pickle +pca_pickle_key = "actin_compression_pca.pkl" -cytosim_data = get_merged_data( - cytosim_bucket, series_name, condition_keys, random_seeds -) -cytosim_data["simulator"] = "cytosim" +# Temporary path to save visualization files +temp_path: Path = Path(__file__).parents[2] / "viz_outputs" +temp_path.mkdir(parents=True, exist_ok=True) -data = pd.concat([cytosim_data, readdy_data]) -data["repeat"] = data["seed"] - 1 -data["velocity"] = data["key"].astype("int") / 10 +# Select how PC distributions are shown +# - True to scroll through the PC distributions over time if True +# - False to show all together in one timestep +distribution_over_time = False -time_map = { - ("cytosim", "0047"): 0.031685, - ("cytosim", "0150"): 0.01, - ("cytosim", "0470"): 0.00316, - ("cytosim", "1500"): 0.001, - ("readdy", "0047"): 1000, - ("readdy", "0150"): 1000, - ("readdy", "0470"): 1000, - ("readdy", "1500"): 1000, -} +# Select if simulator distributions are shown +# - True to show ReaDDy and Cytosim separately +# - False to show all together +simulator_detail = False -pca_results, pca = run_pca(data) +# Number of standard deviations to visualize +std_devs = 2.0 -save_dataframe(bucket, "actin_compression_pca_results.csv", pca_results) -save_pickle(bucket, "actin_compression_pca.pkl", pca) +# Number of samples for each PC distribution +sample_resolution = 5 # %% [markdown] """ -## Visualize Inverse PCA +## Visualize inverse PCA Visualize PCA space for actin fibers. 
@@ -89,27 +69,14 @@ - Output: `(name)/(name).simularium` """ -# %% -from subcell_pipeline.visualization.visualizer import ( - visualize_dimensionality_reduction, -) - # %% -# S3 bucket for input and output files -bucket = "s3://subcell-working-bucket" - -# File key for PCA results dataframe -pca_results_key = "actin_compression_pca_results.csv" - -# File key for PCA object pickle -pca_pickle_key = "actin_compression_pca.pkl" - -# Scroll through the PC distributions over time if True, otherwise show all together in one timestep -distribution_over_time = False - -# Also show distributions for ReaDDy and Cytosim if True, otherwise just all together -simulator_detail = False - visualize_dimensionality_reduction( - bucket, pca_results_key, pca_pickle_key, distribution_over_time, simulator_detail + bucket, + pca_results_key, + pca_pickle_key, + distribution_over_time, + simulator_detail, + std_devs, + sample_resolution, + str(temp_path), ) diff --git a/subcell_pipeline/visualization/dimensionality_reduction.py b/subcell_pipeline/visualization/dimensionality_reduction.py new file mode 100644 index 0000000..346daab --- /dev/null +++ b/subcell_pipeline/visualization/dimensionality_reduction.py @@ -0,0 +1,120 @@ +import os + +import numpy as np +from io_collection.load.load_buffer import load_buffer +from io_collection.load.load_dataframe import load_dataframe +from io_collection.load.load_pickle import load_pickle +from io_collection.save.save_buffer import save_buffer +from simulariumio import DISPLAY_TYPE, CameraData, DisplayData, MetaData, UnitData + +from subcell_pipeline.visualization.fiber_points import ( + generate_trajectory_converter_for_fiber_points, +) + +BOX_SIZE: np.ndarray = np.array(3 * [600.0]) +"""Bounding box size for dimensionality reduction trajectory.""" + + +def visualize_dimensionality_reduction( + bucket: str, + pca_results_key: str, + pca_pickle_key: str, + distribution_over_time: bool, + simulator_detail: bool, + std_devs: float, + sample_resolution: int, + temp_path: str, +) -> None: + """ + Visualize PCA space for actin fibers. + + Parameters + ---------- + bucket + Name of S3 bucket for input and output files. + pca_results_key + File key for PCA results dataframe. + pca_pickle_key + File key for PCA object pickle. + distribution_over_time + True to scroll through the PC distributions over time, False otherwise. + simulator_detail + True to show individual simulator ranges, False otherwise. + std_devs + Number of standard deviations to visualize. + sample_resolution + Number of samples for each PC distribution. Should be odd. + temp_path + Local path for saving visualization output files. 
+ """ + + if sample_resolution % 2 == 0: + sample_resolution += 1 + + pca_results = load_dataframe(bucket, pca_results_key) + pca = load_pickle(bucket, pca_pickle_key) + + fiber_points = [] + type_names = [] + display_data = {} + + inc = 2 * std_devs / (sample_resolution - 1) + samples = np.arange(-std_devs, std_devs + inc, inc) + stdev_pc1 = pca_results["PCA1"].std(ddof=0) + stdev_pc2 = pca_results["PCA2"].std(ddof=0) + data = { + "PC1": [samples * stdev_pc1, 0], + "PC2": [0, samples * stdev_pc2], + } + + if distribution_over_time: + for pc_ix, pc in enumerate(data): + fiber_points.append([]) + pca.inverse_transform(data[pc]).reshape(-1, 3) + for _ in samples: + fiber_points[pc_ix].append() + fiber_points[pc_ix] = np.array(fiber_points[pc_ix]) + else: + for sample in samples: + for pc in data: + + import ipdb + + ipdb.set_trace() + + fiber_points.append(pca.inverse_transform(data[pc]).reshape(1, -1, 3)) + type_name = f"{pc}#{sample}" + type_names.append(type_name) + if type_name not in display_data: + display_data[type_name] = DisplayData( + name=type_name, + display_type=DISPLAY_TYPE.FIBER, + ) + + meta_data = MetaData( + box_size=BOX_SIZE, + camera_defaults=CameraData( + position=np.array([10.0, 0.0, 200.0]), + look_at_position=np.array([10.0, 0.0, 0.0]), + fov_degrees=60.0, + ), + trajectory_title="Actin Compression Dimensionality Reduction", + ) + time_units = UnitData("count") # frames + spatial_units = UnitData("nm") # nanometers + + converter = generate_trajectory_converter_for_fiber_points( + fiber_points, + type_names, + meta_data, + display_data, + time_units, + spatial_units, + ) + + # Save locally and copy to bucket. + name = os.path.splitext(pca_pickle_key)[0] + local_file_path = os.path.join(temp_path, name) + converter.save(output_path=local_file_path) + output_key = f"{name}/{name}.simularium" + save_buffer(bucket, output_key, load_buffer(temp_path, f"{name}.simularium")) diff --git a/subcell_pipeline/visualization/fiber_points.py b/subcell_pipeline/visualization/fiber_points.py new file mode 100644 index 0000000..5cfc577 --- /dev/null +++ b/subcell_pipeline/visualization/fiber_points.py @@ -0,0 +1,72 @@ +import numpy as np +from simulariumio import ( + AgentData, + DisplayData, + MetaData, + TrajectoryConverter, + TrajectoryData, + UnitData, +) + + +def generate_trajectory_converter_for_fiber_points( + fiber_points: list[np.ndarray], + type_names: list[str], + meta_data: MetaData, + display_data: dict[str, DisplayData], + time_units: UnitData, + spatial_units: UnitData, +) -> TrajectoryConverter: + """ + Generate a TrajectoryConverter for the given fiber points. + + Parameters + ---------- + fiber_points + List of fibers, where each fiber has the shape (timesteps x points x 3). + type_names + List of type names. + meta_data + Simularium metadata object. + display_data + Map of type names to Simularium display data objects. + time_units + Time unit data. + spatial_units + Spatial unit data. + + Returns + ------- + : + Simularium trajectory converter. 
+ """ + + # build subpoints array with correct dimensions + n_fibers = len(fiber_points) + total_steps = fiber_points[0].shape[0] + n_points = fiber_points[0].shape[1] + subpoints = np.zeros((total_steps, n_fibers, n_points, 3)) + for time_ix in range(total_steps): + for fiber_ix in range(n_fibers): + subpoints[time_ix][fiber_ix] = fiber_points[fiber_ix][time_ix] + subpoints = subpoints.reshape((total_steps, n_fibers, 3 * n_points)) + + # convert to simularium + traj_data = TrajectoryData( + meta_data=meta_data, + agent_data=AgentData( + times=np.arange(total_steps), + n_agents=n_fibers * np.ones(total_steps), + viz_types=1001 * np.ones((total_steps, n_fibers)), # fiber viz type = 1001 + unique_ids=np.array(total_steps * [list(range(n_fibers))]), + types=total_steps * [type_names], + positions=np.zeros((total_steps, n_fibers, 3)), + radii=0.5 * np.ones((total_steps, n_fibers)), + n_subpoints=3 * n_points * np.ones((total_steps, n_fibers)), + subpoints=subpoints, + display_data=display_data, + ), + time_units=time_units, + spatial_units=spatial_units, + ) + return TrajectoryConverter(traj_data) diff --git a/subcell_pipeline/visualization/spatial_annotator.py b/subcell_pipeline/visualization/spatial_annotator.py index 1392869..fe035b1 100644 --- a/subcell_pipeline/visualization/spatial_annotator.py +++ b/subcell_pipeline/visualization/spatial_annotator.py @@ -177,10 +177,10 @@ def add_sphere_agents( time_ix ][:n_spheres] new_agent_data.radii[time_ix][start_ix:end_ix] = n_spheres * [radius] - + # TODO use color parameter after finished debugging colors = ["#0000ff", "#00ff00", "#ffff00", "#ff0000", "#ff00ff"] - + for ix in range(max_spheres): tn = f"{type_name} {ix}" new_agent_data.display_data[tn] = DisplayData( diff --git a/subcell_pipeline/visualization/tomography.py b/subcell_pipeline/visualization/tomography.py index 0d2269b..c3b49d8 100644 --- a/subcell_pipeline/visualization/tomography.py +++ b/subcell_pipeline/visualization/tomography.py @@ -6,19 +6,14 @@ from io_collection.load.load_buffer import load_buffer from io_collection.load.load_dataframe import load_dataframe from io_collection.save.save_buffer import save_buffer -from simulariumio import ( - AgentData, - CameraData, - DisplayData, - MetaData, - TrajectoryConverter, - TrajectoryData, - UnitData, -) +from simulariumio import CameraData, MetaData, TrajectoryConverter, UnitData from subcell_pipeline.analysis.compression_metrics.compression_metric import ( CompressionMetric, ) +from subcell_pipeline.visualization.fiber_points import ( + generate_trajectory_converter_for_fiber_points, +) from subcell_pipeline.visualization.histogram_plots import make_empty_histogram_plots from subcell_pipeline.visualization.spatial_annotator import SpatialAnnotator @@ -27,52 +22,6 @@ TOMOGRAPHY_VIZ_SCALE: float = 100.0 -def _generate_simularium_for_fiber_points( - fiber_points: list[np.ndarray], - type_names: list[str], - meta_data: MetaData, - display_data: dict[str, DisplayData], - time_units: UnitData, - spatial_units: UnitData, -) -> TrajectoryConverter: - """ - Generate a TrajectoryConverter for the given fiber points. - - Fiber points is a list of fibers, where each fiber has the shape (timesteps - x points x 3). 
- """ - - # build subpoints array with correct dimensions - n_fibers = len(fiber_points) - total_steps = fiber_points[0].shape[0] - n_points = fiber_points[0].shape[1] - subpoints = np.zeros((total_steps, n_fibers, n_points, 3)) - for time_ix in range(total_steps): - for fiber_ix in range(n_fibers): - subpoints[time_ix][fiber_ix] = fiber_points[fiber_ix][time_ix] - subpoints = subpoints.reshape((total_steps, n_fibers, 3 * n_points)) - - # convert to simularium - traj_data = TrajectoryData( - meta_data=meta_data, - agent_data=AgentData( - times=np.arange(total_steps), - n_agents=n_fibers * np.ones(total_steps), - viz_types=1001 * np.ones((total_steps, n_fibers)), # fiber viz type = 1001 - unique_ids=np.array(total_steps * [list(range(n_fibers))]), - types=total_steps * [type_names], - positions=np.zeros((total_steps, n_fibers, 3)), - radii=0.5 * np.ones((total_steps, n_fibers)), - n_subpoints=3 * n_points * np.ones((total_steps, n_fibers)), - subpoints=subpoints, - display_data=display_data, - ), - time_units=time_units, - spatial_units=spatial_units, - ) - return TrajectoryConverter(traj_data) - - def _add_tomography_plots( converter: TrajectoryConverter, metrics: list[CompressionMetric], @@ -156,7 +105,7 @@ def visualize_tomography( all_fiber_points.append(fiber_points) all_type_names.append(f"{dataset}#{fiber_index}") - converter = _generate_simularium_for_fiber_points( + converter = generate_trajectory_converter_for_fiber_points( all_fiber_points, all_type_names, MetaData( diff --git a/subcell_pipeline/visualization/visualizer.py b/subcell_pipeline/visualization/visualizer.py deleted file mode 100644 index a1d511f..0000000 --- a/subcell_pipeline/visualization/visualizer.py +++ /dev/null @@ -1,904 +0,0 @@ -#!/usr/bin/env python - -import os -from typing import Tuple, Dict, List - -import numpy as np -import pandas as pd -import matplotlib.pyplot as plt -from pint import UnitRegistry -from io_collection.keys.check_key import check_key -from io_collection.load.load_text import load_text -from io_collection.load.load_dataframe import load_dataframe -from io_collection.load.load_pickle import load_pickle -from simulariumio import ( - TrajectoryConverter, - MetaData, - InputFileData, - DisplayData, - DISPLAY_TYPE, - UnitData, - ScatterPlotData, - HistogramPlotData, - CameraData, - TrajectoryData, - AgentData, - DimensionData, -) -from simulariumio.filters import EveryNthTimestepFilter -from simulariumio.cytosim import CytosimConverter, CytosimData, CytosimObjectInfo -from simulariumio.readdy import ReaddyConverter, ReaddyData -from ..constants import ( - BOX_SIZE, - WORKING_DIR_PATH, - READDY_TIMESTEP, - READDY_TOTAL_STEPS, - READDY_SAVED_FRAMES, - READDY_DISPLAY_DATA, - CYTOSIM_SCALE_FACTOR, - SIMULATOR_COLORS, - TOMOGRAPHY_SAMPLE_COLUMNS, - TOMOGRAPHY_VIZ_SCALE, - TOMOGRAPHY_MIN_COMPRESSION, - TOMOGRAPHY_SCALE_FACTOR, -) - -from ..temporary_file_io import ( - download_readdy_hdf5, - upload_file_to_s3, - make_working_directory, -) -from ..analysis.compression_metrics.compression_metric import CompressionMetric -from ..simulation.readdy import ReaddyPostProcessor, load_readdy_fiber_points -from .spatial_annotator import SpatialAnnotator - -def _empty_scatter_plots( - total_steps: int = -1, - times: np.ndarray = None, - time_units: str = None, -) -> Dict[CompressionMetric, ScatterPlotData]: - if total_steps < 0 and times is None: - raise Exception("Either total_steps or times array is required for plots") - elif times is None: - # use normalized time - xlabel = "T (normalized)" - xtrace = (1 / 
float(total_steps)) * np.arange(total_steps) - else: - # use actual time - xlabel = f"T ({time_units})" - xtrace = times - total_steps = times.shape[0] - return { - CompressionMetric.AVERAGE_PERP_DISTANCE: ScatterPlotData( - title="Average Perpendicular Distance", - xaxis_title=xlabel, - yaxis_title="distance (nm)", - xtrace=xtrace, - ytraces={ - "<<<": np.zeros(total_steps), - ">>>": 85.0 * np.ones(total_steps), - }, - render_mode="lines", - ), - CompressionMetric.CALC_BENDING_ENERGY: ScatterPlotData( - title="Bending Energy", - xaxis_title=xlabel, - yaxis_title="energy", - xtrace=xtrace, - ytraces={ - "<<<": np.zeros(total_steps), - ">>>": 10.0 * np.ones(total_steps), - }, - render_mode="lines", - ), - CompressionMetric.NON_COPLANARITY: ScatterPlotData( - title="Non-coplanarity", - xaxis_title=xlabel, - yaxis_title="3rd component variance from PCA", - xtrace=xtrace, - ytraces={ - "<<<": np.zeros(total_steps), - ">>>": 0.03 * np.ones(total_steps), - }, - render_mode="lines", - ), - CompressionMetric.PEAK_ASYMMETRY: ScatterPlotData( - title="Peak Asymmetry", - xaxis_title=xlabel, - yaxis_title="normalized peak distance", - xtrace=xtrace, - ytraces={ - "<<<": np.zeros(total_steps), - ">>>": 0.5 * np.ones(total_steps), - }, - render_mode="lines", - ), - CompressionMetric.CONTOUR_LENGTH: ScatterPlotData( - title="Contour Length", - xaxis_title=xlabel, - yaxis_title="filament contour length (nm)", - xtrace=xtrace, - ytraces={ - "<<<": 480 * np.ones(total_steps), - ">>>": 505 * np.ones(total_steps), - }, - render_mode="lines", - ), - } - - -def _generate_plot_data(fiber_points: np.ndarray) -> Dict[CompressionMetric, list[float]]: - """ - Calculate plot traces from fiber_points. - """ - n_points = int(fiber_points.shape[2] / 3.0) - result = { - CompressionMetric.AVERAGE_PERP_DISTANCE: [], - CompressionMetric.CALC_BENDING_ENERGY: [], - CompressionMetric.NON_COPLANARITY: [], - CompressionMetric.PEAK_ASYMMETRY: [], - CompressionMetric.CONTOUR_LENGTH: [], - } - total_steps = fiber_points.shape[0] - for time_ix in range(total_steps): - points = fiber_points[time_ix][0].reshape((n_points, 3)) - for metric in result.keys(): - result[metric].append( - metric.calculate_metric( - polymer_trace=points - ) - ) - return result - - -def _add_individual_plots( - converter: TrajectoryConverter, - fiber_points: np.ndarray, - times: np.ndarray, -) -> None: - """ - Add plots to an individual trajectory - using fiber_points to calculate metrics. - """ - scatter_plots = _empty_scatter_plots(times) - plot_data = _generate_plot_data(fiber_points) - for metric, plot in scatter_plots.items(): - plot.ytraces["filament"] = np.array(plot_data[metric]) - converter.add_plot(plot, "scatter") - - -def _add_readdy_spatial_annotations( - converter: TrajectoryConverter, - post_processor: ReaddyPostProcessor, - fiber_chain_ids: List[List[List[int]]], - axis_positions: List[List[np.ndarray]], - fiber_points: np.ndarray, -) -> None: - """ - Add visualizations of edges, normals, and control points - to the ReaDDy Simularium data. 
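The deleted `_generate_plot_data` loop above calls `metric.calculate_metric(polymer_trace=points)` on an (n x 3) trace for every frame; the new plotting modules keep the same `CompressionMetric` API. A minimal sketch on a synthetic trace, assuming the enum import path used elsewhere in this repository:

```python
import numpy as np

from subcell_pipeline.analysis.compression_metrics.compression_metric import (
    CompressionMetric,
)

# A hypothetical straight 500 nm fiber sampled at 50 points.
trace = np.zeros((50, 3))
trace[:, 0] = np.linspace(0.0, 500.0, 50)

# For a straight trace, contour length equals end-to-end distance.
length = CompressionMetric.CONTOUR_LENGTH.calculate_metric(polymer_trace=trace)
print(length)  # ~500.0
```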
- """ - # edges - edges = post_processor.edge_positions() - converter._data = SpatialAnnotator.add_fiber_agents( - converter._data, - fiber_points=edges, - type_name="edge", - fiber_width=0.5, - color="#eaeaea", - ) - # normals - normals = post_processor.linear_fiber_normals( - fiber_chain_ids=fiber_chain_ids, - axis_positions=axis_positions, - normal_length=10.0, - ) - converter._data = SpatialAnnotator.add_fiber_agents( - converter._data, - fiber_points=normals, - type_name="normal", - fiber_width=0.5, - color="#685bf3", - ) - # control points - sphere_positions = [] - for time_ix in range(len(fiber_points)): - sphere_positions.append(fiber_points[time_ix][0]) - converter._data = SpatialAnnotator.add_sphere_agents( - converter._data, - sphere_positions, - type_name="fiber point", - radius=0.8, - color="#eaeaea", - ) - - -def _load_readdy_simularium(path_to_readdy_h5: str, series_key: str) -> TrajectoryConverter: - """ - Load from ReaDDy outputs and generate a TrajectoryConverter - to visualize an actin trajectory in Simularium. - """ - total_steps = READDY_TOTAL_STEPS[series_key] - return ReaddyConverter(ReaddyData( - timestep=1e-6 * (READDY_TIMESTEP * total_steps / READDY_SAVED_FRAMES), - path_to_readdy_h5=path_to_readdy_h5, - meta_data=MetaData( - box_size=BOX_SIZE, - camera_defaults=CameraData( - position=np.array([0.0, 0.0, 300.0]), - look_at_position=np.zeros(3), - up_vector=np.array([0.0, 1.0, 0.0]), - fov_degrees=120.0, - ), - scale_factor=1.0, - ), - display_data=READDY_DISPLAY_DATA(), - time_units=UnitData("ms"), - spatial_units=UnitData("nm"), - )) - -def _visualize_readdy_trajectory( - bucket: str, - series_name: str, - series_key: str, - rep_ix: int, - n_timepoints: int, - n_monomer_points: int, -) -> TrajectoryConverter: - """ - Save a Simularium file for a single ReaDDy trajectory with plots and spatial annotations. - """ - path_to_readdy_h5 = os.path.join(WORKING_DIR_PATH, f"{series_key}_{rep_ix}.h5") - converter = _load_readdy_simularium(path_to_readdy_h5, series_key) - - # load data shaped for analysis from a pickle if it exists, otherwise save one - post_processor, fiber_chain_ids, axis_positions, fiber_points, times = load_readdy_fiber_points( - bucket, series_name, series_key, rep_ix, n_timepoints, n_monomer_points - ) - _add_individual_plots(converter, fiber_points, times) - _add_readdy_spatial_annotations( - converter, post_processor, fiber_chain_ids, axis_positions, fiber_points - ) - - return converter - - -def _save_and_upload_simularium_file( - converter: TrajectoryConverter, - bucket: str, - output_key: str -) -> None: - """ - Save a local simularium file and upload it to s3. - """ - local_key = os.path.splitext(os.path.basename(output_key))[0] - local_output_path = os.path.join(WORKING_DIR_PATH, local_key) - make_working_directory() - - converter.save(local_output_path) - - # upload_file_to_s3(bucket, f"{local_output_path}.simularium", output_key) TODO - - -def visualize_individual_readdy_trajectories( - bucket: str, - series_name: str, - condition_keys: list[str], - n_replicates: int, - n_timepoints: int, - n_monomer_points: int, - recalculate: bool = True, -) -> None: - """ - Visualize individual ReaDDy simulations for select conditions and replicates. - - Parameters - ---------- - bucket - Name of S3 bucket for input and output files. - series_name - Name of simulation series. - condition_keys - List of condition keys. - n_replicates - Number of simulation replicates. - n_timepoints - Number of timepoints to visualize. 
- n_monomer_points - Number of control points for each polymer trace. - recalculate - Overwrite any outputs that already exist? - """ - for condition_key in condition_keys: - series_key = f"{series_name}_{condition_key}" if condition_key else series_name - - for rep_ix in range(n_replicates): - rep_id = rep_ix + 1 - output_key = f"{series_name}/viz/{series_key}_{rep_id:06d}.simularium" - - # Skip if output file already exists. - if not recalculate and check_key(bucket, output_key): - print(f"Simularium visualization [ { output_key } ] already exists. Skipping.") - continue - - print(f"Visualizing data for [ {condition_key} ] replicate [ {rep_ix} ]") - - download_readdy_hdf5(bucket, series_name, series_key, rep_ix) - converter = _visualize_readdy_trajectory( - bucket, - series_name, - series_key, - rep_ix, - n_timepoints, - n_monomer_points, - ) - _save_and_upload_simularium_file(converter, bucket, output_key) - - -ureg = UnitRegistry() - -def _find_time_units(raw_time: float, units: str = "s") -> Tuple[str, float]: - """ - Get the compact time units and a multiplier to put the times in those units - """ - time = ureg.Quantity(raw_time, units) - time = time.to_compact() - return "{:~}".format(time.units), time.magnitude / raw_time - - -def _filter_time(converter: TrajectoryConverter, n_timepoints: int) -> TrajectoryConverter: - """ - Use Simulariumio time filter - """ - time_inc = int(converter._data.agent_data.times.shape[0] / n_timepoints) - if time_inc < 2: - return converter - converter._data = converter.filter_data( - [ - EveryNthTimestepFilter( - n=time_inc, - ), - ] - ) - return converter - - -def _load_cytosim_simularium( - fiber_points_data: str, - singles_data: str, - n_timepoints: int, -) -> TrajectoryConverter: - """ - Load from Cytosim outputs and generate a TrajectoryConverter - to visualize an actin trajectory in Simularium. - """ - singles_display_data = DisplayData( - name="linker", - radius=0.01, - display_type=DISPLAY_TYPE.SPHERE, - color="#fff", - ) - converter = CytosimConverter(CytosimData( - meta_data=MetaData( - box_size=BOX_SIZE, - scale_factor=CYTOSIM_SCALE_FACTOR, - ), - object_info={ - "fibers": CytosimObjectInfo( - cytosim_file=InputFileData( - file_contents=fiber_points_data, - ), - display_data={ - 1: DisplayData( - name=f"actin", - radius=0.02, - display_type=DISPLAY_TYPE.FIBER, - ) - }, - ), - "singles" : CytosimObjectInfo( - cytosim_file=InputFileData( - file_contents=singles_data, - ), - display_data={ - 1 : singles_display_data, - 2 : singles_display_data, - 3 : singles_display_data, - 4 : singles_display_data, - } - ), - }, - )) - converter = _filter_time(converter, n_timepoints) - time_units, time_multiplier = _find_time_units(converter._data.agent_data.times[-1]) - converter._data.agent_data.times *= time_multiplier - converter._data.time_units = UnitData(time_units) - return converter - - -def _visualize_cytosim_trajectory( - fiber_points_data: str, - singles_data: str, - local_output_path: str, - n_timepoints: int, -) -> TrajectoryConverter: - """ - Save a Simularium file for a single Cytosim trajectory with plots. 
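`_find_time_units` relies on pint's `to_compact()` to pick a readable unit and a multiplier, and the helper carries over into the new visualization modules. A standalone check of that behavior:

```python
from pint import UnitRegistry

ureg = UnitRegistry()

# 3.5e-5 seconds compacts to 35 microseconds.
time = ureg.Quantity(3.5e-5, "s").to_compact()
print("{:~}".format(time.units), time.magnitude)  # -> µs 35.0
```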
- """ - converter = _load_cytosim_simularium(fiber_points_data, singles_data, n_timepoints) - _add_individual_plots( - converter, - converter._data.agent_data.subpoints, - converter._data.agent_data.times - ) - return converter - - -def visualize_individual_cytosim_trajectories( - bucket: str, - series_name: str, - condition_keys: list[str], - random_seeds: list[int], - n_timepoints: int, - recalculate: bool = True, -) -> None: - """ - Visualize individual Cytosim simulations for select conditions and replicates. - - Parameters - ---------- - bucket - Name of S3 bucket for input and output files. - series_name - Name of simulation series. - condition_keys - List of condition keys. - random_seeds - Random seeds for simulations. - n_timepoints - Number of timepoints to visualize. - recalculate - Overwrite any outputs that already exist? - """ - for condition_key in condition_keys: - series_key = f"{series_name}_{condition_key}" if condition_key else series_name - - for index, seed in enumerate(random_seeds): - output_key = f"{series_name}/viz/{series_key}_{seed:06d}.simularium" - - # Skip if output file already exists. - if not recalculate and check_key(bucket, output_key): - print(f"Simularium visualization [ { output_key } ] already exists. Skipping.") - continue - - output_key_template = f"{series_name}/outputs/{series_key}_{index}/%s" - fiber_points_data = load_text( - bucket, output_key_template % "fiber_points.txt" - ) - singles_data = load_text( - bucket, output_key_template % "singles.txt" - ) - local_output_path = os.path.join(WORKING_DIR_PATH, output_key) - converter = _visualize_cytosim_trajectory( - fiber_points_data, singles_data, local_output_path, n_timepoints - ) - _save_and_upload_simularium_file(converter, bucket, output_key) - - -def _load_fiber_points_from_dataframe( - simulator: str, - dataframe: pd.DataFrame, - n_timepoints: int -) -> np.ndarray: - """ - Load fiber points from pre-calculated dataframes - and generate a TrajectoryConverter to visualize - all actin trajectories together in Simularium. - """ - dataframe.sort_values(by=["time", "fiber_point"]) - total_steps = dataframe.time.unique().shape[0] - n_points = dataframe.fiber_point.unique().shape[0] - if total_steps != n_timepoints: - raise Exception( - f"Requested number of timesteps [ {n_timepoints} ] does not match " - f"number of timesteps in dataset [ {total_steps} ]." 
- ) - result = [] - for time_ix in range(total_steps): - result.append([]) - result[time_ix].append( - (CYTOSIM_SCALE_FACTOR if simulator == "cytosim" else 1) * np.array( - dataframe[time_ix * n_points : (time_ix + 1) * n_points][["xpos", "ypos", "zpos"]] - ) - ) - return np.array(result) - - -def _generate_simularium_for_fiber_points( - fiber_points: list[np.ndarray], - type_names: list[str], - meta_data: MetaData, - display_data: Dict[str, DisplayData], - time_units: UnitData, - spatial_units: UnitData, -) -> TrajectoryConverter: - """ - Generate a TrajectoryConverter for the fiber_points - (list of fibers, each = timesteps X points X 3) - """ - # build subpoints array with correct dimensions - n_fibers = len(fiber_points) - total_steps = fiber_points[0].shape[0] - n_points = fiber_points[0].shape[1] - subpoints = np.zeros((total_steps, n_fibers, n_points, 3)) - for time_ix in range(total_steps): - for fiber_ix in range(n_fibers): - subpoints[time_ix][fiber_ix] = fiber_points[fiber_ix][time_ix] - subpoints = subpoints.reshape((total_steps, n_fibers, 3 * n_points)) - # convert to simularium - traj_data = TrajectoryData( - meta_data=meta_data, - agent_data=AgentData( - times=np.arange(total_steps), - n_agents=n_fibers * np.ones((total_steps)), - viz_types=1001 * np.ones((total_steps, n_fibers)), # fiber viz type = 1001 - unique_ids=np.array(total_steps * [list(range(n_fibers))]), - types=total_steps * [type_names], - positions=np.zeros((total_steps, n_fibers, 3)), - radii=0.5 * np.ones((total_steps, n_fibers)), - n_subpoints=3 * n_points * np.ones((total_steps, n_fibers)), - subpoints=subpoints, - display_data=display_data, - ), - time_units=time_units, - spatial_units=spatial_units, - ) - return TrajectoryConverter(traj_data) - - -def _load_all_together_simularium( - fiber_points: list[np.ndarray], - type_names: list[str], - display_data: Dict[str, DisplayData], -) -> TrajectoryConverter: - """ - Generate a TrajectoryConverter with all simulations from ReaDDy and Cytosim together. - """ - meta_data=MetaData( - box_size=BOX_SIZE, - camera_defaults=CameraData( - position=np.array([10.0, 0.0, 200.0]), - look_at_position=np.array([10.0, 0.0, 0.0]), - fov_degrees=60.0, - ), - trajectory_title="Actin compression in Cytosim and Readdy", - ) - time_units=UnitData("count") # frames - spatial_units=UnitData("nm") # nanometer - return _generate_simularium_for_fiber_points( - fiber_points, - type_names, - meta_data, - display_data, - time_units, - spatial_units, - ) - - -def _add_combined_plots( - converter: TrajectoryConverter, - fiber_points: np.ndarray, - type_names: list[str], - n_timepoints: int, -) -> None: - """ - Add plots for all trajectories together - using fiber_points to calculate metrics. - """ - scatter_plots = _empty_scatter_plots(total_steps=n_timepoints) - for traj_ix in range(len(fiber_points)): - plot_data = _generate_plot_data(fiber_points[traj_ix]) - for metric, plot in scatter_plots.items(): - plot.ytraces[type_names[traj_ix]] = np.array(plot_data[metric]) - for metric, plot in scatter_plots.items(): - converter.add_plot(plot, "scatter") - - -def visualize_all_compressed_trajectories_together( - subcell_bucket: str, - readdy_bucket: str, - readdy_series_name: str, - cytosim_bucket: str, - cytosim_series_name: str, - condition_keys: list[str], - n_replicates: int, - n_timepoints: int, -) -> None: - """ - Visualize simulations from ReaDDy and Cytosim together - for select conditions and number of replicates. 
- - Parameters - ---------- - subcell_bucket - Name of S3 bucket for combined input and output files. - readdy_bucket - Name of S3 bucket for ReaDDy input and output files. - readdy_series_name - Name of ReaDDy simulation series. - cytosim_bucket - Name of S3 bucket for Cytosim input and output files. - cytosim_series_name - Name of Cytosim simulation series. - condition_keys - List of condition keys. - n_replicates - How many replicates to visualize. - n_timepoints - Number of timepoints to visualize. - """ - fiber_points = [] - type_names = [] - display_data = {} - for condition_key in condition_keys: - for index in range(n_replicates): - for simulator in SIMULATOR_COLORS: - - # get path of dataframe from simulation post-processing to use as input - rep_id = index + 1 - if simulator == "readdy": - bucket = readdy_bucket - df_key = f"{readdy_series_name}/data/{readdy_series_name}_{condition_key}_{rep_id:06d}.csv" - else: - bucket = cytosim_bucket - df_key = f"{cytosim_series_name}/samples/{cytosim_series_name}_{condition_key}_{rep_id:06d}.csv" - - # Skip if input dataframe does not exist. - if not check_key(bucket, df_key): - print(f"Dataframe not available for {simulator} [ { df_key } ]. Skipping.") - continue - - dataframe = load_dataframe(bucket, df_key) - fiber_points.append(_load_fiber_points_from_dataframe(simulator, dataframe, n_timepoints)) - condition = float(condition_key[:3] + "." + condition_key[-1]) - condition = round(condition) if condition_key[-1] == "0" else condition - type_names.append(f"{simulator}#{condition} um/s {index}") - display_data[type_names[-1]] = DisplayData( - name=type_names[-1], - display_type=DISPLAY_TYPE.FIBER, - color=SIMULATOR_COLORS[simulator], - ) - - converter = _load_all_together_simularium(fiber_points, type_names, display_data) - _add_combined_plots(converter, fiber_points, type_names, n_timepoints) - output_key = "actin_compression_cytosim_readdy.simularium" - _save_and_upload_simularium_file(converter, subcell_bucket, output_key) - - -def _empty_tomography_plots() -> Dict[CompressionMetric, HistogramPlotData]: - return { - CompressionMetric.CONTOUR_LENGTH : HistogramPlotData( - title="Contour Length", - xaxis_title="filament contour length (nm)", - traces={}, - ), - CompressionMetric.COMPRESSION_RATIO : HistogramPlotData( - title="Compression Percentage", - xaxis_title="percent (%)", - traces={}, - ), - CompressionMetric.AVERAGE_PERP_DISTANCE : HistogramPlotData( - title="Average Perpendicular Distance", - xaxis_title="distance (nm)", - traces={}, - ), - CompressionMetric.CALC_BENDING_ENERGY : HistogramPlotData( - title="Bending Energy", - xaxis_title="energy", - traces={}, - ), - CompressionMetric.NON_COPLANARITY : HistogramPlotData( - title="Non-coplanarity", - xaxis_title="3rd component variance from PCA", - traces={}, - ), - CompressionMetric.PEAK_ASYMMETRY : HistogramPlotData( - title="Peak Asymmetry", - xaxis_title="normalized peak distance", - traces={}, - ), - } - - -def _add_tomography_plots(fiber_points: list[np.ndarray], converter: TrajectoryConverter) -> None: - """ - Add plots to tomography data using pre-calculated metrics. - """ - plots = _empty_tomography_plots() - for metric in plots: - values = [] - for fiber in fiber_points: - values.append(metric.calculate_metric( - polymer_trace=fiber - )) - plots[metric].traces["actin"] = np.array(values) - if metric == CompressionMetric.COMPRESSION_RATIO: - plots[metric].traces["actin"] *= 100. 
- converter.add_plot(plots[metric], "histogram") - - -def _get_tomography_spatial_center_and_size(tomo_df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]: - """ - Get the center and size of the tomography dataset in 3D space. - """ - ixs = [ - list(tomo_df.columns).index(TOMOGRAPHY_SAMPLE_COLUMNS[0]), - list(tomo_df.columns).index(TOMOGRAPHY_SAMPLE_COLUMNS[1]), - list(tomo_df.columns).index(TOMOGRAPHY_SAMPLE_COLUMNS[2]), - ] - unique_values = list(map(set, tomo_df.values.T)) - mins = [] - maxs = [] - for dim_ix in range(3): - d_values = np.array(list(unique_values[ixs[dim_ix]])) - mins.append(np.amin(d_values)) - maxs.append(np.amax(d_values)) - mins = np.array(mins) - maxs = np.array(maxs) - return mins + 0.5 * (maxs - mins), maxs - mins - - -def visualize_tomography(bucket: str, name: str) -> None: - """ - Visualize segmented tomography data for actin fibers. - - Parameters - ---------- - bucket - Name of S3 bucket for input and output files. - name - Name of tomography dataset. - """ - tomo_key = f"{name}/{name}_coordinates_sampled.csv" - tomo_df = load_dataframe(bucket, tomo_key) - tomo_df = tomo_df.sort_values(by=["id", "monomer_ids"]) - tomo_df = tomo_df.reset_index(drop=True) - time_units = UnitData("count") - spatial_units = UnitData("um", 0.003) - names, ids = np.unique(np.array(list(tomo_df["id"])), return_index=True) - traj_ids = names[np.argsort(ids)] - for traj_id in traj_ids: - fiber_df = tomo_df.loc[tomo_df["id"] == traj_id] - center, box_size = _get_tomography_spatial_center_and_size(fiber_df) - fiber_points = TOMOGRAPHY_VIZ_SCALE * (np.array([fiber_df[["xpos", "ypos", "zpos"]]]) - center) - type_names = ["Raw data"] - display_data = { - "Raw data" : DisplayData( - name="Raw data", - display_type=DISPLAY_TYPE.FIBER, - color="#888888", - ) - } - converter = _generate_simularium_for_fiber_points( - [fiber_points], - type_names, - MetaData( - box_size=TOMOGRAPHY_VIZ_SCALE * box_size, - camera_defaults=CameraData(position=np.array([0.0, 0.0, 70.0])) - ), - display_data, - time_units, - spatial_units, - ) - - # TODO remove after debugging fiber point order - converter._data = SpatialAnnotator.add_sphere_agents( - converter._data, - [fiber_points[0]], - type_name="point", - radius=0.8, - ) - - _add_tomography_plots([fiber_points[0]], converter) - _save_and_upload_simularium_file(converter, bucket, f"{name}/{name}_{traj_id}.simularium") - - -def visualize_dimensionality_reduction( - bucket: str, - pca_results_key: str, - pca_pickle_key: str, - distribution_over_time: bool, - simulator_detail: bool, - std_devs: float, - sample_resolution: int, -) -> None: - """ - Visualize PCA space for actin fibers. - - Parameters - ---------- - bucket - Name of S3 bucket for input and output files. - pca_results_key - File key for PCA results dataframe. - pca_pickle_key - File key for PCA object pickle. - distribution_over_time - Scroll through the PC distributions over time? - Otherwise show all together in one timestep. - simulator_detail - Also show distributions for ReaDDy and Cytosim? - Otherwise just all together. - std_devs - How many standard deviations to visualize? - sample_resolution - How many samples to visualize for each PC distribution? 
- (should be an odd number) - """ - if sample_resolution % 2 == 0: - sample_resolution += 1 - - pca_results = load_dataframe(bucket, pca_results_key) - pca = load_pickle(bucket, pca_pickle_key) - - fiber_points = [] - type_names = [] - display_data = {} - - inc = 2 * std_devs / (sample_resolution - 1) - samples = np.arange(-std_devs, std_devs + inc, inc) - stdev_pc1 = pca_results["PCA1"].std(ddof=0) - stdev_pc2 = pca_results["PCA2"].std(ddof=0) - data = { - "PC1" : [sample * stdev_pc1, 0], - "PC2" : [0, sample * stdev_pc2], - } - - if distribution_over_time: - - for pc_ix, pc in enumerate(data): - fiber_points.append([]) - pca.inverse_transform(data[pc]).reshape(-1, 3) - for sample in samples: - fiber_points[pc_ix].append() - fiber_points[pc_ix] = np.array(fiber_points[pc_ix]) - - else: - - for sample in samples: - for pc in data: - - import ipdb; ipdb.set_trace() - - fiber_points.append(pca.inverse_transform(data[pc]).reshape(1, -1, 3)) - type_name = f"{pc}#{sample}" - type_names.append(type_name) - if type_name not in display_data: - display_data[type_name] = DisplayData( - name=type_name, - display_type=DISPLAY_TYPE.FIBER, - ) - - meta_data=MetaData( - box_size=BOX_SIZE, - camera_defaults=CameraData( - position=np.array([10.0, 0.0, 200.0]), - look_at_position=np.array([10.0, 0.0, 0.0]), - fov_degrees=60.0, - ), - trajectory_title="Actin Compression Dimensionality Reduction", - ) - time_units=UnitData("count") # frames - spatial_units=UnitData("nm") # nanometers - - converter = _generate_simularium_for_fiber_points( - fiber_points, - type_names, - meta_data, - display_data, - time_units, - spatial_units, - ) - - output_key = os.path.splitext(pca_pickle_key)[0] - _save_and_upload_simularium_file(converter, bucket, f"{output_key}.simularium") From 0b1001fcd72b6bc4385ca7881a2ffde736cb641d Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Mon, 15 Jul 2024 15:30:19 -0700 Subject: [PATCH 40/63] merged in changes from testing PCA viz with options --- .../_visualize_dimensionality_reduction.py | 4 +- .../visualization/dimensionality_reduction.py | 166 +++++++++++++----- .../visualization/fiber_points.py | 3 +- 3 files changed, 129 insertions(+), 44 deletions(-) diff --git a/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py b/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py index 6f65130..e83aea6 100644 --- a/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py +++ b/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py @@ -6,7 +6,7 @@ Notebook contains steps for visualizing dimensionality reduction using PCA for actin fibers. 
-- [Pre-process Inputs](#pre-process-inputs) +- [Define visualization settings](#define-visualization-settings) - [Visualize inverse PCA](#visualize-inverse-pca) """ @@ -44,7 +44,7 @@ temp_path.mkdir(parents=True, exist_ok=True) # Select how PC distributions are shown -# - True to scroll through the PC distributions over time if True +# - True to scroll through the PC distributions over time # - False to show all together in one timestep distribution_over_time = False diff --git a/subcell_pipeline/visualization/dimensionality_reduction.py b/subcell_pipeline/visualization/dimensionality_reduction.py index 346daab..a1af791 100644 --- a/subcell_pipeline/visualization/dimensionality_reduction.py +++ b/subcell_pipeline/visualization/dimensionality_reduction.py @@ -1,6 +1,10 @@ import os +from typing import Tuple import numpy as np +import matplotlib.pyplot as plt +from matplotlib.colors import Colormap +from sklearn.decomposition import PCA from io_collection.load.load_buffer import load_buffer from io_collection.load.load_dataframe import load_dataframe from io_collection.load.load_pickle import load_pickle @@ -15,6 +19,90 @@ """Bounding box size for dimensionality reduction trajectory.""" +def rgb_to_hex_color(color): + rgb = (int(255 * color[0]), int(255 * color[1]), int(255 * color[2])) + return "#%02x%02x%02x" % rgb + + +def pca_fiber_points_over_time( + stdev_pc1: float, + stdev_pc2: float, + samples: np.ndarray, + pca: PCA, + color_maps: list[Colormap], + simulator_name: str = "Combined", +) -> Tuple[list[np.ndarray], list[str], dict[str, DisplayData]]: + """ + Get fiber_points for samples of the PC distributions + in order to visualize the samples over time + """ + color_map = color_maps[simulator_name] + if simulator_name == "Combined": + simulator_name = "" + if simulator_name: + simulator_name += "#" + fiber_points = [] + type_names = [] + display_data = {} + for pc_ix in range(2): + fiber_points.append([]) + for sample in samples: + if pc_ix < 1: + data = [sample * stdev_pc1, 0] + else: + data = [0, sample * stdev_pc2] + fiber_points[pc_ix].append(pca.inverse_transform(data).reshape(-1, 3)) + fiber_points[pc_ix] = np.array(fiber_points[pc_ix]) + type_name = f"{simulator_name}PC{pc_ix + 1}" + type_names.append(type_name) + display_data[type_name] = DisplayData( + name=type_name, + display_type=DISPLAY_TYPE.FIBER, + color=rgb_to_hex_color(color_map(0.8)), + ) + return fiber_points, type_names, display_data + + +def pca_fiber_points_one_timestep( + stdev_pc1: float, + stdev_pc2: float, + samples: np.ndarray, + pca: PCA, + color_maps: list[Colormap], + simulator_name: str = "Combined", +) -> Tuple[list[np.ndarray], list[str], dict[str, DisplayData]]: + """ + Get fiber_points for samples of the PC distributions + in order to visualize the samples together in one timestep. 
+ """ + color_map = color_maps[simulator_name] + if simulator_name == "Combined": + simulator_name = "" + if simulator_name: + simulator_name += "_" + + fiber_points = [] + type_names = [] + display_data = {} + std_devs = np.max(samples) + for sample in samples: + data = [ + [sample * stdev_pc1, 0], + [0, sample * stdev_pc2], + ] + for pc_ix in range(2): + fiber_points.append(pca.inverse_transform(data[pc_ix]).reshape(1, -1, 3)) + type_name = f"{simulator_name}PC{pc_ix + 1}#{sample}" + type_names.append(type_name) + if type_name not in display_data: + display_data[type_name] = DisplayData( + name=type_name, + display_type=DISPLAY_TYPE.FIBER, + color=rgb_to_hex_color(color_map(abs(sample) / std_devs)), + ) + return fiber_points, type_names, display_data + + def visualize_dimensionality_reduction( bucket: str, pca_results_key: str, @@ -47,50 +135,42 @@ def visualize_dimensionality_reduction( temp_path Local path for saving visualization output files. """ - - if sample_resolution % 2 == 0: - sample_resolution += 1 - pca_results = load_dataframe(bucket, pca_results_key) pca = load_pickle(bucket, pca_pickle_key) - + fiber_points = [] type_names = [] display_data = {} - - inc = 2 * std_devs / (sample_resolution - 1) - samples = np.arange(-std_devs, std_devs + inc, inc) - stdev_pc1 = pca_results["PCA1"].std(ddof=0) - stdev_pc2 = pca_results["PCA2"].std(ddof=0) - data = { - "PC1": [samples * stdev_pc1, 0], - "PC2": [0, samples * stdev_pc2], + pca_results_simulators = { + "Combined" : pca_results, } - - if distribution_over_time: - for pc_ix, pc in enumerate(data): - fiber_points.append([]) - pca.inverse_transform(data[pc]).reshape(-1, 3) - for _ in samples: - fiber_points[pc_ix].append() - fiber_points[pc_ix] = np.array(fiber_points[pc_ix]) - else: - for sample in samples: - for pc in data: - - import ipdb - - ipdb.set_trace() - - fiber_points.append(pca.inverse_transform(data[pc]).reshape(1, -1, 3)) - type_name = f"{pc}#{sample}" - type_names.append(type_name) - if type_name not in display_data: - display_data[type_name] = DisplayData( - name=type_name, - display_type=DISPLAY_TYPE.FIBER, - ) - + if simulator_detail: + pca_results_simulators["ReaDDy"] = pca_results.loc[pca_results["SIMULATOR"] == "READDY"] + pca_results_simulators["Cytosim"] = pca_results.loc[pca_results["SIMULATOR"] == "CYTOSIM"] + color_maps = { + "Combined" : plt.colormaps.get_cmap("RdPu"), + "ReaDDy" : plt.colormaps.get_cmap("YlOrRd"), + "Cytosim" : plt.colormaps.get_cmap("GnBu"), + } + + for simulator in pca_results_simulators: + inc = 2 * std_devs / (sample_resolution - 1) + samples = np.arange(-std_devs, std_devs + inc, inc) + results = pca_results_simulators[simulator] + stdev_pc1 = float(results["PCA1"].std(ddof=0)) + stdev_pc2 = float(results["PCA2"].std(ddof=0)) + if distribution_over_time: + _fiber_points, _type_names, _display_data = pca_fiber_points_over_time( + stdev_pc1, stdev_pc2, samples, pca, color_maps, simulator + ) + else: + _fiber_points, _type_names, _display_data = pca_fiber_points_one_timestep( + stdev_pc1, stdev_pc2, samples, pca, color_maps, simulator + ) + fiber_points += _fiber_points + type_names += _type_names + display_data = {**display_data, **_display_data} + meta_data = MetaData( box_size=BOX_SIZE, camera_defaults=CameraData( @@ -110,11 +190,15 @@ def visualize_dimensionality_reduction( display_data, time_units, spatial_units, + fiber_radius=1.0, ) # Save locally and copy to bucket. 
name = os.path.splitext(pca_pickle_key)[0] - local_file_path = os.path.join(temp_path, name) + output_key = name + output_key += "_time" if distribution_over_time else "" + output_key += "_simulators" if simulator_detail else "" + local_file_path = os.path.join(temp_path, output_key) converter.save(output_path=local_file_path) - output_key = f"{name}/{name}.simularium" - save_buffer(bucket, output_key, load_buffer(temp_path, f"{name}.simularium")) + output_key = f"{output_key}.simularium" + save_buffer(bucket, f"{name}/{output_key}", load_buffer(temp_path, output_key)) diff --git a/subcell_pipeline/visualization/fiber_points.py b/subcell_pipeline/visualization/fiber_points.py index 5cfc577..be172f0 100644 --- a/subcell_pipeline/visualization/fiber_points.py +++ b/subcell_pipeline/visualization/fiber_points.py @@ -16,6 +16,7 @@ def generate_trajectory_converter_for_fiber_points( display_data: dict[str, DisplayData], time_units: UnitData, spatial_units: UnitData, + fiber_radius: float = 0.5, ) -> TrajectoryConverter: """ Generate a TrajectoryConverter for the given fiber points. @@ -61,7 +62,7 @@ def generate_trajectory_converter_for_fiber_points( unique_ids=np.array(total_steps * [list(range(n_fibers))]), types=total_steps * [type_names], positions=np.zeros((total_steps, n_fibers, 3)), - radii=0.5 * np.ones((total_steps, n_fibers)), + radii=fiber_radius * np.ones((total_steps, n_fibers)), n_subpoints=3 * n_points * np.ones((total_steps, n_fibers)), subpoints=subpoints, display_data=display_data, From d81f78cb2a2567f3d01681baff6a4bf7d8e1b30f Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Mon, 15 Jul 2024 16:55:57 -0700 Subject: [PATCH 41/63] update colors and camera position for combined viz --- .../visualization/_visualize_combined_trajectories.py | 4 ++-- subcell_pipeline/visualization/combined_trajectory.py | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/subcell_pipeline/visualization/_visualize_combined_trajectories.py b/subcell_pipeline/visualization/_visualize_combined_trajectories.py index e2e8c22..a4c82c9 100644 --- a/subcell_pipeline/visualization/_visualize_combined_trajectories.py +++ b/subcell_pipeline/visualization/_visualize_combined_trajectories.py @@ -54,8 +54,8 @@ # List of simulators and colors simulator_colors = { - "cytosim": "#1cbfa4", - "readdy": "#ffae52", + "cytosim": "#1cbfaa", + "readdy": "#ff8f52", } # Temporary path to save visualization files diff --git a/subcell_pipeline/visualization/combined_trajectory.py b/subcell_pipeline/visualization/combined_trajectory.py index d19b60a..18f498f 100644 --- a/subcell_pipeline/visualization/combined_trajectory.py +++ b/subcell_pipeline/visualization/combined_trajectory.py @@ -78,8 +78,8 @@ def get_combined_trajectory_converter( meta_data=MetaData( box_size=BOX_SIZE, camera_defaults=CameraData( - position=np.array([10.0, 0.0, 200.0]), - look_at_position=np.array([10.0, 0.0, 0.0]), + position=np.array([75.0, 220.0, 15.0]), + look_at_position=np.array([75.0, 75.0, 0.0]), fov_degrees=60.0, ), trajectory_title="Actin compression in Cytosim and Readdy", @@ -134,6 +134,7 @@ def visualize_combined_trajectories( simulator_colors: dict[str, str], temp_path: str, metrics: Optional[list[CompressionMetric]] = None, + recalculate: bool = False, ) -> None: """ Visualize combined simulations from ReaDDy and Cytosim for select conditions @@ -180,7 +181,7 @@ def visualize_combined_trajectories( condition_keys, replicates, metrics, - recalculate=False, + recalculate=recalculate, ) else: metrics = [] From 
64c87621e194808867c2e95a96e1280cca0f3664 Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Tue, 16 Jul 2024 14:39:15 -0700 Subject: [PATCH 42/63] PCA viz tested with all options --- .../_visualize_dimensionality_reduction.py | 18 +- .../visualization/dimensionality_reduction.py | 233 +++++++++++------- 2 files changed, 151 insertions(+), 100 deletions(-) diff --git a/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py b/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py index e83aea6..98aeadb 100644 --- a/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py +++ b/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py @@ -46,18 +46,24 @@ # Select how PC distributions are shown # - True to scroll through the PC distributions over time # - False to show all together in one timestep -distribution_over_time = False +distribution_over_time = True # Select if simulator distributions are shown # - True to show ReaDDy and Cytosim separately # - False to show all together simulator_detail = False -# Number of standard deviations to visualize -std_devs = 2.0 +# Ranges to sample for each PC +range_pc1 = [-1200, 900] +range_pc2 = [-550, 250] + +# Select how PCs are saved +# - True to save each PC in a separate file +# - False to save all together +separate_pcs = True # Number of samples for each PC distribution -sample_resolution = 5 +sample_resolution = 200 # %% [markdown] """ @@ -76,7 +82,9 @@ pca_pickle_key, distribution_over_time, simulator_detail, - std_devs, + range_pc1, + range_pc2, + separate_pcs, sample_resolution, str(temp_path), ) diff --git a/subcell_pipeline/visualization/dimensionality_reduction.py b/subcell_pipeline/visualization/dimensionality_reduction.py index a1af791..58035d2 100644 --- a/subcell_pipeline/visualization/dimensionality_reduction.py +++ b/subcell_pipeline/visualization/dimensionality_reduction.py @@ -25,50 +25,42 @@ def rgb_to_hex_color(color): def pca_fiber_points_over_time( - stdev_pc1: float, - stdev_pc2: float, - samples: np.ndarray, + samples: list[np.ndarray], pca: PCA, - color_maps: list[Colormap], + pc_ix: int, simulator_name: str = "Combined", ) -> Tuple[list[np.ndarray], list[str], dict[str, DisplayData]]: """ Get fiber_points for samples of the PC distributions in order to visualize the samples over time """ - color_map = color_maps[simulator_name] if simulator_name == "Combined": simulator_name = "" if simulator_name: simulator_name += "#" fiber_points = [] - type_names = [] display_data = {} - for pc_ix in range(2): - fiber_points.append([]) - for sample in samples: - if pc_ix < 1: - data = [sample * stdev_pc1, 0] - else: - data = [0, sample * stdev_pc2] - fiber_points[pc_ix].append(pca.inverse_transform(data).reshape(-1, 3)) - fiber_points[pc_ix] = np.array(fiber_points[pc_ix]) - type_name = f"{simulator_name}PC{pc_ix + 1}" - type_names.append(type_name) - display_data[type_name] = DisplayData( - name=type_name, - display_type=DISPLAY_TYPE.FIBER, - color=rgb_to_hex_color(color_map(0.8)), - ) - return fiber_points, type_names, display_data + for sample_ix in range(len(samples[0])): + if pc_ix < 1: + data = [samples[0][sample_ix], 0] + else: + data = [0, samples[1][sample_ix]] + fiber_points.append(pca.inverse_transform(data).reshape(-1, 3)) + fiber_points = np.array(fiber_points) + type_name = f"{simulator_name}PC{pc_ix + 1}" + display_data[type_name] = DisplayData( + name=type_name, + display_type=DISPLAY_TYPE.FIBER, + color="#eaeaea", + ) + return [fiber_points], [type_name], display_data 
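The function above samples one principal component at a time and inverse-transforms each sample back into an (n x 3) fiber trace. The core pattern, standalone, with a synthetic stand-in for the pipeline's fitted PCA (the shapes are hypothetical):

```python
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)

# Stand-in for flattened fiber traces: 100 fibers x (50 points * 3).
flattened_fibers = rng.normal(size=(100, 150))
pca = PCA(n_components=2).fit(flattened_fibers)

# Walk PC1 while holding PC2 at zero; each sample maps back to a fiber shape.
for sample in np.linspace(-2.0, 2.0, num=5):
    fiber = pca.inverse_transform([sample, 0.0]).reshape(-1, 3)
    print(round(sample, 1), fiber.shape)  # (50, 3) per sample
```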


 def pca_fiber_points_one_timestep(
-    stdev_pc1: float,
-    stdev_pc2: float,
-    samples: np.ndarray,
+    samples: list[np.ndarray],
     pca: PCA,
     color_maps: list[Colormap],
+    pc_ix: int,
     simulator_name: str = "Combined",
 ) -> Tuple[list[np.ndarray], list[str], dict[str, DisplayData]]:
     """
@@ -84,32 +76,80 @@ def pca_fiber_points_one_timestep(
     fiber_points = []
     type_names = []
     display_data = {}
-    std_devs = np.max(samples)
-    for sample in samples:
+    for sample_ix in range(len(samples[0])):
         data = [
-            [sample * stdev_pc1, 0],
-            [0, sample * stdev_pc2],
+            [samples[0][sample_ix], 0],
+            [0, samples[1][sample_ix]],
         ]
-        for pc_ix in range(2):
-            fiber_points.append(pca.inverse_transform(data[pc_ix]).reshape(1, -1, 3))
-            type_name = f"{simulator_name}PC{pc_ix + 1}#{sample}"
-            type_names.append(type_name)
-            if type_name not in display_data:
-                display_data[type_name] = DisplayData(
-                    name=type_name,
-                    display_type=DISPLAY_TYPE.FIBER,
-                    color=rgb_to_hex_color(color_map(abs(sample) / std_devs)),
-                )
+        fiber_points.append(pca.inverse_transform(data[pc_ix]).reshape(1, -1, 3))
+        sample = samples[pc_ix][sample_ix]
+        sample_name = str(round(sample))
+        type_name = f"{simulator_name}PC{pc_ix + 1}#{sample_name}"
+        type_names.append(type_name)
+        if type_name not in display_data:
+            color_range = -samples[pc_ix][0]
+            display_data[type_name] = DisplayData(
+                name=type_name,
+                display_type=DISPLAY_TYPE.FIBER,
+                color=rgb_to_hex_color(color_map(abs(sample) / color_range)),
+            )
     return fiber_points, type_names, display_data
 
 
+def generate_simularium_and_save(
+    name: str,
+    fiber_points: list[np.ndarray],
+    type_names: list[str],
+    display_data: dict[str, DisplayData],
+    distribution_over_time: bool,
+    simulator_detail: bool,
+    bucket: str,
+    temp_path: str,
+    pc: str,
+) -> None:
+    """
+    Generate a Simularium trajectory for the given fiber points and save it.
+    """
+    meta_data = MetaData(
+        box_size=BOX_SIZE,
+        camera_defaults=CameraData(
+            position=np.array([0.0, 70.0, 350.0]),
+            look_at_position=np.array([0.0, 70.0, 0.0]),
+            fov_degrees=60.0,
+        ),
+        trajectory_title="Actin Compression Dimensionality Reduction",
+    )
+    time_units = UnitData("count")  # frames
+    spatial_units = UnitData("nm")  # nanometers
+    converter = generate_trajectory_converter_for_fiber_points(
+        fiber_points,
+        type_names,
+        meta_data,
+        display_data,
+        time_units,
+        spatial_units,
+        fiber_radius=1.0,
+    )
+
+    # Save locally and copy to bucket.
+    output_key = name
+    output_key += "_time" if distribution_over_time else ""
+    output_key += "_simulators" if simulator_detail else ""
+    output_key += f"_pc{pc}" if pc else ""
+    local_file_path = os.path.join(temp_path, output_key)
+    converter.save(output_path=local_file_path)
+    output_key = f"{output_key}.simularium"
+    save_buffer(bucket, f"{name}/{output_key}", load_buffer(temp_path, output_key))
 
 def visualize_dimensionality_reduction(
     bucket: str,
     pca_results_key: str,
     pca_pickle_key: str,
     distribution_over_time: bool,
     simulator_detail: bool,
-    std_devs: float,
+    range_pc1: list[float],
+    range_pc2: list[float],
+    separate_pcs: bool,
     sample_resolution: int,
     temp_path: str,
 ) -> None:
@@ -128,19 +168,23 @@ def visualize_dimensionality_reduction(
         True to scroll through the PC distributions over time, False otherwise.
     simulator_detail
         True to show individual simulator ranges, False otherwise.
-    std_devs
-        Number of standard deviations to visualize.
+    range_pc1
+        Min and max values of PC1 to visualize.
+    range_pc2
+        Min and max values of PC2 to visualize. 
+    separate_pcs
+        True to visualize PCs in separate files, False otherwise.
     sample_resolution
-        Number of samples for each PC distribution. Should be odd.
+        Number of samples for each PC distribution.
     temp_path
         Local path for saving visualization output files.
     """
     pca_results = load_dataframe(bucket, pca_results_key)
     pca = load_pickle(bucket, pca_pickle_key)
 
-    fiber_points = []
-    type_names = []
-    display_data = {}
+    fiber_points = [[], []] if separate_pcs else []
+    type_names = [[], []] if separate_pcs else []
+    display_data = [{}, {}] if separate_pcs else {}
     pca_results_simulators = {
         "Combined" : pca_results,
     }
@@ -152,53 +196,52 @@ def visualize_dimensionality_reduction(
         "ReaDDy" : plt.colormaps.get_cmap("YlOrRd"),
         "Cytosim" : plt.colormaps.get_cmap("GnBu"),
     }
-
+    dataset_name = os.path.splitext(pca_pickle_key)[0]
+    pc_ixs = list(range(2))
     for simulator in pca_results_simulators:
-        inc = 2 * std_devs / (sample_resolution - 1)
-        samples = np.arange(-std_devs, std_devs + inc, inc)
-        results = pca_results_simulators[simulator]
-        stdev_pc1 = float(results["PCA1"].std(ddof=0))
-        stdev_pc2 = float(results["PCA2"].std(ddof=0))
-        if distribution_over_time:
-            _fiber_points, _type_names, _display_data = pca_fiber_points_over_time(
-                stdev_pc1, stdev_pc2, samples, pca, color_maps, simulator
-            )
-        else:
-            _fiber_points, _type_names, _display_data = pca_fiber_points_one_timestep(
-                stdev_pc1, stdev_pc2, samples, pca, color_maps, simulator
+        samples = [
+            np.arange(range_pc1[0], range_pc1[1], (range_pc1[1] - range_pc1[0]) / float(sample_resolution)),
+            np.arange(range_pc2[0], range_pc2[1], (range_pc2[1] - range_pc2[0]) / float(sample_resolution)),
+        ]
+        for pc_ix in pc_ixs:
+            if distribution_over_time:
+                _fiber_points, _type_names, _display_data = pca_fiber_points_over_time(
                    samples, pca, pc_ix, simulator
                )
+            else:
+                _fiber_points, _type_names, _display_data = pca_fiber_points_one_timestep(
+                    samples, pca, color_maps, pc_ix, simulator
+                )
+            if separate_pcs:
+                fiber_points[pc_ix] += _fiber_points
+                type_names[pc_ix] += _type_names
+                display_data[pc_ix] = {**display_data[pc_ix], **_display_data}
+            else:
+                fiber_points += _fiber_points
+                type_names += _type_names
+                display_data = {**display_data, **_display_data}
+    if separate_pcs:
+        for pc_ix in pc_ixs:
+            generate_simularium_and_save(
+                dataset_name,
+                fiber_points[pc_ix],
+                type_names[pc_ix],
+                display_data[pc_ix],
+                distribution_over_time,
+                simulator_detail,
+                bucket,
+                temp_path,
+                str(pc_ix + 1),
             )
- name = os.path.splitext(pca_pickle_key)[0] - output_key = name - output_key += "_time" if distribution_over_time else "" - output_key += "_simulators" if simulator_detail else "" - local_file_path = os.path.join(temp_path, output_key) - converter.save(output_path=local_file_path) - output_key = f"{output_key}.simularium" - save_buffer(bucket, f"{name}/{output_key}", load_buffer(temp_path, output_key)) + else: + generate_simularium_and_save( + dataset_name, + fiber_points, + type_names, + display_data, + distribution_over_time, + simulator_detail, + bucket, + temp_path, + "", + ) From d4cf64dae99f4a48508ce2a5eb5118737a04a497 Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Tue, 16 Jul 2024 21:03:38 -0700 Subject: [PATCH 43/63] test and tweak individual readdy viz --- .gitignore | 1 + .../compression_metrics/compression_metric.py | 2 +- .../dimensionality_reduction/fiber_data.py | 8 +-- .../simulation/readdy/data_structures.py | 8 +-- subcell_pipeline/simulation/readdy/loader.py | 13 +--- .../simulation/readdy/post_processor.py | 72 +++++++++++++++++-- .../visualization/individual_trajectory.py | 40 ++++++----- .../visualization/spatial_annotator.py | 12 ++-- subcell_pipeline/visualization/tomography.py | 2 +- 9 files changed, 111 insertions(+), 47 deletions(-) diff --git a/.gitignore b/.gitignore index 1621198..b5be84e 100644 --- a/.gitignore +++ b/.gitignore @@ -123,3 +123,4 @@ ENV/ *.cym *.simularium **/analysis_outputs/** +*.h5 diff --git a/subcell_pipeline/analysis/compression_metrics/compression_metric.py b/subcell_pipeline/analysis/compression_metrics/compression_metric.py index f2cf0f2..2df1a01 100644 --- a/subcell_pipeline/analysis/compression_metrics/compression_metric.py +++ b/subcell_pipeline/analysis/compression_metrics/compression_metric.py @@ -109,7 +109,7 @@ def bounds(self: Enum) -> tuple[float, float]: CompressionMetric.TOTAL_FIBER_TWIST.value: (0, 0), # TODO CompressionMetric.CALC_BENDING_ENERGY.value: (0, 10), CompressionMetric.CONTOUR_LENGTH.value: (480, 505), - CompressionMetric.COMPRESSION_RATIO.value: (0, 0), # TODO + CompressionMetric.COMPRESSION_RATIO.value: (0, 1), # TODO } return bounds.get(self.value, (0, 0)) diff --git a/subcell_pipeline/analysis/dimensionality_reduction/fiber_data.py b/subcell_pipeline/analysis/dimensionality_reduction/fiber_data.py index df98320..0d11135 100644 --- a/subcell_pipeline/analysis/dimensionality_reduction/fiber_data.py +++ b/subcell_pipeline/analysis/dimensionality_reduction/fiber_data.py @@ -93,7 +93,7 @@ def align_fibers(data: pd.DataFrame) -> None: if time == 0: fiber = coords else: - fiber = align_fiber(coords) + fiber, _ = align_fiber(coords) aligned_fibers.append(fiber) @@ -104,9 +104,9 @@ def align_fibers(data: pd.DataFrame) -> None: data["zpos"] = all_aligned_fibers[:, 2] -def align_fiber(coords: np.ndarray) -> np.ndarray: +def align_fiber(coords: np.ndarray) -> tuple[np.ndarray, np.ndarray]: """ - Align an array of x, y, z coordinates along the positive y axis. + Align an array of x, y, z coordinates along the positive x axis. The function identifies the furthest point in the yz-plane and computes the angle needed to rotate this point to lie on the positive y axis. 
This @@ -132,7 +132,7 @@ def align_fiber(coords: np.ndarray) -> np.ndarray: # Rotate y and z rotated = np.dot(coords[:, 1:], rot) - return np.concatenate((coords[:, 0:1], rotated), axis=1) + return np.concatenate((coords[:, 0:1], rotated), axis=1), rot def reshape_fibers(data: pd.DataFrame) -> tuple[np.ndarray, pd.DataFrame]: diff --git a/subcell_pipeline/simulation/readdy/data_structures.py b/subcell_pipeline/simulation/readdy/data_structures.py index a1873d6..2d32040 100644 --- a/subcell_pipeline/simulation/readdy/data_structures.py +++ b/subcell_pipeline/simulation/readdy/data_structures.py @@ -78,20 +78,20 @@ class FrameData: particles: dict[int, ParticleData] """Mapping of particle ID to a ParticleData for each particle.""" - edges: list[np.ndarray] - """List of edges as position of each of the two particles connected by the edge.""" + edge_ids: list[list[int]] + """List of edges, each is a list of the IDs of the two connected particles.""" def __init__( self, time: float, topologies: Optional[dict[int, TopologyData]] = None, particles: Optional[dict[int, ParticleData]] = None, - edges: Optional[list[np.ndarray]] = None, + edge_ids: Optional[list[list[int]]] = None, ): self.time = time self.topologies = topologies if topologies is not None else {} self.particles = particles if particles is not None else {} - self.edges = edges if edges is not None else [] + self.edge_ids = edge_ids if edge_ids is not None else [] def __str__(self) -> str: top_str = "\n" diff --git a/subcell_pipeline/simulation/readdy/loader.py b/subcell_pipeline/simulation/readdy/loader.py index e0f2366..f83acd4 100644 --- a/subcell_pipeline/simulation/readdy/loader.py +++ b/subcell_pipeline/simulation/readdy/loader.py @@ -122,7 +122,7 @@ def _shape_trajectory_data(self) -> list[FrameData]: ): continue frame = FrameData(time=self.timestep * time_ix) - edge_ids = ReaddyLoader._frame_edges(time_ix, topology_records) + frame.edge_ids = ReaddyLoader._frame_edges(time_ix, topology_records) for index, top in enumerate(topology_records[time_ix]): frame.topologies[index] = TopologyData( uid=index, @@ -133,7 +133,7 @@ def _shape_trajectory_data(self) -> list[FrameData]: p_id = ids[time_ix][p] position = positions[time_ix][p] neighbor_ids = [] - for edge in edge_ids: + for edge in frame.edge_ids: if p_id == edge[0]: neighbor_ids.append(edge[1]) elif p_id == edge[1]: @@ -146,15 +146,6 @@ def _shape_trajectory_data(self) -> list[FrameData]: position=np.array([position[0], position[1], position[2]]), neighbor_ids=neighbor_ids, ) - for edge in edge_ids: - frame.edges.append( - np.array( - [ - frame.particles[edge[0]].position, - frame.particles[edge[1]].position, - ] - ) - ) result.append(frame) return result diff --git a/subcell_pipeline/simulation/readdy/post_processor.py b/subcell_pipeline/simulation/readdy/post_processor.py index 9dbe1db..f535977 100644 --- a/subcell_pipeline/simulation/readdy/post_processor.py +++ b/subcell_pipeline/simulation/readdy/post_processor.py @@ -9,6 +9,7 @@ from subcell_pipeline.analysis.compression_metrics.polymer_trace import ( get_contour_length_from_trace, ) +from subcell_pipeline.analysis.dimensionality_reduction.fiber_data import align_fiber from subcell_pipeline.simulation.readdy.data_structures import FrameData ACTIN_START_PARTICLE_PHRASE: list[str] = ["pointed"] @@ -211,6 +212,58 @@ def _rotation( np.linalg.inv(self._orientation_from_positions(ideal_positions)), ) + def rotate_positions(self, positions: np.ndarray, rotation: np.ndarray) -> np.ndarray: + """ + Rotate an x,y,z position (or an 
array of them) around the x-axis
+        with the given rotation matrix.
+        """
+        if len(positions.shape) > 1:
+            result = np.dot(positions[:, 1:], rotation)
+            return np.concatenate((positions[:, 0:1], result), axis=1)
+        else:
+            result = np.dot(positions[1:], rotation)
+            return np.concatenate((positions[0:1], result), axis=0)
+
+    def align_trajectory(
+        self,
+        fiber_points: list[list[np.ndarray]],
+    ) -> tuple[np.ndarray, list[list[np.ndarray]]]:
+        """
+        Align the positions of particles in the trajectory
+        so that the furthest point from the x-axis
+        is aligned with the positive y-axis at the last time point.
+
+        Parameters
+        ----------
+        fiber_points
+            List of lists of arrays (shape = n x 3) containing the x,y,z
+            positions of control points for each fiber at each time.
+
+        Returns
+        -------
+        positions
+            Array (shape = timesteps x n x 3) containing the x,y,z positions
+            of actin monomer particles at each timestep.
+        fiber_points
+            List of lists of arrays (shape = n x 3) containing the x,y,z positions
+            of control points for each fiber at each time.
+        """
+        result = []
+        _, rotation = align_fiber(fiber_points[-1][0])
+        for time_ix in range(len(self.trajectory)):
+            result.append([])
+            for _, particle in self.trajectory[time_ix].particles.items():
+                particle.position = self.rotate_positions(particle.position, rotation)
+                result[time_ix].append(particle.position)
+            fiber_points[time_ix][0] = self.rotate_positions(fiber_points[time_ix][0], rotation)
+        return np.array(result), fiber_points
+
     def linear_fiber_chain_ids(
         self,
         polymer_number_range: int,
@@ -412,8 +465,8 @@ def linear_fiber_control_points(
         Returns
         -------
         :
-            Array (shape = n x 3) containing the x,y,z positions of control
-            points for each fiber at each time.
+            List of lists of arrays (shape = n x 3) containing the x,y,z positions
+            of control points for each fiber at each time.
         """
         if n_points < 2:
             raise Exception("n_points must be > 1 to define a fiber.")
@@ -526,10 +579,19 @@ def edge_positions(self) -> list[list[np.ndarray]]:
         Returns
         -------
         :
-            List of list of edges as position of each of the two particles
-            connected by the edge for each edge at each time.
+            List of lists of edges, given as the positions of the two connected
+            particles, for each edge at each time. 
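`rotate_positions` applies the 2 x 2 matrix now returned by `align_fiber` to the y,z columns only, i.e. a rotation about the x-axis, computed once from the final frame and reused for every earlier frame. A small numeric sanity check with a hypothetical 45-degree angle:

```python
import numpy as np

theta = np.pi / 4  # hypothetical rotation angle about the x-axis
rot = np.array(
    [
        [np.cos(theta), -np.sin(theta)],
        [np.sin(theta), np.cos(theta)],
    ]
)

points = np.array([[1.0, 0.0, 1.0], [2.0, 1.0, 0.0]])  # n x 3 positions
rotated_yz = np.dot(points[:, 1:], rot)  # rotate only the y,z columns
aligned = np.concatenate((points[:, 0:1], rotated_yz), axis=1)

assert aligned.shape == points.shape
assert np.allclose(aligned[:, 0], points[:, 0])  # x column is untouched
```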
""" edges = [] for frame in self.trajectory: - edges.append(frame.edges) + edges.append([]) + for edge in frame.edge_ids: + edges[-1].append( + np.array( + [ + frame.particles[edge[0]].position, + frame.particles[edge[1]].position, + ] + ) + ) return edges diff --git a/subcell_pipeline/visualization/individual_trajectory.py b/subcell_pipeline/visualization/individual_trajectory.py index b9cfccf..b619d93 100644 --- a/subcell_pipeline/visualization/individual_trajectory.py +++ b/subcell_pipeline/visualization/individual_trajectory.py @@ -51,10 +51,11 @@ def _add_individual_plots( converter: TrajectoryConverter, metrics: list[CompressionMetric], metrics_data: pd.DataFrame, + times: np.ndarray, + time_units: UnitData, ) -> None: """Add plots to individual trajectory with calculated metrics.""" - times = metrics_data["time"].values - scatter_plots = make_empty_scatter_plots(metrics, times=times) + scatter_plots = make_empty_scatter_plots(metrics, times=times, time_units=time_units) for metric, plot in scatter_plots.items(): plot.ytraces["filament"] = np.array(metrics_data[metric.value]) converter.add_plot(plot, "scatter") @@ -69,8 +70,17 @@ def _add_readdy_spatial_annotations( Add visualizations of edges, normals, and control points to the ReaDDy Simularium data. """ - # edges + fiber_chain_ids = post_processor.linear_fiber_chain_ids(polymer_number_range=5) + axis_positions, _ = post_processor.linear_fiber_axis_positions(fiber_chain_ids) + fiber_points = post_processor.linear_fiber_control_points( + axis_positions=axis_positions, + n_points=n_monomer_points, + ) + converter._data.agent_data.positions, fiber_points = post_processor.align_trajectory(fiber_points) + axis_positions, _ = post_processor.linear_fiber_axis_positions(fiber_chain_ids) edges = post_processor.edge_positions() + + # edges converter._data = SpatialAnnotator.add_fiber_agents( converter._data, fiber_points=edges, @@ -79,13 +89,6 @@ def _add_readdy_spatial_annotations( color="#eaeaea", ) - fiber_chain_ids = post_processor.linear_fiber_chain_ids(polymer_number_range=5) - axis_positions, _ = post_processor.linear_fiber_axis_positions(fiber_chain_ids) - fiber_points = post_processor.linear_fiber_control_points( - axis_positions=axis_positions, - n_points=n_monomer_points, - ) - # normals normals = post_processor.linear_fiber_normals( fiber_chain_ids=fiber_chain_ids, @@ -109,18 +112,18 @@ def _add_readdy_spatial_annotations( sphere_positions, type_name="fiber point", radius=0.8, - color="#eaeaea", + rainbow_colors=True, ) def _get_readdy_simularium_converter( - path_to_readdy_h5: str, total_steps: int + path_to_readdy_h5: str, total_steps: int, n_timepoints: int, ) -> TrajectoryConverter: """ Load from ReaDDy outputs and generate a TrajectoryConverter to visualize an actin trajectory in Simularium. 
""" - return ReaddyConverter( + converter = ReaddyConverter( ReaddyData( timestep=1e-6 * (READDY_TIMESTEP * total_steps / READDY_SAVED_FRAMES), path_to_readdy_h5=path_to_readdy_h5, @@ -139,6 +142,7 @@ def _get_readdy_simularium_converter( spatial_units=UnitData("nm"), ) ) + return _filter_time(converter, n_timepoints) def visualize_individual_readdy_trajectory( @@ -187,10 +191,12 @@ def visualize_individual_readdy_trajectory( assert isinstance(h5_file_path, str) - converter = _get_readdy_simularium_converter(h5_file_path, total_steps) + converter = _get_readdy_simularium_converter(h5_file_path, total_steps, n_timepoints) if metrics: - _add_individual_plots(converter, metrics, metrics_data) + times = 2 * metrics_data["time"].values # "time" seems to range (0, 0.5) + times *= 1e-6 * (READDY_TIMESTEP * total_steps / n_timepoints) + _add_individual_plots(converter, metrics, metrics_data, times, converter._data.time_units) assert isinstance(h5_file_path, str) @@ -313,6 +319,8 @@ def visualize_individual_readdy_trajectories( # Upload saved file to S3. temp_key = f"{series_key}_{rep_ix}.h5.simularium" save_buffer(bucket, output_key, load_buffer(temp_path, temp_key)) + + return def _find_time_units(raw_time: float, units: str = "s") -> tuple[str, float]: @@ -435,7 +443,7 @@ def visualize_individual_cytosim_trajectory( ) if metrics: - _add_individual_plots(converter, metrics, metrics_data) + _add_individual_plots(converter, metrics, metrics_data, metrics_data["time"].values, converter._data.time_units) # Save simularium file. Turn off validate IDs for performance. local_file_path = f"{temp_path}/{series_key}_{index}" diff --git a/subcell_pipeline/visualization/spatial_annotator.py b/subcell_pipeline/visualization/spatial_annotator.py index fe035b1..9463ea3 100644 --- a/subcell_pipeline/visualization/spatial_annotator.py +++ b/subcell_pipeline/visualization/spatial_annotator.py @@ -128,6 +128,7 @@ def add_sphere_agents( sphere_positions: List[np.ndarray], type_name: str = "sphere", radius: float = 1.0, + rainbow_colors: bool = False, color: str = "#eaeaea", ) -> TrajectoryData: """ @@ -147,6 +148,9 @@ def add_sphere_agents( radius: float (optional) Radius to draw the spheres. Default: 1. + rainbow_colors : bool (optional) + If True, color the new spheres in rainbow order. + If False, use color instead. color: str (optional) Color for the new spheres. 
Default: "#eaeaea" @@ -171,22 +175,20 @@ def add_sphere_agents( VIZ_TYPE.DEFAULT ] new_agent_data.types[time_ix] += [ - f"{type_name} {ix}" for ix in range(n_spheres) + f"{type_name}#{ix}" for ix in range(n_spheres) ] new_agent_data.positions[time_ix][start_ix:end_ix] = sphere_positions[ time_ix ][:n_spheres] new_agent_data.radii[time_ix][start_ix:end_ix] = n_spheres * [radius] - # TODO use color parameter after finished debugging colors = ["#0000ff", "#00ff00", "#ffff00", "#ff0000", "#ff00ff"] - for ix in range(max_spheres): - tn = f"{type_name} {ix}" + tn = f"{type_name}#{ix}" new_agent_data.display_data[tn] = DisplayData( name=tn, display_type=DISPLAY_TYPE.SPHERE, - color=colors[ix % len(colors)], + color=colors[ix % len(colors)] if rainbow_colors else color, ) traj_data.agent_data = new_agent_data return traj_data diff --git a/subcell_pipeline/visualization/tomography.py b/subcell_pipeline/visualization/tomography.py index c3b49d8..33dce18 100644 --- a/subcell_pipeline/visualization/tomography.py +++ b/subcell_pipeline/visualization/tomography.py @@ -117,12 +117,12 @@ def visualize_tomography( spatial_units, ) - # TODO remove after debugging fiber point order converter._data = SpatialAnnotator.add_sphere_agents( converter._data, fiber_points, type_name="point", radius=0.8, + rainbow_colors=True, ) if metrics: From 723343b7037b2cfaf2f19f95c700f3cda5976d25 Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Wed, 17 Jul 2024 11:17:11 -0700 Subject: [PATCH 44/63] test and tweak individual cytosim viz --- .../_visualize_cytosim_trajectories.py | 1 - .../_visualize_readdy_trajectories.py | 1 - .../visualization/individual_trajectory.py | 39 +++++++++++++------ 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/subcell_pipeline/visualization/_visualize_cytosim_trajectories.py b/subcell_pipeline/visualization/_visualize_cytosim_trajectories.py index 0cbc7b3..c6cf299 100644 --- a/subcell_pipeline/visualization/_visualize_cytosim_trajectories.py +++ b/subcell_pipeline/visualization/_visualize_cytosim_trajectories.py @@ -56,7 +56,6 @@ CompressionMetric.NON_COPLANARITY, CompressionMetric.PEAK_ASYMMETRY, CompressionMetric.AVERAGE_PERP_DISTANCE, - CompressionMetric.CALC_BENDING_ENERGY, CompressionMetric.CONTOUR_LENGTH, CompressionMetric.COMPRESSION_RATIO, ] diff --git a/subcell_pipeline/visualization/_visualize_readdy_trajectories.py b/subcell_pipeline/visualization/_visualize_readdy_trajectories.py index dd1ccac..dc71685 100644 --- a/subcell_pipeline/visualization/_visualize_readdy_trajectories.py +++ b/subcell_pipeline/visualization/_visualize_readdy_trajectories.py @@ -59,7 +59,6 @@ CompressionMetric.NON_COPLANARITY, CompressionMetric.PEAK_ASYMMETRY, CompressionMetric.AVERAGE_PERP_DISTANCE, - CompressionMetric.CALC_BENDING_ENERGY, CompressionMetric.CONTOUR_LENGTH, CompressionMetric.COMPRESSION_RATIO, ] diff --git a/subcell_pipeline/visualization/individual_trajectory.py b/subcell_pipeline/visualization/individual_trajectory.py index b619d93..7b97b40 100644 --- a/subcell_pipeline/visualization/individual_trajectory.py +++ b/subcell_pipeline/visualization/individual_trajectory.py @@ -28,6 +28,7 @@ from subcell_pipeline.analysis.compression_metrics.compression_metric import ( CompressionMetric, ) +from subcell_pipeline.analysis.dimensionality_reduction.fiber_data import align_fiber from subcell_pipeline.simulation.cytosim.post_processing import CYTOSIM_SCALE_FACTOR from subcell_pipeline.simulation.readdy.loader import ReaddyLoader from subcell_pipeline.simulation.readdy.parser import 
BOX_SIZE as READDY_BOX_SIZE @@ -319,8 +320,6 @@ def visualize_individual_readdy_trajectories( # Upload saved file to S3. temp_key = f"{series_key}_{rep_ix}.h5.simularium" save_buffer(bucket, output_key, load_buffer(temp_path, temp_key)) - - return def _find_time_units(raw_time: float, units: str = "s") -> tuple[str, float]: @@ -341,6 +340,23 @@ def _filter_time( return converter +def _align_cytosim_fiber(converter: TrajectoryConverter) -> None: + """ + Align the fiber subpoints so that the furthest point from the x-axis + is aligned with the positive y-axis at the last time point. + """ + fiber_points = converter._data.agent_data.subpoints[:, 0, :] + n_timesteps = fiber_points.shape[0] + n_points = int(fiber_points.shape[1] / 3) + fiber_points = fiber_points.reshape((n_timesteps, n_points, 3)) + _, rotation = align_fiber(fiber_points[-1]) + for time_ix in range(n_timesteps): + rotated = np.dot(fiber_points[time_ix][:, 1:], rotation) + converter._data.agent_data.subpoints[time_ix, 0, :] = np.concatenate( + (fiber_points[time_ix][:, 0:1], rotated), axis=1 + ).reshape(n_points * 3) + + def _get_cytosim_simularium_converter( fiber_points_data: str, singles_data: str, @@ -350,21 +366,17 @@ def _get_cytosim_simularium_converter( Load from Cytosim outputs and generate a TrajectoryConverter to visualize an actin trajectory in Simularium. """ - - # TODO: fix converter not showing fiber, possible scaling issue - singles_display_data = DisplayData( name="linker", - radius=0.01, + radius=0.004, display_type=DISPLAY_TYPE.SPHERE, - color="#fff", + color="#eaeaea", ) - converter = CytosimConverter( CytosimData( meta_data=MetaData( box_size=BOX_SIZE, - scale_factor=CYTOSIM_SCALE_FACTOR, + scale_factor=1, ), object_info={ "fibers": CytosimObjectInfo( @@ -374,7 +386,7 @@ def _get_cytosim_simularium_converter( display_data={ 1: DisplayData( name="actin", - radius=0.02, + radius=0.002, display_type=DISPLAY_TYPE.FIBER, ) }, @@ -393,6 +405,10 @@ def _get_cytosim_simularium_converter( }, ) ) + _align_cytosim_fiber(converter) + converter._data.agent_data.radii *= CYTOSIM_SCALE_FACTOR + converter._data.agent_data.positions *= CYTOSIM_SCALE_FACTOR + converter._data.agent_data.subpoints *= CYTOSIM_SCALE_FACTOR converter = _filter_time(converter, n_timepoints) time_units, time_multiplier = _find_time_units(converter._data.agent_data.times[-1]) converter._data.agent_data.times *= time_multiplier @@ -443,7 +459,8 @@ def visualize_individual_cytosim_trajectory( ) if metrics: - _add_individual_plots(converter, metrics, metrics_data, metrics_data["time"].values, converter._data.time_units) + times = 1e3 * metrics_data["time"].values # s --> ms + _add_individual_plots(converter, metrics, metrics_data, times, converter._data.time_units) # Save simularium file. Turn off validate IDs for performance. 
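# A minimal, standalone sketch of the alignment step performed by the
# `_align_cytosim_fiber` helper added above (assumes a hypothetical n x 3
# `trace` array; `align_fiber` returns the aligned coordinates plus a
# 2 x 2 rotation matrix that acts on the y,z columns, leaving the x axis,
# the compression axis, untouched):
#
#     _, rot = align_fiber(trace)
#     aligned = np.concatenate(
#         (trace[:, 0:1], np.dot(trace[:, 1:], rot)), axis=1
#     )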
local_file_path = f"{temp_path}/{series_key}_{index}" From 4dd48a3e32737141cf4ae45e3dc3a7c3a87a2b21 Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Wed, 17 Jul 2024 12:34:24 -0700 Subject: [PATCH 45/63] updated camera views for individual and PCA viz --- .../visualization/_visualize_readdy_trajectories.py | 2 +- .../visualization/dimensionality_reduction.py | 6 +++--- .../visualization/individual_trajectory.py | 10 ++++++---- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/subcell_pipeline/visualization/_visualize_readdy_trajectories.py b/subcell_pipeline/visualization/_visualize_readdy_trajectories.py index dc71685..d84c47a 100644 --- a/subcell_pipeline/visualization/_visualize_readdy_trajectories.py +++ b/subcell_pipeline/visualization/_visualize_readdy_trajectories.py @@ -51,7 +51,7 @@ recalculate: bool = True # Temporary path to save downloaded trajectories -temp_path: Path = Path(__file__).parents[2] / "aws_downloads" +temp_path: Path = Path(__file__).parents[2] / "viz_outputs" temp_path.mkdir(parents=True, exist_ok=True) # List of compression metrics to include diff --git a/subcell_pipeline/visualization/dimensionality_reduction.py b/subcell_pipeline/visualization/dimensionality_reduction.py index 58035d2..9188457 100644 --- a/subcell_pipeline/visualization/dimensionality_reduction.py +++ b/subcell_pipeline/visualization/dimensionality_reduction.py @@ -113,8 +113,8 @@ def generate_simularium_and_save( meta_data = MetaData( box_size=BOX_SIZE, camera_defaults=CameraData( - position=np.array([0.0, 70.0, 350.0]), - look_at_position=np.array([0.0, 70.0, 0.0]), + position=np.array([-20.0, 350.0, 200.0]), + look_at_position=np.array([50.0, 0.0, 0.0]), fov_degrees=60.0, ), trajectory_title="Actin Compression Dimensionality Reduction", @@ -128,7 +128,7 @@ def generate_simularium_and_save( display_data, time_units, spatial_units, - fiber_radius=1.0, + fiber_radius=8.0, ) # Save locally and copy to bucket. 
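# For reference, the camera changes in this patch all follow the standard
# simulariumio pattern below; a minimal sketch, with the position and
# look-at values taken from the edits above and a hypothetical box size:
#
#     import numpy as np
#     from simulariumio import CameraData, MetaData
#
#     meta_data = MetaData(
#         box_size=np.array([600.0, 600.0, 600.0]),  # hypothetical
#         camera_defaults=CameraData(
#             position=np.array([70.0, 70.0, 300.0]),
#             look_at_position=np.array([70.0, 70.0, 0.0]),
#             fov_degrees=60.0,
#         ),
#     )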
diff --git a/subcell_pipeline/visualization/individual_trajectory.py b/subcell_pipeline/visualization/individual_trajectory.py index 7b97b40..c86027a 100644 --- a/subcell_pipeline/visualization/individual_trajectory.py +++ b/subcell_pipeline/visualization/individual_trajectory.py @@ -131,10 +131,9 @@ def _get_readdy_simularium_converter( meta_data=MetaData( box_size=READDY_BOX_SIZE, camera_defaults=CameraData( - position=np.array([0.0, 0.0, 300.0]), - look_at_position=np.zeros(3), - up_vector=np.array([0.0, 1.0, 0.0]), - fov_degrees=120.0, + position=np.array([70.0, 70.0, 300.0]), + look_at_position=np.array([70.0, 70.0, 0.0]), + fov_degrees=60.0, ), scale_factor=1.0, ), @@ -376,6 +375,9 @@ def _get_cytosim_simularium_converter( CytosimData( meta_data=MetaData( box_size=BOX_SIZE, + look_at_position=np.array([70.0, 70.0, 0.0]), + fov_degrees=60.0, + ), scale_factor=1, ), object_info={ From dfba623839d664747617405430a9dd9ee6ccfeab Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Wed, 17 Jul 2024 12:34:47 -0700 Subject: [PATCH 46/63] finish commit --- subcell_pipeline/visualization/individual_trajectory.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/subcell_pipeline/visualization/individual_trajectory.py b/subcell_pipeline/visualization/individual_trajectory.py index c86027a..8788f64 100644 --- a/subcell_pipeline/visualization/individual_trajectory.py +++ b/subcell_pipeline/visualization/individual_trajectory.py @@ -375,6 +375,8 @@ def _get_cytosim_simularium_converter( CytosimData( meta_data=MetaData( box_size=BOX_SIZE, + camera_defaults=CameraData( + position=np.array([70.0, 70.0, 300.0]), look_at_position=np.array([70.0, 70.0, 0.0]), fov_degrees=60.0, ), From 05bccffdb2ae56bc180a67f965a7d02b021c5be6 Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Wed, 17 Jul 2024 14:33:22 -0700 Subject: [PATCH 47/63] add ipdb to requirements --- pdm.lock | 64 +++++++++++++++++++++++++++++++----------------- pyproject.toml | 3 +++ requirements.txt | 3 +++ 3 files changed, 47 insertions(+), 23 deletions(-) diff --git a/pdm.lock b/pdm.lock index 891a1b0..6361c75 100644 --- a/pdm.lock +++ b/pdm.lock @@ -2,10 +2,10 @@ # It is not intended for manual editing. [metadata] -groups = ["default", "lint", "dev", "test", "docs"] +groups = ["default", "lint", "dev", "test", "docs", "debug"] strategy = ["cross_platform", "inherit_metadata"] -lock_version = "4.4.1" -content_hash = "sha256:a44304e4a75c0069b62e7aa6b0184948abfc853e8067ad4e274c1ce12737d12c" +lock_version = "4.4.2" +content_hash = "sha256:5bb53b76f4a8f343d2e5d21113e972b10b13d0e830cc7857ee1629af1333c7f4" [[package]] name = "aiobotocore" @@ -224,7 +224,7 @@ files = [ name = "asttokens" version = "2.4.1" summary = "Annotate AST trees with source code positions" -groups = ["default"] +groups = ["debug", "default"] dependencies = [ "six>=1.12.0", ] @@ -557,7 +557,7 @@ name = "colorama" version = "0.4.6" requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" summary = "Cross-platform colored terminal text." 
-groups = ["default", "dev", "docs", "lint", "test"] +groups = ["debug", "default", "dev", "docs", "lint", "test"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, @@ -821,7 +821,7 @@ name = "decorator" version = "5.1.1" requires_python = ">=3.5" summary = "Decorators for Humans" -groups = ["default"] +groups = ["debug", "default"] files = [ {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, @@ -911,7 +911,7 @@ name = "exceptiongroup" version = "1.2.1" requires_python = ">=3.7" summary = "Backport of PEP 654 (exception groups)" -groups = ["default", "test"] +groups = ["debug", "default", "test"] marker = "python_version < \"3.11\"" files = [ {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, @@ -923,7 +923,7 @@ name = "executing" version = "2.0.1" requires_python = ">=3.5" summary = "Get the currently executing AST node of a frame, and other information" -groups = ["default"] +groups = ["debug", "default"] files = [ {file = "executing-2.0.1-py2.py3-none-any.whl", hash = "sha256:eac49ca94516ccc753f9fb5ce82603156e590b27525a8bc32cce8ae302eb61bc"}, {file = "executing-2.0.1.tar.gz", hash = "sha256:35afe2ce3affba8ee97f2d69927fa823b08b472b7b994e36a52a964b93d16147"}, @@ -1314,6 +1314,24 @@ files = [ {file = "io_collection-0.10.2.tar.gz", hash = "sha256:40faa2fe94e8049dd900c42c09fbb4b1b5da2a226a2cd1618a1ffb89a636ea18"}, ] +[[package]] +name = "ipdb" +version = "0.13.13" +requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +summary = "IPython-enabled pdb" +groups = ["debug"] +dependencies = [ + "decorator; python_version > \"3.6\" and python_version < \"3.11\"", + "decorator; python_version >= \"3.11\"", + "ipython>=7.31.1; python_version > \"3.6\" and python_version < \"3.11\"", + "ipython>=7.31.1; python_version >= \"3.11\"", + "tomli; python_version > \"3.6\" and python_version < \"3.11\"", +] +files = [ + {file = "ipdb-0.13.13-py3-none-any.whl", hash = "sha256:45529994741c4ab6d2388bfa5d7b725c2cf7fe9deffabdb8a6113aa5ed449ed4"}, + {file = "ipdb-0.13.13.tar.gz", hash = "sha256:e3ac6018ef05126d442af680aad863006ec19d02290561ac88b8b1c0b0cfc726"}, +] + [[package]] name = "ipykernel" version = "6.29.4" @@ -1345,7 +1363,7 @@ name = "ipython" version = "8.24.0" requires_python = ">=3.10" summary = "IPython: Productive Interactive Computing" -groups = ["default"] +groups = ["debug", "default"] dependencies = [ "colorama; sys_platform == \"win32\"", "decorator", @@ -1391,7 +1409,7 @@ name = "jedi" version = "0.19.1" requires_python = ">=3.6" summary = "An autocompletion tool for Python that can be used for text editors." 
-groups = ["default"] +groups = ["debug", "default"] dependencies = [ "parso<0.9.0,>=0.8.3", ] @@ -1782,7 +1800,7 @@ name = "matplotlib-inline" version = "0.1.7" requires_python = ">=3.8" summary = "Inline Matplotlib backend for Jupyter" -groups = ["default"] +groups = ["debug", "default"] dependencies = [ "traitlets", ] @@ -2116,7 +2134,7 @@ name = "parso" version = "0.8.4" requires_python = ">=3.6" summary = "A Python Parser" -groups = ["default"] +groups = ["debug", "default"] files = [ {file = "parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18"}, {file = "parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d"}, @@ -2152,7 +2170,7 @@ files = [ name = "pexpect" version = "4.9.0" summary = "Pexpect allows easy control of interactive console applications." -groups = ["default"] +groups = ["debug", "default"] marker = "sys_platform != \"win32\" and sys_platform != \"emscripten\"" dependencies = [ "ptyprocess>=0.5", @@ -2330,7 +2348,7 @@ name = "prompt-toolkit" version = "3.0.45" requires_python = ">=3.7.0" summary = "Library for building powerful interactive command lines in Python" -groups = ["default"] +groups = ["debug", "default"] dependencies = [ "wcwidth", ] @@ -2359,7 +2377,7 @@ files = [ name = "ptyprocess" version = "0.7.0" summary = "Run a subprocess in a pseudo terminal" -groups = ["default"] +groups = ["debug", "default"] marker = "sys_platform != \"win32\" and sys_platform != \"emscripten\"" files = [ {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, @@ -2370,7 +2388,7 @@ files = [ name = "pure-eval" version = "0.2.2" summary = "Safely evaluate AST nodes without side effects" -groups = ["default"] +groups = ["debug", "default"] files = [ {file = "pure_eval-0.2.2-py3-none-any.whl", hash = "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350"}, {file = "pure_eval-0.2.2.tar.gz", hash = "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3"}, @@ -2504,7 +2522,7 @@ name = "pygments" version = "2.18.0" requires_python = ">=3.8" summary = "Pygments is a syntax highlighting package written in Python." 
-groups = ["default", "docs"] +groups = ["debug", "default", "docs"] files = [ {file = "pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a"}, {file = "pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199"}, @@ -3187,7 +3205,7 @@ name = "six" version = "1.16.0" requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" summary = "Python 2 and 3 compatibility utilities" -groups = ["default"] +groups = ["debug", "default"] files = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, @@ -3416,7 +3434,7 @@ files = [ name = "stack-data" version = "0.6.3" summary = "Extract data from python stack frames and tracebacks for informative displays" -groups = ["default"] +groups = ["debug", "default"] dependencies = [ "asttokens>=2.1.0", "executing>=1.2.0", @@ -3486,7 +3504,7 @@ name = "tomli" version = "2.0.1" requires_python = ">=3.7" summary = "A lil' TOML parser" -groups = ["dev", "docs", "lint", "test"] +groups = ["debug", "dev", "docs", "lint", "test"] marker = "python_version < \"3.11\"" files = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, @@ -3532,7 +3550,7 @@ name = "traitlets" version = "5.14.3" requires_python = ">=3.8" summary = "Traitlets Python configuration system" -groups = ["default", "dev"] +groups = ["debug", "default", "dev"] files = [ {file = "traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f"}, {file = "traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7"}, @@ -3560,7 +3578,7 @@ name = "typing-extensions" version = "4.12.0" requires_python = ">=3.8" summary = "Backported and Experimental Type Hints for Python 3.8+" -groups = ["default", "lint"] +groups = ["debug", "default", "lint"] files = [ {file = "typing_extensions-4.12.0-py3-none-any.whl", hash = "sha256:b349c66bea9016ac22978d800cfff206d5f9816951f12a7d0ec5578b0a819594"}, {file = "typing_extensions-4.12.0.tar.gz", hash = "sha256:8cbcdc8606ebcb0d95453ad7dc5065e6237b6aa230a31e81d0f440c30fed5fd8"}, @@ -3686,7 +3704,7 @@ files = [ name = "wcwidth" version = "0.2.13" summary = "Measures the displayed width of unicode strings in a terminal" -groups = ["default"] +groups = ["debug", "default"] files = [ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, diff --git a/pyproject.toml b/pyproject.toml index 53a5e60..bfd1fd0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,6 +63,9 @@ docs = [ "myst-parser>=3.0.1", "sphinx-copybutton>=0.5.2", ] +debug = [ + "ipdb>=0.13.13", +] [tool.black] line-length = 88 diff --git a/requirements.txt b/requirements.txt index 2dba41a..6a82226 100644 --- a/requirements.txt +++ b/requirements.txt @@ -475,6 +475,9 @@ iniconfig==2.0.0 \ io-collection==0.10.2 \ --hash=sha256:40faa2fe94e8049dd900c42c09fbb4b1b5da2a226a2cd1618a1ffb89a636ea18 \ --hash=sha256:66b5e5ae887fe532fbcfcfa75a6f09ee9afa27ad2480cf74ec0d1c2aabfacab9 +ipdb==0.13.13 \ + --hash=sha256:45529994741c4ab6d2388bfa5d7b725c2cf7fe9deffabdb8a6113aa5ed449ed4 \ + 
--hash=sha256:e3ac6018ef05126d442af680aad863006ec19d02290561ac88b8b1c0b0cfc726
 ipykernel==6.29.4 \
     --hash=sha256:1181e653d95c6808039c509ef8e67c4126b3b3af7781496c7cbfb5ed938a27da \
     --hash=sha256:3d44070060f9475ac2092b760123fadf105d2e2493c24848b6691a7c4f42af5c

From 83fab65cb0386d826f4dfa26fced7cb682906ea2 Mon Sep 17 00:00:00 2001
From: Saurabh Mogre
Date: Wed, 17 Jul 2024 15:38:02 -0700
Subject: [PATCH 48/63] Methods to calculate twist angle

---
 .../_compare_compression_metrics.py           |  13 +-
 .../compression_analysis.py                   |  65 +++++-
 .../compression_metrics/compression_metric.py |  10 +-
 .../compression_metrics/polymer_trace.py      | 187 +++++++++++++++++-
 4 files changed, 262 insertions(+), 13 deletions(-)

diff --git a/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py b/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py
index 989029e..2b7beab 100644
--- a/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py
+++ b/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py
@@ -26,6 +26,7 @@
 from subcell_pipeline.analysis.compression_metrics.compression_analysis import (
     get_compression_metric_data,
+    plot_metric_distribution,
     plot_metrics_vs_time,
     save_compression_metrics,
 )
@@ -66,7 +67,7 @@
 # Specify whether the metrics should be recalculated. Set this to true if you
 # make changes to any metric calculation functions.
-recalculate: bool = False
+recalculate: bool = True
 
 # %% [markdown]
 """
@@ -83,6 +84,8 @@
     CompressionMetric.CALC_BENDING_ENERGY,
     CompressionMetric.CONTOUR_LENGTH,
     CompressionMetric.COMPRESSION_RATIO,
+    CompressionMetric.TOTAL_FIBER_TWIST,
+    CompressionMetric.TWIST_ANGLE,
 ]
 
 # %% [markdown]
@@ -171,3 +174,11 @@
     figure_path=save_location,
     suffix="_subsampled",
 )
+
+# %%
+plot_metric_distribution(
+    df=combined_metrics,
+    metrics=metrics,
+    figure_path=save_location,
+    suffix="_subsampled",
+)
diff --git a/subcell_pipeline/analysis/compression_metrics/compression_analysis.py b/subcell_pipeline/analysis/compression_metrics/compression_analysis.py
index 11fad98..b820999 100644
--- a/subcell_pipeline/analysis/compression_metrics/compression_analysis.py
+++ b/subcell_pipeline/analysis/compression_metrics/compression_analysis.py
@@ -131,10 +131,10 @@ def calculate_compression_metrics(
             polymer_trace=polymer_trace, **options
         )
 
-    metrics = df_metrics.reset_index().rename(columns={"index": "time"})
-    metrics["normalized_time"] = metrics["time"] / metrics["time"].max()
+    df_metrics = df_metrics.reset_index().rename(columns={"index": "time"})
+    df_metrics["normalized_time"] = df_metrics["time"] / df_metrics["time"].max()
 
-    return metrics
+    return df_metrics
 
 
 def save_compression_metrics(
@@ -230,3 +230,62 @@ def plot_metrics_vs_time(
         fig.tight_layout()
         if figure_path is not None:
             fig.savefig(figure_path / f"{metric.value}_vs_time{suffix}.png")
+
+
+def plot_metric_distribution(
+    df: pd.DataFrame,
+    metrics: List[CompressionMetric],
+    figure_path: Union[Path, None] = None,
+    suffix: str = "",
+) -> None:
+    """
+    Plot the distribution of each metric as one histogram per velocity.
+
+    Parameters
+    ----------
+    df
+        The input DataFrame.
+
+    metrics
+        The list of metrics to plot.
+
+    figure_path
+        The path to save the figure.
+
+    suffix
+        The suffix to append to the figure filename.
+        Defaults to "".
+ + """ + num_velocities = df["velocity"].nunique() + plt.rcParams.update({"font.size": 16}) + + for metric in metrics: + fig, axs = plt.subplots( + 1, + num_velocities, + figsize=(num_velocities * 5, 5), + sharey=True, + sharex=True, + dpi=300, + ) + axs = axs.ravel() + for ct, (velocity, df_velocity) in enumerate(df.groupby("velocity")): + metric_values = df_velocity[metric.value] + bins = np.linspace(np.nanmin(metric_values), np.nanmax(metric_values), 20) + for simulator, df_simulator in df_velocity.groupby("simulator"): + axs[ct].hist( + df_simulator[metric.value], + label=f"{simulator}", + color=SIMULATOR_COLOR_MAP[simulator], # type: ignore + alpha=0.7, + bins=bins, + ) + axs[ct].set_title(f"Velocity: {velocity}") + if ct == 0: + axs[ct].legend() + fig.supxlabel(metric.label()) + fig.supylabel("Count") + fig.tight_layout() + if figure_path is not None: + fig.savefig(figure_path / f"{metric.value}_histogram{suffix}.png") diff --git a/subcell_pipeline/analysis/compression_metrics/compression_metric.py b/subcell_pipeline/analysis/compression_metrics/compression_metric.py index f2cf0f2..d92eac5 100644 --- a/subcell_pipeline/analysis/compression_metrics/compression_metric.py +++ b/subcell_pipeline/analysis/compression_metrics/compression_metric.py @@ -17,6 +17,7 @@ get_sum_bending_energy, get_third_component_variance, get_total_fiber_twist, + get_twist_angle, ) @@ -32,6 +33,7 @@ class CompressionMetric(Enum): CALC_BENDING_ENERGY = "calc_bending_energy" CONTOUR_LENGTH = "contour_length" COMPRESSION_RATIO = "compression_ratio" + TWIST_ANGLE = "twist_angle" def label(self: Enum) -> str: """ @@ -54,10 +56,11 @@ def label(self: Enum) -> str: CompressionMetric.AVERAGE_PERP_DISTANCE.value: ( "Average Perpendicular Distance" ), - CompressionMetric.TOTAL_FIBER_TWIST.value: "Total Fiber Twist", + CompressionMetric.TOTAL_FIBER_TWIST.value: "Fiber Twist", CompressionMetric.CALC_BENDING_ENERGY.value: "Calculated Bending Energy", CompressionMetric.CONTOUR_LENGTH.value: "Contour Length", CompressionMetric.COMPRESSION_RATIO.value: "Compression Ratio", + CompressionMetric.TWIST_ANGLE.value: "Twist Angle", } return labels.get(self.value, "") @@ -84,6 +87,9 @@ def description(self: Enum) -> str: CompressionMetric.CALC_BENDING_ENERGY.value: "energy", CompressionMetric.CONTOUR_LENGTH.value: "filament contour length (nm)", CompressionMetric.COMPRESSION_RATIO.value: "compression ratio", + CompressionMetric.TWIST_ANGLE.value: ( + "difference between initial and final tangent (degrees)" + ), } return units.get(self.value, "") @@ -110,6 +116,7 @@ def bounds(self: Enum) -> tuple[float, float]: CompressionMetric.CALC_BENDING_ENERGY.value: (0, 10), CompressionMetric.CONTOUR_LENGTH.value: (480, 505), CompressionMetric.COMPRESSION_RATIO.value: (0, 0), # TODO + CompressionMetric.TWIST_ANGLE.value: (-180, 180), } return bounds.get(self.value, (0, 0)) @@ -146,5 +153,6 @@ def calculate_metric( CompressionMetric.CALC_BENDING_ENERGY: get_bending_energy_from_trace, CompressionMetric.CONTOUR_LENGTH: get_contour_length_from_trace, CompressionMetric.COMPRESSION_RATIO: get_compression_ratio, + CompressionMetric.TWIST_ANGLE: get_twist_angle, } return functions[self](polymer_trace, **options) diff --git a/subcell_pipeline/analysis/compression_metrics/polymer_trace.py b/subcell_pipeline/analysis/compression_metrics/polymer_trace.py index 6a376d0..7364564 100644 --- a/subcell_pipeline/analysis/compression_metrics/polymer_trace.py +++ b/subcell_pipeline/analysis/compression_metrics/polymer_trace.py @@ -61,7 +61,7 @@ def 
get_end_to_end_axis_distances_and_projections( def get_average_distance_from_end_to_end_axis( polymer_trace: np.ndarray, **options: Dict[str, Any], -) -> float: +) -> np.float_: """ Calculate the average perpendicular distance of polymer trace points from the end-to-end axis. @@ -169,10 +169,7 @@ def get_contour_length_from_trace( : sum of inter-monomer distances in the trace """ - total_distance = np.float_(0) - for i in range(len(polymer_trace) - 1): - total_distance += np.linalg.norm(polymer_trace[i] - polymer_trace[i + 1]) - return total_distance.item() + return np.sum(np.linalg.norm(np.diff(polymer_trace, axis=0), axis=1)) def get_bending_energy_from_trace( @@ -226,9 +223,182 @@ def get_bending_energy_from_trace( return energy.item() +def get_2d_polymer_trace( + polymer_trace: np.ndarray, + compression_axis: int = 0, +) -> np.ndarray: + """ + Get the 2D projection of the polymer trace. + + Parameters + ---------- + polymer_trace + The polymer trace as an Nx3 numpy array. + + compression_axis + The axis along which the polymer trace is compressed. + + Returns + ------- + return1: type + return variable description + """ + if polymer_trace.shape[1] == 2: + return polymer_trace + + return polymer_trace[ + :, [ax for ax in range(polymer_trace.shape[1]) if ax != compression_axis] + ] + + +def get_normalized_tangent_vectors( + polymer_trace: np.ndarray, +) -> np.ndarray: + """ + Calculate the normalized tangent vectors for a polymer trace. + + Parameters + ---------- + polymer_trace + The polymer trace as an Nx3 numpy array. + + Returns + ------- + return1: type + return variable description + """ + tangents = np.diff(polymer_trace, axis=0) + + tangents /= np.linalg.norm(tangents, axis=1)[:, np.newaxis] + + return tangents + + +def get_twist_angle( + polymer_trace: np.ndarray, + **options: Dict[str, Any], +) -> float: + """ + Calculate the twist angle of the polymer trace. + + Parameters + ---------- + polymer_trace + array containing the x,y,z positions of the polymer trace + + **options: Dict[str, Any] + Additional options as key-value pairs. + + Returns + ------- + : + twist angle of the polymer trace + """ + compression_axis = options.get("compression_axis", 0) + assert isinstance(compression_axis, int) + + trace_2d = get_2d_polymer_trace( + polymer_trace=polymer_trace, compression_axis=compression_axis + ) + + tangents = get_normalized_tangent_vectors( + polymer_trace=trace_2d, + ) + + angle = get_angle_between_vectors(tangents[0], -tangents[-1], signed=False) + chirality = get_chirality(polymer_trace=polymer_trace) + + return chirality * angle * 180 / np.pi + + +def get_chirality( + polymer_trace: np.ndarray, + **options: Dict[str, Any], +) -> float: + """ + Calculate the chirality of a polymer trace. + + Parameters + ---------- + polymer_trace + array containing the x,y,z positions of the polymer trace + + **options: Dict[str, Any] + Additional options as key-value pairs. + + Returns + ------- + : + chirality of the polymer trace + """ + trace_2d = get_2d_polymer_trace(polymer_trace=polymer_trace) + tangents = get_normalized_tangent_vectors(polymer_trace=trace_2d) + + chirality = 0 + for i in range(len(tangents) - 1): + cross_product = np.cross(tangents[i], tangents[i + 1]) + if cross_product > 0: + chirality += 1 + elif cross_product < 0: + chirality -= 1 + + return np.sign(chirality) + + def get_total_fiber_twist( polymer_trace: np.ndarray, **options: Dict[str, Any], +) -> float: + """ + Calculate the total twist of a polymer trace using the normal + vectors. 
+ + Parameters + ---------- + polymer_trace + array containing the x,y,z positions of the polymer trace + + **options: Dict[str, Any] + Additional options as key-value pairs: + + signed: bool + whether to return the signed or unsigned total twist + + Returns + ------- + : + total twist of the polymer trace + """ + signed = options.get("signed", False) + assert isinstance(signed, bool) + + tangents = np.diff(polymer_trace, axis=0) + tangents /= np.linalg.norm(tangents, axis=1)[:, np.newaxis] + + normals = np.cross(tangents[:-1], tangents[1:]) + normal_lengths = np.linalg.norm(normals, axis=1) + if np.all(normal_lengths < ABSOLUTE_TOLERANCE): + return 0 + normals = normals / normal_lengths[:, np.newaxis] + + twists = [] + for i in range(1, len(normals)): + angle = get_angle_between_vectors(normals[i - 1], normals[i], signed=signed) + + twists.append(angle) + + # Sum the twist angles to get the total twist + total_twist = np.sum(twists) + + # Normalize by the contour length + total_twist /= get_contour_length_from_trace(polymer_trace) + + return total_twist + + +def get_total_fiber_twist_project( + polymer_trace: np.ndarray, + **options: Dict[str, Any], ) -> float: """ Calculate the total twist using projections of the polymer trace @@ -260,10 +430,11 @@ def get_total_fiber_twist( assert isinstance(signed, bool) assert isinstance(tolerance, (float, np.floating)) + assert isinstance(compression_axis, int) - trace_2d = polymer_trace[ - :, [ax for ax in range(polymer_trace.shape[1]) if ax != compression_axis] - ] + trace_2d = get_2d_polymer_trace( + polymer_trace=polymer_trace, compression_axis=compression_axis + ) trace_2d = trace_2d - np.mean(trace_2d, axis=0) return get_total_fiber_twist_2d(trace_2d, signed=signed, tolerance=tolerance) From d8895fe80480b18af3a34f6162edb92d571dfad5 Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Wed, 17 Jul 2024 15:39:05 -0700 Subject: [PATCH 49/63] Methods to visualize twist angle --- subcell_pipeline/visualization/__init__.py | 1 + .../_visualize_filament_angles.py | 78 ++++++++++++ .../visualization/fiber_angles.py | 118 ++++++++++++++++++ 3 files changed, 197 insertions(+) create mode 100644 subcell_pipeline/visualization/__init__.py create mode 100644 subcell_pipeline/visualization/_visualize_filament_angles.py create mode 100644 subcell_pipeline/visualization/fiber_angles.py diff --git a/subcell_pipeline/visualization/__init__.py b/subcell_pipeline/visualization/__init__.py new file mode 100644 index 0000000..ca68d74 --- /dev/null +++ b/subcell_pipeline/visualization/__init__.py @@ -0,0 +1 @@ +"""Visualization methods and notebooks.""" diff --git a/subcell_pipeline/visualization/_visualize_filament_angles.py b/subcell_pipeline/visualization/_visualize_filament_angles.py new file mode 100644 index 0000000..5ec0ede --- /dev/null +++ b/subcell_pipeline/visualization/_visualize_filament_angles.py @@ -0,0 +1,78 @@ +# %% [markdown] +# # Visualize tangent angles for fibers + +# %% [markdown] +""" +Notebook contains steps to visualize the twisting angles for fibers. 
+ +- [Define visualization settings](#define-visualization-settings) +- [Visualize combined trajectories](#visualize-combined-trajectories) +""" + +# %% +if __name__ != "__main__": + raise ImportError("This module is a notebook and is not meant to be imported") + +# %% +from pathlib import Path + +import pandas as pd + +from subcell_pipeline.analysis.dimensionality_reduction.fiber_data import ( + get_merged_data, +) +from subcell_pipeline.visualization.fiber_angles import visualize_tangent_angles + +# %% [markdown] +""" +## Define visualization settings +""" + +# %% +# Name of the simulation series +series_name: str = "COMPRESSION_VELOCITY" + +# S3 bucket Cytosim for input and output files +cytosim_bucket: str = "s3://cytosim-working-bucket" + +# S3 bucket ReaDDy for input and output files +readdy_bucket: str = "s3://readdy-working-bucket" + +# Random seeds for simulations +random_seeds: list[int] = [1, 2, 3, 4, 5] + +# List of condition file keys for each velocity +condition_keys: list[str] = ["0047", "0150", "0470", "1500"] + +# Location to save plot of metrics vs time (local path) +save_location: Path = Path(__file__).parents[3] / "analysis_outputs" +save_location.mkdir(parents=True, exist_ok=True) + +# Specify whether the metrics should be recalculated. Set this to true if you +# make changes to any metric calculation functions. +recalculate: bool = True + +# %% +readdy_data = get_merged_data( + readdy_bucket, f"ACTIN_{series_name}", condition_keys, random_seeds +) +readdy_data["simulator"] = "readdy" + +# %% +cytosim_data = get_merged_data( + cytosim_bucket, series_name, condition_keys, random_seeds +) +cytosim_data["simulator"] = "cytosim" + +# %% +data = pd.concat([cytosim_data, readdy_data]) +data["repeat"] = data["seed"] - 1 +data["velocity"] = data["key"].astype("int") / 10 + +# %% [markdown] +""" +## Visualize tangent angles +""" +visualize_tangent_angles(data) + +# %% diff --git a/subcell_pipeline/visualization/fiber_angles.py b/subcell_pipeline/visualization/fiber_angles.py new file mode 100644 index 0000000..601f137 --- /dev/null +++ b/subcell_pipeline/visualization/fiber_angles.py @@ -0,0 +1,118 @@ +from typing import Optional, Tuple, Union + +import matplotlib.axes +import matplotlib.figure +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +from subcell_pipeline.analysis.compression_metrics.constants import SIMULATOR_COLOR_MAP +from subcell_pipeline.analysis.compression_metrics.polymer_trace import ( + get_2d_polymer_trace, +) + +plt.rcParams.update({"font.size": 14}) + + +def plot_initial_and_final_tangents( + polymer_trace: np.ndarray, + compression_axis: int, + ax: Optional[matplotlib.axes.Axes] = None, + color: str = "r", + scale: int = 1, +) -> Tuple[Union[matplotlib.figure.Figure, None], Union[matplotlib.axes.Axes, None]]: + """ + Plot the normalized tangent vectors along the fiber trace. + + Parameters + ---------- + polymer_trace + N x 3 array of fiber coordinates. + + compression_axis + The axis along which to compress the fibers. + + ax + The matplotlib axes object to plot on. 
+ + color + The color of the tangent vectors + + scale + The scaling factor for the tangent + + Returns + ------- + : + None + """ + if ax is None: + fig, ax = plt.subplots(dpi=300) + else: + fig = ax.get_figure() + + arrowprops = {"arrowstyle": "->", "color": color, "lw": 1} + + trace_2d = get_2d_polymer_trace(polymer_trace, compression_axis) + trace_2d_norm = trace_2d / np.linalg.norm(trace_2d, axis=1)[:, np.newaxis] + + ax.annotate( + "", + xy=trace_2d_norm[1] * scale, + xytext=trace_2d_norm[0], + arrowprops=arrowprops, + ) + + ax.annotate( + "", + xy=trace_2d_norm[-1] * scale, + xytext=trace_2d_norm[-2], + arrowprops=arrowprops, + ) + + ax.plot(trace_2d[:, 0], trace_2d[:, 1], color=color) + + ax.set_ylabel("Z") + ax.set_xlabel("Y") + plt.tight_layout() + + return fig, ax + + +def visualize_tangent_angles( + merged_df: pd.DataFrame, + compression_axis: int = 0, +) -> None: + """ + Visualize tangent angles for each fiber in the merged dataframe + at the last timepoint. + + Parameters + ---------- + merged_df + The merged dataframe containing the fiber data. + + compression_axis + The axis along which to compress the fibers. + + Returns + ------- + : + None + """ + _, ax = plt.subplots(dpi=300) + for simulator, df_simulator in merged_df.groupby("simulator"): + color = SIMULATOR_COLOR_MAP[str(simulator)] + for _, df_condition in df_simulator.groupby("key"): + for _, df_seed in df_condition.groupby("seed"): + df_fiber = df_seed[df_seed["time"] == df_seed["time"].max()] + polymer_trace = df_fiber[["xpos", "ypos", "zpos"]].values + _, ax = plot_initial_and_final_tangents( + polymer_trace=polymer_trace, + compression_axis=compression_axis, + ax=ax, + color=color, + ) + + ax.set_aspect("equal") + plt.show() From 1cb6bff19133bde33b2840e1440a244b171d1f91 Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Thu, 18 Jul 2024 12:02:31 -0700 Subject: [PATCH 50/63] Remove tangent angle visualization. Fix doc strings. --- .../compression_metrics/polymer_trace.py | 119 +++++++++--------- .../_visualize_filament_angles.py | 78 ------------ .../visualization/fiber_angles.py | 118 ----------------- 3 files changed, 62 insertions(+), 253 deletions(-) delete mode 100644 subcell_pipeline/visualization/_visualize_filament_angles.py delete mode 100644 subcell_pipeline/visualization/fiber_angles.py diff --git a/subcell_pipeline/analysis/compression_metrics/polymer_trace.py b/subcell_pipeline/analysis/compression_metrics/polymer_trace.py index 7364564..4fd026e 100644 --- a/subcell_pipeline/analysis/compression_metrics/polymer_trace.py +++ b/subcell_pipeline/analysis/compression_metrics/polymer_trace.py @@ -25,24 +25,24 @@ def get_end_to_end_axis_distances_and_projections( Parameters ---------- polymer_trace - array containing the x,y,z positions of the polymer trace points - at a given time + Array containing the x,y,z positions of the polymer trace points + at a given time. Returns ------- perp_distances - perpendicular distances of the polymer trace from the end-to-end axis + Perpendicular distances of the polymer trace from the end-to-end axis. scaled_projections - length of fiber point projections along the end-to-end axis, scaled + Length of fiber point projections along the end-to-end axis, scaled by axis length. Can be negative. projection_positions - positions of points on the end-to-end axis that are - closest from the respective points in the polymer trace. 
- The distance from projection_positions - to the trace points is the shortest distance from the end-to-end axis + Positions of points on the end-to-end axis that are closest from the + respective points in the polymer trace. + The distance from projection_positions to the trace points is the + shortest distance from the end-to-end axis. """ end_to_end_axis = get_end_to_end_unit_vector(polymer_trace=polymer_trace) end_to_end_axis_length = np.linalg.norm(polymer_trace[-1] - polymer_trace[0]) @@ -61,7 +61,7 @@ def get_end_to_end_axis_distances_and_projections( def get_average_distance_from_end_to_end_axis( polymer_trace: np.ndarray, **options: Dict[str, Any], -) -> np.float_: +) -> float: """ Calculate the average perpendicular distance of polymer trace points from the end-to-end axis. @@ -69,8 +69,8 @@ def get_average_distance_from_end_to_end_axis( Parameters ---------- polymer_trace - array containing the x,y,z positions of the polymer trace - at a given time + Array containing the x,y,z positions of the polymer trace + at a given time. **options Additional options as key-value pairs. @@ -78,15 +78,15 @@ def get_average_distance_from_end_to_end_axis( Returns ------- : - average perpendicular distance of polymer trace points from the - end-to-end axis + Average perpendicular distance of polymer trace points from the + end-to-end axis. """ perp_distances, _, _ = get_end_to_end_axis_distances_and_projections( polymer_trace=polymer_trace ) avg_perp_distance = np.nanmean(perp_distances) - return avg_perp_distance + return float(avg_perp_distance) def get_asymmetry_of_peak( @@ -100,8 +100,8 @@ def get_asymmetry_of_peak( Parameters ---------- polymer_trace - array containing the x,y,z positions of the polymer trace - at a given time + Array containing the x,y,z positions of the polymer trace + at a given time. **options Additional options as key-value pairs. @@ -109,7 +109,7 @@ def get_asymmetry_of_peak( Returns ------- : - scaled distance of the projection of the peak from the axis midpoint + Scaled distance of the projection of the peak from the axis midpoint. """ ( perp_distances, @@ -138,12 +138,12 @@ def get_pca_polymer_trace_projection( Parameters ---------- polymer_trace - array containing the x,y,z positions of the polymer trace + Array containing the x,y,z positions of the polymer trace. Returns ------- pca_projection - PCA projection of the polymer trace + PCA projection of the polymer trace. """ pca = fit_pca_to_polymer_trace(polymer_trace=polymer_trace) return pca.transform(polymer_trace) @@ -159,7 +159,7 @@ def get_contour_length_from_trace( Parameters ---------- polymer_trace - n x 3 array containing the x,y,z positions of the polymer trace + n x 3 array containing the x,y,z positions of the polymer trace. **options Additional options as key-value pairs. @@ -167,7 +167,7 @@ def get_contour_length_from_trace( Returns ------- : - sum of inter-monomer distances in the trace + Sum of inter-monomer distances in the trace. """ return np.sum(np.linalg.norm(np.diff(polymer_trace, axis=0), axis=1)) @@ -182,18 +182,18 @@ def get_bending_energy_from_trace( Parameters ---------- polymer_trace - array containing the x,y,z positions of the polymer trace + Array containing the x,y,z positions of the polymer trace. **options Additional options as key-value pairs. bending_constant: float - bending constant of the fiber in pN nm + Bending constant of the fiber in pN nm. Returns ------- : - bending energy per monomer of the polymer trace + Bending energy per monomer of the polymer trace. 
""" bending_constant = options.get("bending_constant", DEFAULT_BENDING_CONSTANT) @@ -240,8 +240,8 @@ def get_2d_polymer_trace( Returns ------- - return1: type - return variable description + : + The 2D projection of the polymer trace. """ if polymer_trace.shape[1] == 2: return polymer_trace @@ -264,12 +264,17 @@ def get_normalized_tangent_vectors( Returns ------- - return1: type - return variable description + : + The normalized tangent vectors for the polymer. """ tangents = np.diff(polymer_trace, axis=0) - tangents /= np.linalg.norm(tangents, axis=1)[:, np.newaxis] + tangent_lengths = np.linalg.norm(tangents, axis=1) + + if np.all(tangent_lengths < ABSOLUTE_TOLERANCE): + return np.zeros_like(tangents) + + tangents /= tangent_lengths[:, np.newaxis] return tangents @@ -284,7 +289,7 @@ def get_twist_angle( Parameters ---------- polymer_trace - array containing the x,y,z positions of the polymer trace + Array containing the x,y,z positions of the polymer trace. **options: Dict[str, Any] Additional options as key-value pairs. @@ -292,7 +297,7 @@ def get_twist_angle( Returns ------- : - twist angle of the polymer trace + Twist angle of the polymer trace in degrees. """ compression_axis = options.get("compression_axis", 0) assert isinstance(compression_axis, int) @@ -321,7 +326,7 @@ def get_chirality( Parameters ---------- polymer_trace - array containing the x,y,z positions of the polymer trace + Array containing the x,y,z positions of the polymer trace. **options: Dict[str, Any] Additional options as key-value pairs. @@ -329,7 +334,7 @@ def get_chirality( Returns ------- : - chirality of the polymer trace + Chirality of the polymer trace. """ trace_2d = get_2d_polymer_trace(polymer_trace=polymer_trace) tangents = get_normalized_tangent_vectors(polymer_trace=trace_2d) @@ -356,18 +361,18 @@ def get_total_fiber_twist( Parameters ---------- polymer_trace - array containing the x,y,z positions of the polymer trace + Array containing the x,y,z positions of the polymer trace. **options: Dict[str, Any] Additional options as key-value pairs: signed: bool - whether to return the signed or unsigned total twist + Whether to return the signed or unsigned total twist. Returns ------- : - total twist of the polymer trace + Total twist of the polymer trace. """ signed = options.get("signed", False) assert isinstance(signed, bool) @@ -407,22 +412,22 @@ def get_total_fiber_twist_project( Parameters ---------- polymer_trace - array containing the x,y,z positions of the polymer trace + Array containing the x,y,z positions of the polymer trace. **options: Dict[str, Any] Additional options as key-value pairs: compression_axis: int - axis along which the polymer trace is compressed + Axis along which the polymer trace is compressed. signed: bool - whether to return the signed or unsigned total twist + Whether to return the signed or unsigned total twist. tolerance: float ABSOLUTE_TOLERANCE Returns ------- : - sum of angles between PCA projection vectors + Sum of angles between PCA projection vectors. """ compression_axis = options.get("compression_axis", 0) signed = options.get("signed", True) @@ -451,7 +456,7 @@ def get_total_fiber_twist_pca( Parameters ---------- polymer_trace - array containing the x,y,z positions of the polymer trace + Array containing the x,y,z positions of the polymer trace. tolerance ABSOLUTE_TOLERANCE @@ -459,7 +464,7 @@ def get_total_fiber_twist_pca( Returns ------- : - sum of angles between PCA projection vectors + Sum of angles between PCA projection vectors. 
""" pca_trace = get_pca_polymer_trace_projection(polymer_trace=polymer_trace) pca_trace_2d = pca_trace[:, 1:] @@ -478,19 +483,19 @@ def get_angle_between_vectors( Parameters ---------- vec1 - The first vector + The first vector. vec2 - The second vector + The second vector. signed - if True, returns the signed angle between vec1 and vec2 - Default is False + If True, returns the signed angle between vec1 and vec2 + Default is False. Returns ------- : - signed angle between vec1 and vec2 + Signed angle between vec1 and vec2. """ vec1_length = np.linalg.norm(vec1) vec2_length = np.linalg.norm(vec2) @@ -523,19 +528,19 @@ def get_total_fiber_twist_2d( Parameters ---------- trace_2d - array containing the x,y positions of the polymer trace + Array containing the x,y positions of the polymer trace. signed - if True, returns the signed total twist - Default is False + If True, returns the signed total twist. + Default is False. tolerance - Tolerance for vector length + Tolerance for vector length. Returns ------- : - sum of angles between trace vectors + Sum of angles between trace vectors. """ prev_vec = None angles = np.zeros(len(trace_2d)) @@ -568,12 +573,12 @@ def fit_pca_to_polymer_trace( Parameters ---------- polymer_trace - array containing the x,y,z positions of the polymer trace + Array containing the x,y,z positions of the polymer trace. Returns ------- : - PCA object fitted to the polymer trace + PCA object fitted to the polymer trace. """ pca = PCA(n_components=3) pca.fit(polymer_trace) @@ -591,7 +596,7 @@ def get_third_component_variance( Parameters ---------- polymer_trace - array containing the x,y,z positions of the polymer trace + Array containing the x,y,z positions of the polymer trace. **options: Dict[str, Any] Additional options as key-value pairs. @@ -599,8 +604,8 @@ def get_third_component_variance( Returns ------- : - noncoplanarity of fiber - defined as the explained variance ratio of the third PCA component + Noncoplanarity of fiber. + Defined as the explained variance ratio of the third PCA component. """ pca = fit_pca_to_polymer_trace(polymer_trace=polymer_trace) return pca.explained_variance_ratio_[2] diff --git a/subcell_pipeline/visualization/_visualize_filament_angles.py b/subcell_pipeline/visualization/_visualize_filament_angles.py deleted file mode 100644 index 5ec0ede..0000000 --- a/subcell_pipeline/visualization/_visualize_filament_angles.py +++ /dev/null @@ -1,78 +0,0 @@ -# %% [markdown] -# # Visualize tangent angles for fibers - -# %% [markdown] -""" -Notebook contains steps to visualize the twisting angles for fibers. 
- -- [Define visualization settings](#define-visualization-settings) -- [Visualize combined trajectories](#visualize-combined-trajectories) -""" - -# %% -if __name__ != "__main__": - raise ImportError("This module is a notebook and is not meant to be imported") - -# %% -from pathlib import Path - -import pandas as pd - -from subcell_pipeline.analysis.dimensionality_reduction.fiber_data import ( - get_merged_data, -) -from subcell_pipeline.visualization.fiber_angles import visualize_tangent_angles - -# %% [markdown] -""" -## Define visualization settings -""" - -# %% -# Name of the simulation series -series_name: str = "COMPRESSION_VELOCITY" - -# S3 bucket Cytosim for input and output files -cytosim_bucket: str = "s3://cytosim-working-bucket" - -# S3 bucket ReaDDy for input and output files -readdy_bucket: str = "s3://readdy-working-bucket" - -# Random seeds for simulations -random_seeds: list[int] = [1, 2, 3, 4, 5] - -# List of condition file keys for each velocity -condition_keys: list[str] = ["0047", "0150", "0470", "1500"] - -# Location to save plot of metrics vs time (local path) -save_location: Path = Path(__file__).parents[3] / "analysis_outputs" -save_location.mkdir(parents=True, exist_ok=True) - -# Specify whether the metrics should be recalculated. Set this to true if you -# make changes to any metric calculation functions. -recalculate: bool = True - -# %% -readdy_data = get_merged_data( - readdy_bucket, f"ACTIN_{series_name}", condition_keys, random_seeds -) -readdy_data["simulator"] = "readdy" - -# %% -cytosim_data = get_merged_data( - cytosim_bucket, series_name, condition_keys, random_seeds -) -cytosim_data["simulator"] = "cytosim" - -# %% -data = pd.concat([cytosim_data, readdy_data]) -data["repeat"] = data["seed"] - 1 -data["velocity"] = data["key"].astype("int") / 10 - -# %% [markdown] -""" -## Visualize tangent angles -""" -visualize_tangent_angles(data) - -# %% diff --git a/subcell_pipeline/visualization/fiber_angles.py b/subcell_pipeline/visualization/fiber_angles.py deleted file mode 100644 index 601f137..0000000 --- a/subcell_pipeline/visualization/fiber_angles.py +++ /dev/null @@ -1,118 +0,0 @@ -from typing import Optional, Tuple, Union - -import matplotlib.axes -import matplotlib.figure -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd - -from subcell_pipeline.analysis.compression_metrics.constants import SIMULATOR_COLOR_MAP -from subcell_pipeline.analysis.compression_metrics.polymer_trace import ( - get_2d_polymer_trace, -) - -plt.rcParams.update({"font.size": 14}) - - -def plot_initial_and_final_tangents( - polymer_trace: np.ndarray, - compression_axis: int, - ax: Optional[matplotlib.axes.Axes] = None, - color: str = "r", - scale: int = 1, -) -> Tuple[Union[matplotlib.figure.Figure, None], Union[matplotlib.axes.Axes, None]]: - """ - Plot the normalized tangent vectors along the fiber trace. - - Parameters - ---------- - polymer_trace - N x 3 array of fiber coordinates. - - compression_axis - The axis along which to compress the fibers. - - ax - The matplotlib axes object to plot on. 
- - color - The color of the tangent vectors - - scale - The scaling factor for the tangent - - Returns - ------- - : - None - """ - if ax is None: - fig, ax = plt.subplots(dpi=300) - else: - fig = ax.get_figure() - - arrowprops = {"arrowstyle": "->", "color": color, "lw": 1} - - trace_2d = get_2d_polymer_trace(polymer_trace, compression_axis) - trace_2d_norm = trace_2d / np.linalg.norm(trace_2d, axis=1)[:, np.newaxis] - - ax.annotate( - "", - xy=trace_2d_norm[1] * scale, - xytext=trace_2d_norm[0], - arrowprops=arrowprops, - ) - - ax.annotate( - "", - xy=trace_2d_norm[-1] * scale, - xytext=trace_2d_norm[-2], - arrowprops=arrowprops, - ) - - ax.plot(trace_2d[:, 0], trace_2d[:, 1], color=color) - - ax.set_ylabel("Z") - ax.set_xlabel("Y") - plt.tight_layout() - - return fig, ax - - -def visualize_tangent_angles( - merged_df: pd.DataFrame, - compression_axis: int = 0, -) -> None: - """ - Visualize tangent angles for each fiber in the merged dataframe - at the last timepoint. - - Parameters - ---------- - merged_df - The merged dataframe containing the fiber data. - - compression_axis - The axis along which to compress the fibers. - - Returns - ------- - : - None - """ - _, ax = plt.subplots(dpi=300) - for simulator, df_simulator in merged_df.groupby("simulator"): - color = SIMULATOR_COLOR_MAP[str(simulator)] - for _, df_condition in df_simulator.groupby("key"): - for _, df_seed in df_condition.groupby("seed"): - df_fiber = df_seed[df_seed["time"] == df_seed["time"].max()] - polymer_trace = df_fiber[["xpos", "ypos", "zpos"]].values - _, ax = plot_initial_and_final_tangents( - polymer_trace=polymer_trace, - compression_axis=compression_axis, - ax=ax, - color=color, - ) - - ax.set_aspect("equal") - plt.show() From a3e47405a4bc4bd8dc5f350499068179fce0dcab Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Thu, 18 Jul 2024 18:17:37 -0700 Subject: [PATCH 51/63] lint --- .../simulation/readdy/post_processor.py | 18 +++--- .../visualization/dimensionality_reduction.py | 55 ++++++++++++------- 2 files changed, 46 insertions(+), 27 deletions(-) diff --git a/subcell_pipeline/simulation/readdy/post_processor.py b/subcell_pipeline/simulation/readdy/post_processor.py index f535977..454c8a3 100644 --- a/subcell_pipeline/simulation/readdy/post_processor.py +++ b/subcell_pipeline/simulation/readdy/post_processor.py @@ -212,7 +212,9 @@ def _rotation( np.linalg.inv(self._orientation_from_positions(ideal_positions)), ) - def rotate_positions(self, positions: np.ndarray, rotation: np.ndarray) -> np.ndarray: + def rotate_positions( + self, positions: np.ndarray, rotation: np.ndarray + ) -> np.ndarray: """ Rotate an x,y,z position (or an array of them) around the x-axis with the given rotation matrix. @@ -223,13 +225,13 @@ def rotate_positions(self, positions: np.ndarray, rotation: np.ndarray) -> np.nd else: result = np.dot(positions[1:], rotation) return np.concatenate((positions[0:1], result), axis=0) - + def align_trajectory( - self, + self, fiber_points: list[list[np.ndarray]], ) -> tuple[np.ndarray, list[list[np.ndarray]]]: """ - Align the positions of particles in the trajectory + Align the positions of particles in the trajectory so that the furthest point from the x-axis is aligned with the positive y-axis at the last time point. @@ -251,7 +253,7 @@ def align_trajectory( Array (shape = timesteps x 1 x n x 3) containing the x,y,z positions of actin monomer particles at each timestep. 
fiber_points - List of lists of arrays (shape = n x 3) containing the x,y,z positions + List of lists of arrays (shape = n x 3) containing the x,y,z positions of control points for each fiber at each time. """ result = [] @@ -261,7 +263,9 @@ def align_trajectory( for _, particle in self.trajectory[time_ix].particles.items(): particle.position = self.rotate_positions(particle.position, rotation) result[time_ix].append(particle.position) - fiber_points[time_ix][0] = self.rotate_positions(fiber_points[time_ix][0], rotation) + fiber_points[time_ix][0] = self.rotate_positions( + fiber_points[time_ix][0], rotation + ) return np.array(result), fiber_points def linear_fiber_chain_ids( @@ -465,7 +469,7 @@ def linear_fiber_control_points( Returns ------- : - List of lists of arrays (shape = n x 3) containing the x,y,z positions + List of lists of arrays (shape = n x 3) containing the x,y,z positions of control points for each fiber at each time. """ if n_points < 2: diff --git a/subcell_pipeline/visualization/dimensionality_reduction.py b/subcell_pipeline/visualization/dimensionality_reduction.py index 9188457..4bab0a3 100644 --- a/subcell_pipeline/visualization/dimensionality_reduction.py +++ b/subcell_pipeline/visualization/dimensionality_reduction.py @@ -1,15 +1,15 @@ import os from typing import Tuple -import numpy as np import matplotlib.pyplot as plt -from matplotlib.colors import Colormap -from sklearn.decomposition import PCA +import numpy as np from io_collection.load.load_buffer import load_buffer from io_collection.load.load_dataframe import load_dataframe from io_collection.load.load_pickle import load_pickle from io_collection.save.save_buffer import save_buffer +from matplotlib.colors import Colormap from simulariumio import DISPLAY_TYPE, CameraData, DisplayData, MetaData, UnitData +from sklearn.decomposition import PCA from subcell_pipeline.visualization.fiber_points import ( generate_trajectory_converter_for_fiber_points, @@ -25,7 +25,7 @@ def rgb_to_hex_color(color): def pca_fiber_points_over_time( - samples: list[np.ndarray], + samples: list[np.ndarray], pca: PCA, pc_ix: int, simulator_name: str = "Combined", @@ -57,7 +57,7 @@ def pca_fiber_points_over_time( def pca_fiber_points_one_timestep( - samples: list[np.ndarray], + samples: list[np.ndarray], pca: PCA, color_maps: list[Colormap], pc_ix: int, @@ -72,7 +72,7 @@ def pca_fiber_points_one_timestep( simulator_name = "" if simulator_name: simulator_name += "_" - + fiber_points = [] type_names = [] display_data = {} @@ -93,7 +93,7 @@ def pca_fiber_points_one_timestep( display_type=DISPLAY_TYPE.FIBER, color=rgb_to_hex_color(color_map(abs(sample) / color_range)), ) - return fiber_points, type_names, display_data + return fiber_points, type_names, display_data def generate_simularium_and_save( @@ -130,7 +130,7 @@ def generate_simularium_and_save( spatial_units, fiber_radius=8.0, ) - + # Save locally and copy to bucket. output_key = name output_key += "_time" if distribution_over_time else "" @@ -141,6 +141,7 @@ def generate_simularium_and_save( output_key = f"{output_key}.simularium" save_buffer(bucket, f"{name}/{output_key}", load_buffer(temp_path, output_key)) + def visualize_dimensionality_reduction( bucket: str, pca_results_key: str, @@ -175,33 +176,45 @@ def visualize_dimensionality_reduction( separate_pcs True to Visualize PCs in separate files, False otherwise. sample_resolution - Number of samples for each PC distribution. + Number of samples for each PC distribution. temp_path Local path for saving visualization output files. 
""" pca_results = load_dataframe(bucket, pca_results_key) pca = load_pickle(bucket, pca_pickle_key) - + fiber_points = [[], []] if separate_pcs else [] type_names = [[], []] if separate_pcs else [] display_data = [{}, {}] if separate_pcs else {} pca_results_simulators = { - "Combined" : pca_results, + "Combined": pca_results, } if simulator_detail: - pca_results_simulators["ReaDDy"] = pca_results.loc[pca_results["SIMULATOR"] == "READDY"] - pca_results_simulators["Cytosim"] = pca_results.loc[pca_results["SIMULATOR"] == "CYTOSIM"] + pca_results_simulators["ReaDDy"] = pca_results.loc[ + pca_results["SIMULATOR"] == "READDY" + ] + pca_results_simulators["Cytosim"] = pca_results.loc[ + pca_results["SIMULATOR"] == "CYTOSIM" + ] color_maps = { - "Combined" : plt.colormaps.get_cmap("RdPu"), - "ReaDDy" : plt.colormaps.get_cmap("YlOrRd"), - "Cytosim" : plt.colormaps.get_cmap("GnBu"), + "Combined": plt.colormaps.get_cmap("RdPu"), + "ReaDDy": plt.colormaps.get_cmap("YlOrRd"), + "Cytosim": plt.colormaps.get_cmap("GnBu"), } dataset_name = os.path.splitext(pca_pickle_key)[0] pc_ixs = list(range(2)) for simulator in pca_results_simulators: samples = [ - np.arange(range_pc1[0], range_pc1[1], (range_pc1[1] - range_pc1[0]) / float(sample_resolution)), - np.arange(range_pc2[0], range_pc2[1], (range_pc2[1] - range_pc2[0]) / float(sample_resolution)), + np.arange( + range_pc1[0], + range_pc1[1], + (range_pc1[1] - range_pc1[0]) / float(sample_resolution), + ), + np.arange( + range_pc2[0], + range_pc2[1], + (range_pc2[1] - range_pc2[0]) / float(sample_resolution), + ), ] for pc_ix in pc_ixs: if distribution_over_time: @@ -209,8 +222,10 @@ def visualize_dimensionality_reduction( samples, pca, pc_ix, simulator ) else: - _fiber_points, _type_names, _display_data = pca_fiber_points_one_timestep( - samples, pca, color_maps, pc_ix, simulator + _fiber_points, _type_names, _display_data = ( + pca_fiber_points_one_timestep( + samples, pca, color_maps, pc_ix, simulator + ) ) if separate_pcs: fiber_points[pc_ix] += _fiber_points From e85f11734091465ed29afc41e82f187da29d1d24 Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Thu, 18 Jul 2024 18:25:24 -0700 Subject: [PATCH 52/63] more lint --- subcell_pipeline/visualization/dimensionality_reduction.py | 6 ++---- subcell_pipeline/visualization/fiber_points.py | 3 +++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/subcell_pipeline/visualization/dimensionality_reduction.py b/subcell_pipeline/visualization/dimensionality_reduction.py index 4bab0a3..2b1f0b2 100644 --- a/subcell_pipeline/visualization/dimensionality_reduction.py +++ b/subcell_pipeline/visualization/dimensionality_reduction.py @@ -32,7 +32,7 @@ def pca_fiber_points_over_time( ) -> Tuple[list[np.ndarray], list[str], dict[str, DisplayData]]: """ Get fiber_points for samples of the PC distributions - in order to visualize the samples over time + in order to visualize the samples over time. """ if simulator_name == "Combined": simulator_name = "" @@ -107,9 +107,7 @@ def generate_simularium_and_save( temp_path: str, pc: str, ) -> Tuple[list[np.ndarray], list[str], dict[str, DisplayData]]: - """ - Generate a Simulariumio object for the fiber points and save it. 
- """ + """Generate a Simulariumio object for the fiber points and save it.""" meta_data = MetaData( box_size=BOX_SIZE, camera_defaults=CameraData( diff --git a/subcell_pipeline/visualization/fiber_points.py b/subcell_pipeline/visualization/fiber_points.py index be172f0..8b97b0b 100644 --- a/subcell_pipeline/visualization/fiber_points.py +++ b/subcell_pipeline/visualization/fiber_points.py @@ -35,6 +35,9 @@ def generate_trajectory_converter_for_fiber_points( Time unit data. spatial_units Spatial unit data. + fiber_radius + Radius to render fiber + Default: 0.5 Returns ------- From 65b4f9d81d1eae4382fed66664b4538b0c785f21 Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Thu, 18 Jul 2024 18:28:37 -0700 Subject: [PATCH 53/63] and more linting --- .../visualization/individual_trajectory.py | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/subcell_pipeline/visualization/individual_trajectory.py b/subcell_pipeline/visualization/individual_trajectory.py index 8788f64..313d257 100644 --- a/subcell_pipeline/visualization/individual_trajectory.py +++ b/subcell_pipeline/visualization/individual_trajectory.py @@ -56,7 +56,9 @@ def _add_individual_plots( time_units: UnitData, ) -> None: """Add plots to individual trajectory with calculated metrics.""" - scatter_plots = make_empty_scatter_plots(metrics, times=times, time_units=time_units) + scatter_plots = make_empty_scatter_plots( + metrics, times=times, time_units=time_units + ) for metric, plot in scatter_plots.items(): plot.ytraces["filament"] = np.array(metrics_data[metric.value]) converter.add_plot(plot, "scatter") @@ -77,10 +79,12 @@ def _add_readdy_spatial_annotations( axis_positions=axis_positions, n_points=n_monomer_points, ) - converter._data.agent_data.positions, fiber_points = post_processor.align_trajectory(fiber_points) + converter._data.agent_data.positions, fiber_points = ( + post_processor.align_trajectory(fiber_points) + ) axis_positions, _ = post_processor.linear_fiber_axis_positions(fiber_chain_ids) edges = post_processor.edge_positions() - + # edges converter._data = SpatialAnnotator.add_fiber_agents( converter._data, @@ -118,7 +122,9 @@ def _add_readdy_spatial_annotations( def _get_readdy_simularium_converter( - path_to_readdy_h5: str, total_steps: int, n_timepoints: int, + path_to_readdy_h5: str, + total_steps: int, + n_timepoints: int, ) -> TrajectoryConverter: """ Load from ReaDDy outputs and generate a TrajectoryConverter to visualize an @@ -191,18 +197,19 @@ def visualize_individual_readdy_trajectory( assert isinstance(h5_file_path, str) - converter = _get_readdy_simularium_converter(h5_file_path, total_steps, n_timepoints) + converter = _get_readdy_simularium_converter( + h5_file_path, total_steps, n_timepoints + ) if metrics: times = 2 * metrics_data["time"].values # "time" seems to range (0, 0.5) times *= 1e-6 * (READDY_TIMESTEP * total_steps / n_timepoints) - _add_individual_plots(converter, metrics, metrics_data, times, converter._data.time_units) + _add_individual_plots( + converter, metrics, metrics_data, times, converter._data.time_units + ) assert isinstance(h5_file_path, str) - # TODO: fix temporal scaling? 
it looks like the actual data, metrics, and - # the annotations are drawing at different time scales - rep_id = rep_ix + 1 pickle_key = f"{series_name}/data/{series_key}_{rep_id:06d}.pkl" time_inc = total_steps // n_timepoints @@ -464,7 +471,9 @@ def visualize_individual_cytosim_trajectory( if metrics: times = 1e3 * metrics_data["time"].values # s --> ms - _add_individual_plots(converter, metrics, metrics_data, times, converter._data.time_units) + _add_individual_plots( + converter, metrics, metrics_data, times, converter._data.time_units + ) # Save simularium file. Turn off validate IDs for performance. local_file_path = f"{temp_path}/{series_key}_{index}" From 491173ebe2447919cd27577652665935545d8d2a Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Fri, 19 Jul 2024 13:04:09 -0700 Subject: [PATCH 54/63] fix saving tomography 2D plots --- .../_analyze_actin_cme_tomography_data.py | 12 ++++++++--- .../tomography_data/tomography_data.py | 20 +++++++++++++------ 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py b/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py index 608e561..2215b0d 100644 --- a/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py +++ b/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py @@ -26,6 +26,8 @@ raise ImportError("This module is a notebook and is not meant to be imported") # %% +from pathlib import Path + import pandas as pd from subcell_pipeline.analysis.tomography_data.tomography_data import ( @@ -53,6 +55,10 @@ # S3 bucket for input and output files bucket = "s3://subcell-working-bucket" +# Temporary path to save visualization files +temp_path: Path = Path(__file__).parents[3] / "analysis_outputs" +temp_path.mkdir(parents=True, exist_ok=True) + # Data repository for downloading tomography data repository = "https://raw.githubusercontent.com/RangamaniLabUCSD/actincme/master/PolarityAnalysis/" @@ -98,7 +104,7 @@ # %% plot_tomography_data_by_dataset( - branched_df, bucket, f"{name}/{name}_plots_branched.png" + branched_df, bucket, f"{name}/{name}_plots_branched.png", str(temp_path) ) # %% [markdown] @@ -108,7 +114,7 @@ # %% plot_tomography_data_by_dataset( - unbranched_df, bucket, f"{name}/{name}_plots_unbranched.png" + unbranched_df, bucket, f"{name}/{name}_plots_unbranched.png", str(temp_path) ) # %% [markdown] @@ -156,7 +162,7 @@ # %% plot_tomography_data_by_dataset( - sampled_data, bucket, f"{name}/{name}_plots_all_sampled.png" + sampled_data, bucket, f"{name}/{name}_plots_all_sampled.png", str(temp_path) ) # %% diff --git a/subcell_pipeline/analysis/tomography_data/tomography_data.py b/subcell_pipeline/analysis/tomography_data/tomography_data.py index 53c0cdd..b00f757 100644 --- a/subcell_pipeline/analysis/tomography_data/tomography_data.py +++ b/subcell_pipeline/analysis/tomography_data/tomography_data.py @@ -1,14 +1,17 @@ +import io import os +import imageio import matplotlib.pyplot as plt import numpy as np import pandas as pd from io_collection.keys.check_key import check_key from io_collection.load.load_dataframe import load_dataframe from io_collection.save.save_dataframe import save_dataframe +from io_collection.save.save_buffer import save_buffer_to_s3 +from PIL import Image -from ...constants import TOMOGRAPHY_SAMPLE_COLUMNS, WORKING_DIR_PATH -from ...temporary_file_io import make_working_directory, upload_file_to_s3 +TOMOGRAPHY_SAMPLE_COLUMNS: list[str] = ["xpos", "ypos", 
"zpos"] def test_consecutive_segment_angles(polymer_trace: np.ndarray) -> np.bool_: @@ -281,8 +284,14 @@ def sample_tomography_data( return all_sampled_df +def save_image_to_s3(bucket: str, key: str, image: np.ndarray) -> None: + with io.BytesIO() as buffer: + Image.fromarray(image).save(buffer, format="png") + save_buffer_to_s3(bucket[5:], key, buffer, "image/png") + + def plot_tomography_data_by_dataset( - data: pd.DataFrame, bucket: str, output_key: str + data: pd.DataFrame, bucket: str, output_key: str, temp_path: str, ) -> None: """ Plot tomography data for each dataset. @@ -296,8 +305,7 @@ def plot_tomography_data_by_dataset( output_key File key for results. """ - make_working_directory() - local_save_path = os.path.join(WORKING_DIR_PATH, os.path.basename(output_key)) + local_save_path = os.path.join(temp_path, os.path.basename(output_key)) for dataset, group in data.groupby("dataset"): _, ax = plt.subplots(1, 3, figsize=(6, 2)) @@ -317,4 +325,4 @@ def plot_tomography_data_by_dataset( ax[2].plot(fiber["ypos"], fiber["zpos"], marker="o", ms=1, lw=1) plt.savefig(local_save_path) - upload_file_to_s3(bucket, local_save_path, output_key) + save_image_to_s3(bucket, output_key, imageio.imread(local_save_path)) From 7c87800281c3641a6bdaeace49cd99706fd7e8fb Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Fri, 19 Jul 2024 13:04:45 -0700 Subject: [PATCH 55/63] tweaks to dim reduction and tomography viz --- pyproject.toml | 1 + .../_visualize_combined_trajectories.py | 1 - .../_visualize_dimensionality_reduction.py | 21 +++++++--- .../visualization/dimensionality_reduction.py | 41 +++++++++++-------- subcell_pipeline/visualization/tomography.py | 8 ---- 5 files changed, 41 insertions(+), 31 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 53a5e60..2026f23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ dependencies = [ "setuptools>=70.0.0", "io-collection>=0.10.2", "python-dotenv>=1.0.1", + "imageio", ] [project.urls] diff --git a/subcell_pipeline/visualization/_visualize_combined_trajectories.py b/subcell_pipeline/visualization/_visualize_combined_trajectories.py index a4c82c9..e801f43 100644 --- a/subcell_pipeline/visualization/_visualize_combined_trajectories.py +++ b/subcell_pipeline/visualization/_visualize_combined_trajectories.py @@ -67,7 +67,6 @@ CompressionMetric.NON_COPLANARITY, CompressionMetric.PEAK_ASYMMETRY, CompressionMetric.AVERAGE_PERP_DISTANCE, - CompressionMetric.CALC_BENDING_ENERGY, CompressionMetric.CONTOUR_LENGTH, CompressionMetric.COMPRESSION_RATIO, ] diff --git a/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py b/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py index 98aeadb..2bc3e55 100644 --- a/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py +++ b/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py @@ -51,11 +51,23 @@ # Select if simulator distributions are shown # - True to show ReaDDy and Cytosim separately # - False to show all together -simulator_detail = False +simulator_detail = True # Ranges to sample for each PC -range_pc1 = [-1200, 900] -range_pc2 = [-550, 250] +sample_ranges = { + "Combined": [ + [-1200, 900], # pc1 + [-550, 250], # pc2 + ], + "ReaDDy": [ + [-1078, 782], # pc1 + [-517, 154], # pc2 + ], + "Cytosim": [ + [-1064, 758], # pc1 + [-174, 173], # pc2 + ], +} # Select how PCs are saved # - True to save each PC in a separate file @@ -82,8 +94,7 @@ pca_pickle_key, distribution_over_time, simulator_detail, - range_pc1, - range_pc2, + 
sample_ranges, separate_pcs, sample_resolution, str(temp_path), diff --git a/subcell_pipeline/visualization/dimensionality_reduction.py b/subcell_pipeline/visualization/dimensionality_reduction.py index 2b1f0b2..d037c39 100644 --- a/subcell_pipeline/visualization/dimensionality_reduction.py +++ b/subcell_pipeline/visualization/dimensionality_reduction.py @@ -29,6 +29,7 @@ def pca_fiber_points_over_time( pca: PCA, pc_ix: int, simulator_name: str = "Combined", + color: str = "#eaeaea", ) -> Tuple[list[np.ndarray], list[str], dict[str, DisplayData]]: """ Get fiber_points for samples of the PC distributions @@ -51,7 +52,7 @@ def pca_fiber_points_over_time( display_data[type_name] = DisplayData( name=type_name, display_type=DISPLAY_TYPE.FIBER, - color="#eaeaea", + color=color, ) return [fiber_points], [type_name], display_data @@ -111,8 +112,10 @@ def generate_simularium_and_save( meta_data = MetaData( box_size=BOX_SIZE, camera_defaults=CameraData( - position=np.array([-20.0, 350.0, 200.0]), - look_at_position=np.array([50.0, 0.0, 0.0]), + # position=np.array([-20.0, 350.0, 200.0]), + # look_at_position=np.array([50.0, 0.0, 0.0]), + position=np.array([70.0, 70.0, 300.0]), + look_at_position=np.array([70.0, 70.0, 0.0]), fov_degrees=60.0, ), trajectory_title="Actin Compression Dimensionality Reduction", @@ -126,7 +129,8 @@ def generate_simularium_and_save( display_data, time_units, spatial_units, - fiber_radius=8.0, + fiber_radius=1.0, + # fiber_radius=6.0, ) # Save locally and copy to bucket. @@ -146,8 +150,7 @@ def visualize_dimensionality_reduction( pca_pickle_key: str, distribution_over_time: bool, simulator_detail: bool, - range_pc1: list[float], - range_pc2: list[float], + sample_ranges: dict[str, list[list[float]]], separate_pcs: bool, sample_resolution: int, temp_path: str, @@ -167,10 +170,9 @@ def visualize_dimensionality_reduction( True to scroll through the PC distributions over time, False otherwise. simulator_detail True to show individual simulator ranges, False otherwise. - range_pc1 - Min and max values of PC1 to visualize. - range_pc2 - Min and max values of PC2 to visualize. + sample_ranges + Min and max values to visualize for each PC + (and each simulator if simulator_detail). separate_pcs True to Visualize PCs in separate files, False otherwise. 
sample_resolution @@ -199,25 +201,30 @@ def visualize_dimensionality_reduction( "ReaDDy": plt.colormaps.get_cmap("YlOrRd"), "Cytosim": plt.colormaps.get_cmap("GnBu"), } + over_time_colors = { + "Combined": "#ffffff", + "ReaDDy": "#ff8f52", + "Cytosim": "#1cbfaa", + } dataset_name = os.path.splitext(pca_pickle_key)[0] pc_ixs = list(range(2)) for simulator in pca_results_simulators: samples = [ np.arange( - range_pc1[0], - range_pc1[1], - (range_pc1[1] - range_pc1[0]) / float(sample_resolution), + sample_ranges[simulator][0][0], + sample_ranges[simulator][0][1], + (sample_ranges[simulator][0][1] - sample_ranges[simulator][0][0]) / float(sample_resolution), ), np.arange( - range_pc2[0], - range_pc2[1], - (range_pc2[1] - range_pc2[0]) / float(sample_resolution), + sample_ranges[simulator][1][0], + sample_ranges[simulator][1][1], + (sample_ranges[simulator][1][1] - sample_ranges[simulator][1][0]) / float(sample_resolution), ), ] for pc_ix in pc_ixs: if distribution_over_time: _fiber_points, _type_names, _display_data = pca_fiber_points_over_time( - samples, pca, pc_ix, simulator + samples, pca, pc_ix, simulator, over_time_colors[simulator] ) else: _fiber_points, _type_names, _display_data = ( diff --git a/subcell_pipeline/visualization/tomography.py b/subcell_pipeline/visualization/tomography.py index 33dce18..9dd6254 100644 --- a/subcell_pipeline/visualization/tomography.py +++ b/subcell_pipeline/visualization/tomography.py @@ -117,14 +117,6 @@ def visualize_tomography( spatial_units, ) - converter._data = SpatialAnnotator.add_sphere_agents( - converter._data, - fiber_points, - type_name="point", - radius=0.8, - rainbow_colors=True, - ) - if metrics: _add_tomography_plots(converter, metrics, all_fiber_points) From aa34e3b0eedfc40237cf0de2c60a52bafcf6097d Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Fri, 19 Jul 2024 13:37:39 -0700 Subject: [PATCH 56/63] lint --- .../tomography_data/tomography_data.py | 12 +++-- .../simulation/readdy/post_processor.py | 4 +- .../_visualize_dimensionality_reduction.py | 8 ++-- .../visualization/dimensionality_reduction.py | 44 ++++++++++--------- subcell_pipeline/visualization/tomography.py | 1 - 5 files changed, 38 insertions(+), 31 deletions(-) diff --git a/subcell_pipeline/analysis/tomography_data/tomography_data.py b/subcell_pipeline/analysis/tomography_data/tomography_data.py index b00f757..c14ed06 100644 --- a/subcell_pipeline/analysis/tomography_data/tomography_data.py +++ b/subcell_pipeline/analysis/tomography_data/tomography_data.py @@ -7,8 +7,8 @@ import pandas as pd from io_collection.keys.check_key import check_key from io_collection.load.load_dataframe import load_dataframe -from io_collection.save.save_dataframe import save_dataframe from io_collection.save.save_buffer import save_buffer_to_s3 +from io_collection.save.save_dataframe import save_dataframe from PIL import Image TOMOGRAPHY_SAMPLE_COLUMNS: list[str] = ["xpos", "ypos", "zpos"] @@ -291,7 +291,10 @@ def save_image_to_s3(bucket: str, key: str, image: np.ndarray) -> None: def plot_tomography_data_by_dataset( - data: pd.DataFrame, bucket: str, output_key: str, temp_path: str, + data: pd.DataFrame, + bucket: str, + output_key: str, + temp_path: str, ) -> None: """ Plot tomography data for each dataset. @@ -304,6 +307,8 @@ def plot_tomography_data_by_dataset( Where to upload the results. output_key File key for results. + temp_path + Local path for saving visualization output files. 
""" local_save_path = os.path.join(temp_path, os.path.basename(output_key)) @@ -325,4 +330,5 @@ def plot_tomography_data_by_dataset( ax[2].plot(fiber["ypos"], fiber["zpos"], marker="o", ms=1, lw=1) plt.savefig(local_save_path) - save_image_to_s3(bucket, output_key, imageio.imread(local_save_path)) + image: np.ndarray = imageio.imread(local_save_path) + save_image_to_s3(bucket, output_key, image) diff --git a/subcell_pipeline/simulation/readdy/post_processor.py b/subcell_pipeline/simulation/readdy/post_processor.py index 454c8a3..8ef5f64 100644 --- a/subcell_pipeline/simulation/readdy/post_processor.py +++ b/subcell_pipeline/simulation/readdy/post_processor.py @@ -256,7 +256,7 @@ def align_trajectory( List of lists of arrays (shape = n x 3) containing the x,y,z positions of control points for each fiber at each time. """ - result = [] + result: list[list[np.ndarray]] = [] _, rotation = align_fiber(fiber_points[-1][0]) for time_ix in range(len(self.trajectory)): result.append([]) @@ -586,7 +586,7 @@ def edge_positions(self) -> list[list[np.ndarray]]: List of list of edges as position of each of the two connected particles for each edge at each time. """ - edges = [] + edges: list[list[np.ndarray]] = [] for frame in self.trajectory: edges.append([]) for edge in frame.edge_ids: diff --git a/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py b/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py index 2bc3e55..26ed95f 100644 --- a/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py +++ b/subcell_pipeline/visualization/_visualize_dimensionality_reduction.py @@ -54,17 +54,17 @@ simulator_detail = True # Ranges to sample for each PC -sample_ranges = { +sample_ranges: dict[str, list[list[float]]] = { "Combined": [ - [-1200, 900], # pc1 + [-1200, 900], # pc1 [-550, 250], # pc2 ], "ReaDDy": [ - [-1078, 782], # pc1 + [-1078, 782], # pc1 [-517, 154], # pc2 ], "Cytosim": [ - [-1064, 758], # pc1 + [-1064, 758], # pc1 [-174, 173], # pc2 ], } diff --git a/subcell_pipeline/visualization/dimensionality_reduction.py b/subcell_pipeline/visualization/dimensionality_reduction.py index d037c39..797d180 100644 --- a/subcell_pipeline/visualization/dimensionality_reduction.py +++ b/subcell_pipeline/visualization/dimensionality_reduction.py @@ -19,9 +19,9 @@ """Bounding box size for dimensionality reduction trajectory.""" -def rgb_to_hex_color(color): +def rgb_to_hex_color(color: tuple[float, float, float]) -> str: rgb = (int(255 * color[0]), int(255 * color[1]), int(255 * color[2])) - return "#%02x%02x%02x" % rgb + return f"#{rgb[0]:02X}{rgb[1]:02X}{rgb[2]:02X}" def pca_fiber_points_over_time( @@ -39,28 +39,28 @@ def pca_fiber_points_over_time( simulator_name = "" if simulator_name: simulator_name += "#" - fiber_points = [] - display_data = {} + fiber_points: list[np.ndarray] = [] + display_data: dict[str, DisplayData] = {} for sample_ix in range(len(samples[0])): if pc_ix < 1: data = [samples[0][sample_ix], 0] else: data = [0, samples[1][sample_ix]] fiber_points.append(pca.inverse_transform(data).reshape(-1, 3)) - fiber_points = np.array(fiber_points) - type_name = f"{simulator_name}PC{pc_ix + 1}" + fiber_points_arr: np.ndarray = np.array(fiber_points) + type_name: str = f"{simulator_name}PC{pc_ix + 1}" display_data[type_name] = DisplayData( name=type_name, display_type=DISPLAY_TYPE.FIBER, color=color, ) - return [fiber_points], [type_name], display_data + return [fiber_points_arr], [type_name], display_data def pca_fiber_points_one_timestep( samples: 
list[np.ndarray], pca: PCA, - color_maps: list[Colormap], + color_maps: dict[str, Colormap], pc_ix: int, simulator_name: str = "Combined", ) -> Tuple[list[np.ndarray], list[str], dict[str, DisplayData]]: @@ -107,7 +107,7 @@ def generate_simularium_and_save( bucket: str, temp_path: str, pc: str, -) -> Tuple[list[np.ndarray], list[str], dict[str, DisplayData]]: +) -> None: """Generate a Simulariumio object for the fiber points and save it.""" meta_data = MetaData( box_size=BOX_SIZE, @@ -171,7 +171,7 @@ def visualize_dimensionality_reduction( simulator_detail True to show individual simulator ranges, False otherwise. sample_ranges - Min and max values to visualize for each PC + Min and max values to visualize for each PC (and each simulator if simulator_detail). separate_pcs True to Visualize PCs in separate files, False otherwise. @@ -183,9 +183,9 @@ def visualize_dimensionality_reduction( pca_results = load_dataframe(bucket, pca_results_key) pca = load_pickle(bucket, pca_pickle_key) - fiber_points = [[], []] if separate_pcs else [] - type_names = [[], []] if separate_pcs else [] - display_data = [{}, {}] if separate_pcs else {} + fiber_points: list[list[np.ndarray]] = [[], []] + type_names: list[list[str]] = [[], []] + display_data: list[dict[str, DisplayData]] = [{}, {}] pca_results_simulators = { "Combined": pca_results, } @@ -213,12 +213,14 @@ def visualize_dimensionality_reduction( np.arange( sample_ranges[simulator][0][0], sample_ranges[simulator][0][1], - (sample_ranges[simulator][0][1] - sample_ranges[simulator][0][0]) / float(sample_resolution), + (sample_ranges[simulator][0][1] - sample_ranges[simulator][0][0]) + / float(sample_resolution), ), np.arange( sample_ranges[simulator][1][0], sample_ranges[simulator][1][1], - (sample_ranges[simulator][1][1] - sample_ranges[simulator][1][0]) / float(sample_resolution), + (sample_ranges[simulator][1][1] - sample_ranges[simulator][1][0]) + / float(sample_resolution), ), ] for pc_ix in pc_ixs: @@ -237,9 +239,9 @@ def visualize_dimensionality_reduction( type_names[pc_ix] += _type_names display_data[pc_ix] = {**display_data[pc_ix], **_display_data} else: - fiber_points += _fiber_points - type_names += _type_names - display_data = {**display_data, **_display_data} + fiber_points[0] += _fiber_points + type_names[0] += _type_names + display_data[0] = {**display_data[0], **_display_data} if separate_pcs: for pc_ix in pc_ixs: generate_simularium_and_save( @@ -256,9 +258,9 @@ def visualize_dimensionality_reduction( else: generate_simularium_and_save( dataset_name, - fiber_points, - type_names, - display_data, + fiber_points[0], + type_names[0], + display_data[0], distribution_over_time, simulator_detail, bucket, diff --git a/subcell_pipeline/visualization/tomography.py b/subcell_pipeline/visualization/tomography.py index 9dd6254..dd6d4a3 100644 --- a/subcell_pipeline/visualization/tomography.py +++ b/subcell_pipeline/visualization/tomography.py @@ -15,7 +15,6 @@ generate_trajectory_converter_for_fiber_points, ) from subcell_pipeline.visualization.histogram_plots import make_empty_histogram_plots -from subcell_pipeline.visualization.spatial_annotator import SpatialAnnotator TOMOGRAPHY_SAMPLE_COLUMNS: list[str] = ["xpos", "ypos", "zpos"] From 9d87bfe885689b87aa099606a78c6659cb019cd4 Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Thu, 25 Jul 2024 10:23:09 -0700 Subject: [PATCH 57/63] Update subcell_pipeline/analysis/compression_metrics/README.md Co-authored-by: Jessica S. 
Yu <15913767+jessicasyu@users.noreply.github.com> --- subcell_pipeline/analysis/compression_metrics/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subcell_pipeline/analysis/compression_metrics/README.md b/subcell_pipeline/analysis/compression_metrics/README.md index b595d76..1960225 100644 --- a/subcell_pipeline/analysis/compression_metrics/README.md +++ b/subcell_pipeline/analysis/compression_metrics/README.md @@ -2,6 +2,6 @@ ## Metrics for comparing traces of compressed fibers -Analysis combines compression simulations from Cytosim and Readdy and calculates various compression metrics metrics to compare fibers. +Analysis combines compression simulations from Cytosim and Readdy and calculates various compression metrics to compare fibers. - **Compare compression metrics between simulators** ([source](https://github.com/simularium/subcell-pipeline/blob/main/subcell_pipeline/analysis/compression_metrics/_compare_compression_metrics.py) | [notebook](https://simularium.github.io/subcell-pipeline/_notebooks/analysis/compression_metrics/_compare_compression_metrics.html)) From 7085345a4ceb08db5dbcc758970eed761c2a14e9 Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Thu, 25 Jul 2024 11:02:46 -0700 Subject: [PATCH 58/63] changes from review --- .../dimensionality_reduction/fiber_data.py | 2 +- .../_analyze_actin_cme_tomography_data.py | 22 +++++--------- .../tomography_data/tomography_data.py | 30 ++++--------------- subcell_pipeline/simulation/readdy/loader.py | 6 +++- .../_visualize_tomography_data.py | 2 +- 5 files changed, 21 insertions(+), 41 deletions(-) diff --git a/subcell_pipeline/analysis/dimensionality_reduction/fiber_data.py b/subcell_pipeline/analysis/dimensionality_reduction/fiber_data.py index 0d11135..80be79b 100644 --- a/subcell_pipeline/analysis/dimensionality_reduction/fiber_data.py +++ b/subcell_pipeline/analysis/dimensionality_reduction/fiber_data.py @@ -106,7 +106,7 @@ def align_fibers(data: pd.DataFrame) -> None: def align_fiber(coords: np.ndarray) -> tuple[np.ndarray, np.ndarray]: """ - Align an array of x, y, z coordinates along the positive x axis. + Align an array of x, y, z coordinates along the positive y axis. The function identifies the furthest point in the yz-plane and computes the angle needed to rotate this point to lie on the positive y axis. 
This diff --git a/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py b/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py index 2215b0d..b712d82 100644 --- a/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py +++ b/subcell_pipeline/analysis/tomography_data/_analyze_actin_cme_tomography_data.py @@ -26,8 +26,6 @@ raise ImportError("This module is a notebook and is not meant to be imported") # %% -from pathlib import Path - import pandas as pd from subcell_pipeline.analysis.tomography_data.tomography_data import ( @@ -37,9 +35,6 @@ sample_tomography_data, ) -# pixels to um -TOMOGRAPHY_SCALE_FACTOR: float = 0.0006 - # %% [markdown] """ ## Load tomography datasets @@ -55,13 +50,12 @@ # S3 bucket for input and output files bucket = "s3://subcell-working-bucket" -# Temporary path to save visualization files -temp_path: Path = Path(__file__).parents[3] / "analysis_outputs" -temp_path.mkdir(parents=True, exist_ok=True) - # Data repository for downloading tomography data repository = "https://raw.githubusercontent.com/RangamaniLabUCSD/actincme/master/PolarityAnalysis/" +# Conversion factor from pixels to um for this dataset +tomography_scale_factor: float = 0.0006 + # Folders and names of branched actin datasets branched_datasets = [ ("2018August_Tomo27", "TomoAugust_27_earlyCME"), @@ -87,14 +81,14 @@ name=name, repository=repository, datasets=branched_datasets, - scale_factor=TOMOGRAPHY_SCALE_FACTOR, + scale_factor=tomography_scale_factor, ) unbranched_df = get_unbranched_tomography_data( bucket=bucket, name=name, repository=repository, datasets=unbranched_datasets, - scale_factor=TOMOGRAPHY_SCALE_FACTOR, + scale_factor=tomography_scale_factor, ) # %% [markdown] @@ -104,7 +98,7 @@ # %% plot_tomography_data_by_dataset( - branched_df, bucket, f"{name}/{name}_plots_branched.png", str(temp_path) + branched_df, bucket, f"{name}/{name}_plots_branched.png" ) # %% [markdown] @@ -114,7 +108,7 @@ # %% plot_tomography_data_by_dataset( - unbranched_df, bucket, f"{name}/{name}_plots_unbranched.png", str(temp_path) + unbranched_df, bucket, f"{name}/{name}_plots_unbranched.png" ) # %% [markdown] @@ -162,7 +156,7 @@ # %% plot_tomography_data_by_dataset( - sampled_data, bucket, f"{name}/{name}_plots_all_sampled.png", str(temp_path) + sampled_data, bucket, f"{name}/{name}_plots_all_sampled.png" ) # %% diff --git a/subcell_pipeline/analysis/tomography_data/tomography_data.py b/subcell_pipeline/analysis/tomography_data/tomography_data.py index c14ed06..c400d55 100644 --- a/subcell_pipeline/analysis/tomography_data/tomography_data.py +++ b/subcell_pipeline/analysis/tomography_data/tomography_data.py @@ -1,20 +1,15 @@ -import io -import os - -import imageio import matplotlib.pyplot as plt import numpy as np import pandas as pd from io_collection.keys.check_key import check_key from io_collection.load.load_dataframe import load_dataframe -from io_collection.save.save_buffer import save_buffer_to_s3 from io_collection.save.save_dataframe import save_dataframe -from PIL import Image +from io_collection.save.save_figure import save_figure TOMOGRAPHY_SAMPLE_COLUMNS: list[str] = ["xpos", "ypos", "zpos"] -def test_consecutive_segment_angles(polymer_trace: np.ndarray) -> np.bool_: +def test_consecutive_segment_angles(polymer_trace: np.ndarray) -> bool: """ Test whether the angles between consecutive segments of a polymer trace are less than 90 degrees. 
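
In plain terms, the check above asks that every consecutive pair of segments turns by less than 90 degrees, which is equivalent to consecutive segment vectors having a positive dot product. A minimal sketch under that reading follows; note that the matrix product in the function above compares all segment pairs, not only consecutive ones, which is a stricter condition. The function and trace names here are illustrative:

import numpy as np

def consecutive_angles_ok(trace: np.ndarray) -> bool:
    """True if every pair of consecutive segments turns by less than 90 degrees."""
    segments = np.diff(trace, axis=0)  # (n-1, 3) segment vectors
    # Row-wise dot products between consecutive segments only.
    dots = np.einsum("ij,ij->i", segments[:-1], segments[1:])
    return bool(np.all(dots > 0))  # dot > 0  <=>  turn angle < 90 degrees

# A gently curving trace passes; a trace that doubles back fails.
smooth = np.array([[0, 0, 0], [1, 0, 0], [2, 0.2, 0], [3, 0.5, 0]], dtype=float)
kinked = np.array([[0, 0, 0], [1, 0, 0], [0.5, 0.1, 0]], dtype=float)
assert consecutive_angles_ok(smooth)
assert not consecutive_angles_ok(kinked)
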
@@ -26,7 +21,7 @@ def test_consecutive_segment_angles(polymer_trace: np.ndarray) -> np.bool_: Returns ------- - bool + : True if all consecutive angles are less than 180 degrees. """ vectors = polymer_trace[1:] - polymer_trace[:-1] @@ -35,7 +30,7 @@ def test_consecutive_segment_angles(polymer_trace: np.ndarray) -> np.bool_: dot_products = np.dot(vectors[1:], vectors[:-1].T) # Check if any angle is greater than 90 degrees - return np.all(dot_products > 0) + return np.all(dot_products > 0).item() def read_tomography_data(file: str, label: str = "fil") -> pd.DataFrame: @@ -284,17 +279,10 @@ def sample_tomography_data( return all_sampled_df -def save_image_to_s3(bucket: str, key: str, image: np.ndarray) -> None: - with io.BytesIO() as buffer: - Image.fromarray(image).save(buffer, format="png") - save_buffer_to_s3(bucket[5:], key, buffer, "image/png") - - def plot_tomography_data_by_dataset( data: pd.DataFrame, bucket: str, output_key: str, - temp_path: str, ) -> None: """ Plot tomography data for each dataset. @@ -307,13 +295,9 @@ def plot_tomography_data_by_dataset( Where to upload the results. output_key File key for results. - temp_path - Local path for saving visualization output files. """ - local_save_path = os.path.join(temp_path, os.path.basename(output_key)) - + figure, ax = plt.subplots(1, 3, figsize=(6, 2)) for dataset, group in data.groupby("dataset"): - _, ax = plt.subplots(1, 3, figsize=(6, 2)) ax[1].set_title(dataset) @@ -329,6 +313,4 @@ def plot_tomography_data_by_dataset( ax[1].plot(fiber["xpos"], fiber["zpos"], marker="o", ms=1, lw=1) ax[2].plot(fiber["ypos"], fiber["zpos"], marker="o", ms=1, lw=1) - plt.savefig(local_save_path) - image: np.ndarray = imageio.imread(local_save_path) - save_image_to_s3(bucket, output_key, image) + save_figure(bucket, output_key, figure) diff --git a/subcell_pipeline/simulation/readdy/loader.py b/subcell_pipeline/simulation/readdy/loader.py index f83acd4..9d35e66 100644 --- a/subcell_pipeline/simulation/readdy/loader.py +++ b/subcell_pipeline/simulation/readdy/loader.py @@ -9,7 +9,11 @@ from io_collection.save.save_pickle import save_pickle from tqdm import tqdm -from .data_structures import FrameData, ParticleData, TopologyData +from subcell_pipeline.simulation.readdy.data_structures import ( + FrameData, + ParticleData, + TopologyData, +) class ReaddyLoader: diff --git a/subcell_pipeline/visualization/_visualize_tomography_data.py b/subcell_pipeline/visualization/_visualize_tomography_data.py index 4208991..88f91be 100644 --- a/subcell_pipeline/visualization/_visualize_tomography_data.py +++ b/subcell_pipeline/visualization/_visualize_tomography_data.py @@ -7,7 +7,7 @@ fibers using [Simularium](https://simularium.allencell.org/). 
- [Define visualization settings](#define-visualization-settings) -- [Visualize tomography data](#visualize-tomography) +- [Visualize tomography data](#visualize-tomography-data) """ # %% From 76b2ced8259475ef677479ef858c45a5c3ee1dc7 Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Thu, 25 Jul 2024 12:17:49 -0700 Subject: [PATCH 59/63] add h5py with pdm, remove unused imageio dep, remove broken tests --- .pdm-python | 1 + pdm.lock | 109 ++++++++++++++++-- pyproject.toml | 2 +- tests/conftest.py | 25 ---- .../readdy/actin_ortho_filament_10_steps.h5 | Bin 216961 -> 0 bytes tests/test_readdy_control_points.py | 83 ------------- 6 files changed, 103 insertions(+), 117 deletions(-) create mode 100644 .pdm-python delete mode 100644 tests/conftest.py delete mode 100644 tests/data/readdy/actin_ortho_filament_10_steps.h5 delete mode 100644 tests/test_readdy_control_points.py diff --git a/.pdm-python b/.pdm-python new file mode 100644 index 0000000..e471425 --- /dev/null +++ b/.pdm-python @@ -0,0 +1 @@ +/Users/blairl/Documents/Dev/subcell-pipeline/.venv/bin/python \ No newline at end of file diff --git a/pdm.lock b/pdm.lock index 891a1b0..eebe668 100644 --- a/pdm.lock +++ b/pdm.lock @@ -2,10 +2,13 @@ # It is not intended for manual editing. [metadata] -groups = ["default", "lint", "dev", "test", "docs"] -strategy = ["cross_platform", "inherit_metadata"] -lock_version = "4.4.1" -content_hash = "sha256:a44304e4a75c0069b62e7aa6b0184948abfc853e8067ad4e274c1ce12737d12c" +groups = ["default", "dev", "docs", "lint", "test"] +strategy = ["inherit_metadata"] +lock_version = "4.5.0" +content_hash = "sha256:fa285c0f581d1fd5dd90539e6f132003011c49c5b5d171bfc0bbe76fc286ebbf" + +[[metadata.targets]] +requires_python = ">=3.10,<3.12" [[package]] name = "aiobotocore" @@ -78,6 +81,9 @@ version = "0.11.0" requires_python = ">=3.6" summary = "itertools and builtins for AsyncIO and mixed iterables" groups = ["default"] +dependencies = [ + "typing-extensions>=4.0; python_version < \"3.10\"", +] files = [ {file = "aioitertools-0.11.0-py3-none-any.whl", hash = "sha256:04b95e3dab25b449def24d7df809411c10e62aab0cbe31a50ca4e68748c43394"}, {file = "aioitertools-0.11.0.tar.gz", hash = "sha256:42c68b8dd3a69c2bf7f2233bf7df4bb58b557bca5252ac02ed5187bbc67d6831"}, @@ -131,6 +137,8 @@ groups = ["default"] dependencies = [ "Mako", "SQLAlchemy>=1.3.0", + "importlib-metadata; python_version < \"3.9\"", + "importlib-resources; python_version < \"3.9\"", "typing-extensions>=4", ] files = [ @@ -144,6 +152,9 @@ version = "0.7.0" requires_python = ">=3.8" summary = "Reusable constraint types to use with typing.Annotated" groups = ["default"] +dependencies = [ + "typing-extensions>=4.0.0; python_version < \"3.9\"", +] files = [ {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, @@ -169,6 +180,7 @@ dependencies = [ "exceptiongroup; python_version < \"3.11\"", "idna>=2.8", "sniffio>=1.1", + "typing-extensions; python_version < \"3.8\"", ] files = [ {file = "anyio-3.7.1-py3-none-any.whl", hash = "sha256:91dee416e570e92c64041bd18b900d1d6fa78dff7048769ce5ac5ddad004fbb5"}, @@ -197,6 +209,7 @@ dependencies = [ "PyYAML", "certifi", "click>=5.0", + "dataclasses; python_version < \"3.7\"", "markdown", "requests", "requests-oauthlib", @@ -227,6 +240,7 @@ summary = "Annotate AST trees with source code positions" groups = ["default"] dependencies = [ 
"six>=1.12.0", + "typing; python_version < \"3.5\"", ] files = [ {file = "asttokens-2.4.1-py2.py3-none-any.whl", hash = "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24"}, @@ -239,7 +253,9 @@ version = "4.0.3" requires_python = ">=3.7" summary = "Timeout context manager for asyncio programs" groups = ["default"] -marker = "python_version < \"3.12.0\"" +dependencies = [ + "typing-extensions>=3.6.5; python_version < \"3.8\"", +] files = [ {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, @@ -280,6 +296,9 @@ version = "23.2.0" requires_python = ">=3.7" summary = "Classes Without Boilerplate" groups = ["default", "dev"] +dependencies = [ + "importlib-metadata; python_version < \"3.8\"", +] files = [ {file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"}, {file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"}, @@ -304,6 +323,9 @@ version = "2.15.0" requires_python = ">=3.8" summary = "Internationalization utilities" groups = ["docs"] +dependencies = [ + "pytz>=2015.7; python_version < \"3.9\"", +] files = [ {file = "Babel-2.15.0-py3-none-any.whl", hash = "sha256:08706bdad8d0a3413266ab61bd6c34d0c28d6e1e7badf40a2cebe67644e2e1fb"}, {file = "babel-2.15.0.tar.gz", hash = "sha256:8daf0e265d05768bc6c7a314cf1321e9a123afc328cc635c18622a2f30a04413"}, @@ -377,6 +399,7 @@ dependencies = [ "jmespath<2.0.0,>=0.7.1", "python-dateutil<3.0.0,>=2.1", "urllib3!=2.2.0,<3,>=1.25.4; python_version >= \"3.10\"", + "urllib3<1.27,>=1.25.4; python_version < \"3.10\"", ] files = [ {file = "botocore-1.34.106-py3-none-any.whl", hash = "sha256:4baf0e27c2dfc4f4d0dee7c217c716e0782f9b30e8e1fff983fce237d88f73ae"}, @@ -535,6 +558,7 @@ summary = "Composable command line interface toolkit" groups = ["default", "lint"] dependencies = [ "colorama; platform_system == \"Windows\"", + "importlib-metadata; python_version < \"3.8\"", ] files = [ {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, @@ -1108,6 +1132,7 @@ requires_python = ">=3.8" summary = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API." 
groups = ["default"] dependencies = [ + "astunparse>=1.6; python_version < \"3.9\"", "colorama>=0.4", ] files = [ @@ -1121,6 +1146,9 @@ version = "0.14.0" requires_python = ">=3.7" summary = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" groups = ["default"] +dependencies = [ + "typing-extensions; python_version < \"3.8\"", +] files = [ {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, @@ -1141,6 +1169,27 @@ files = [ {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"}, ] +[[package]] +name = "h5py" +version = "3.11.0" +requires_python = ">=3.8" +summary = "Read and write HDF5 files from Python" +groups = ["default"] +dependencies = [ + "numpy>=1.17.3", +] +files = [ + {file = "h5py-3.11.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1625fd24ad6cfc9c1ccd44a66dac2396e7ee74940776792772819fc69f3a3731"}, + {file = "h5py-3.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c072655ad1d5fe9ef462445d3e77a8166cbfa5e599045f8aa3c19b75315f10e5"}, + {file = "h5py-3.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77b19a40788e3e362b54af4dcf9e6fde59ca016db2c61360aa30b47c7b7cef00"}, + {file = "h5py-3.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:ef4e2f338fc763f50a8113890f455e1a70acd42a4d083370ceb80c463d803972"}, + {file = "h5py-3.11.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bbd732a08187a9e2a6ecf9e8af713f1d68256ee0f7c8b652a32795670fb481ba"}, + {file = "h5py-3.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75bd7b3d93fbeee40860fd70cdc88df4464e06b70a5ad9ce1446f5f32eb84007"}, + {file = "h5py-3.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:52c416f8eb0daae39dabe71415cb531f95dce2d81e1f61a74537a50c63b28ab3"}, + {file = "h5py-3.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:083e0329ae534a264940d6513f47f5ada617da536d8dccbafc3026aefc33c90e"}, + {file = "h5py-3.11.0.tar.gz", hash = "sha256:7b7e8f78072a2edec87c9836f25f34203fd492a4475709a18b417a33cfb21fa9"}, +] + [[package]] name = "hpack" version = "4.0.0" @@ -1245,6 +1294,21 @@ files = [ {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, ] +[[package]] +name = "imageio" +version = "2.34.2" +requires_python = ">=3.8" +summary = "Library for reading and writing a wide range of image, video, scientific, and volumetric data formats." 
+groups = ["default"] +dependencies = [ + "numpy", + "pillow>=8.3.2", +] +files = [ + {file = "imageio-2.34.2-py3-none-any.whl", hash = "sha256:a0bb27ec9d5bab36a9f4835e51b21d2cb099e1f78451441f94687ff3404b79f8"}, + {file = "imageio-2.34.2.tar.gz", hash = "sha256:5c0c0ee8faa018a1c42f649b90395dd4d3bb6187c09053a0cd6f1fdd51bbff5e"}, +] + [[package]] name = "imagesize" version = "1.4.1" @@ -1264,6 +1328,7 @@ summary = "Read metadata from Python packages" groups = ["dev"] marker = "python_full_version < \"3.10.2\"" dependencies = [ + "typing-extensions>=3.6.4; python_version < \"3.8\"", "zipp>=0.5", ] files = [ @@ -1277,6 +1342,9 @@ version = "6.1.3" requires_python = ">=3.8" summary = "Read resources from Python packages" groups = ["default"] +dependencies = [ + "zipp>=3.1.0; python_version < \"3.10\"", +] files = [ {file = "importlib_resources-6.1.3-py3-none-any.whl", hash = "sha256:4c0269e3580fe2634d364b39b38b961540a7738c02cb984e98add8b4221d793d"}, {file = "importlib_resources-6.1.3.tar.gz", hash = "sha256:56fb4525197b78544a3354ea27793952ab93f935bb4bf746b846bb1015020f2b"}, @@ -1497,7 +1565,9 @@ summary = "An implementation of JSON Schema validation for Python" groups = ["default", "dev"] dependencies = [ "attrs>=22.2.0", + "importlib-resources>=1.4.0; python_version < \"3.9\"", "jsonschema-specifications>=2023.03.6", + "pkgutil-resolve-name>=1.3.10; python_version < \"3.9\"", "referencing>=0.28.4", "rpds-py>=0.7.1", ] @@ -1513,6 +1583,7 @@ requires_python = ">=3.8" summary = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" groups = ["default", "dev"] dependencies = [ + "importlib-resources>=1.4.0; python_version < \"3.9\"", "referencing>=0.31.0", ] files = [ @@ -1527,6 +1598,7 @@ requires_python = ">=3.8" summary = "Jupyter protocol implementation and client libraries" groups = ["default"] dependencies = [ + "importlib-metadata>=4.8.3; python_version < \"3.10\"", "jupyter-core!=5.0.*,>=4.12", "python-dateutil>=2.8.2", "pyzmq>=23.0", @@ -1579,6 +1651,9 @@ version = "1.4.5" requires_python = ">=3.7" summary = "A fast implementation of the Cassowary constraint solver" groups = ["default"] +dependencies = [ + "typing-extensions; python_version < \"3.8\"", +] files = [ {file = "kiwisolver-1.4.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:05703cf211d585109fcd72207a31bb170a0f22144d68298dc5e61b3c946518af"}, {file = "kiwisolver-1.4.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:146d14bebb7f1dc4d5fbf74f8a6cb15ac42baadee8912eb84ac0b3b2a3dc6ac3"}, @@ -1637,6 +1712,7 @@ groups = ["default"] dependencies = [ "certifi>=14.05.14", "google-auth>=1.0.1", + "ipaddress>=1.0.17; python_version == \"2.7\"", "oauthlib>=3.2.2", "python-dateutil>=2.5.3", "pyyaml>=5.4.1", @@ -1691,6 +1767,9 @@ version = "3.6" requires_python = ">=3.8" summary = "Python implementation of John Gruber's Markdown." 
groups = ["default"] +dependencies = [ + "importlib-metadata>=4.4; python_version < \"3.10\"", +] files = [ {file = "Markdown-3.6-py3-none-any.whl", hash = "sha256:48f276f4d8cfb8ce6527c8f79e2ee29708508bf4d40aa410fbc3b4ee832c850f"}, {file = "Markdown-3.6.tar.gz", hash = "sha256:ed4f41f6daecbeeb96e576ce414c41d2d876daa9a16cb35fa8ed8c2ddfad0224"}, @@ -1750,6 +1829,7 @@ dependencies = [ "contourpy>=1.0.1", "cycler>=0.10", "fonttools>=4.22.0", + "importlib-resources>=3.2.0; python_version < \"3.10\"", "kiwisolver>=1.3.1", "numpy>=1.23", "packaging>=20.0", @@ -2090,6 +2170,7 @@ requires_python = ">=3.8" summary = "Powerful data structures for data analysis, time series, and statistics" groups = ["default"] dependencies = [ + "numpy>=1.20.3; python_version < \"3.10\"", "numpy>=1.21.0; python_version >= \"3.10\"", "numpy>=1.23.2; python_version >= \"3.11\"", "python-dateutil>=2.8.1", @@ -2139,10 +2220,10 @@ version = "2.1.2" requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" summary = "Python datetimes made easy" groups = ["default"] -marker = "python_version < \"3.12\"" dependencies = [ "python-dateutil<3.0,>=2.6", "pytzdata>=2020.1", + "typing<4.0,>=3.6; python_version < \"3.5\"", ] files = [ {file = "pendulum-2.1.2.tar.gz", hash = "sha256:b06a0ca1bfe41c990bbf0c029f0b6501a7f2ec4e38bfec730712015e8860f207"}, @@ -2289,6 +2370,7 @@ dependencies = [ "httpcore<2.0.0,>=1.0.5", "httpx[http2]!=0.23.2,>=0.23", "humanize>=4.9.0", + "importlib-metadata>=4.4; python_version < \"3.10\"", "importlib-resources<6.2.0,>=6.1.3", "itsdangerous", "jinja2-humanize-extension>=0.4.0", @@ -2300,6 +2382,7 @@ dependencies = [ "packaging<24.3,>=21.3", "pathspec>=0.8.0", "pendulum<3.0; python_version < \"3.12\"", + "pendulum<4,>=3.0.0; python_version >= \"3.12\"", "pydantic-core<3.0.0,>=2.12.0", "pydantic[email]!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.10.0", "python-dateutil<3.0.0,>=2.8.2", @@ -2645,7 +2728,6 @@ version = "2020.1" requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" summary = "The Olson timezone database for Python." groups = ["default"] -marker = "python_version < \"3.12\"" files = [ {file = "pytzdata-2020.1-py2.py3-none-any.whl", hash = "sha256:e1e14750bcf95016381e4d472bad004eef710f2d6417240904070b3d6654485f"}, {file = "pytzdata-2020.1.tar.gz", hash = "sha256:3efa13b335a00a8de1d345ae41ec78dd11c9f8807f522d39850f2dd828681540"}, @@ -2759,6 +2841,7 @@ dependencies = [ "PyYAML>=5.1", "aws-requests-auth>=0.4.2", "boto3>=1.10.0", + "importlib-metadata; python_version < \"3.8\"", "jsonlines==1.2.0", "jsonschema<5,>=3", "platformdirs>=2", @@ -2862,6 +2945,7 @@ version = "1.0.0" summary = "Asynchronous Python HTTP for Humans." 
groups = ["default"] dependencies = [ + "futures>=2.1.3; python_version < \"3.2\"", "requests>=1.2.0", ] files = [ @@ -2907,6 +2991,7 @@ groups = ["default"] dependencies = [ "markdown-it-py>=2.2.0", "pygments<3.0.0,>=2.13.0", + "typing-extensions<5.0,>=4.0.0; python_version < \"3.9\"", ] files = [ {file = "rich-13.7.1-py3-none-any.whl", hash = "sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222"}, @@ -3016,7 +3101,7 @@ version = "0.2.8" requires_python = ">=3.6" summary = "C version of reader, parser and emitter for ruamel.yaml derived from libyaml" groups = ["default"] -marker = "platform_python_implementation == \"CPython\" and python_version < \"3.13\"" +marker = "platform_python_implementation == \"CPython\"" files = [ {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b42169467c42b692c19cf539c38d4602069d8c1505e97b86387fcf7afb766e1d"}, {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:07238db9cbdf8fc1e9de2489a4f68474e70dffcb32232db7c08fa61ca0c7c462"}, @@ -3239,6 +3324,7 @@ dependencies = [ "colorama>=0.4.5; sys_platform == \"win32\"", "docutils<0.22,>=0.18.1", "imagesize>=1.3", + "importlib-metadata>=4.8; python_version < \"3.10\"", "packaging>=21.0", "requests>=2.25.0", "snowballstemmer>=2.0", @@ -3357,6 +3443,7 @@ summary = "Database Abstraction Library" groups = ["default"] dependencies = [ "greenlet!=0.4.17; platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\"", + "importlib-metadata; python_version < \"3.8\"", "typing-extensions>=4.6.0", ] files = [ @@ -3585,6 +3672,7 @@ requires_python = ">=3.8" summary = "tzinfo object for the local timezone" groups = ["default"] dependencies = [ + "backports-zoneinfo; python_version < \"3.9\"", "tzdata; platform_system == \"Windows\"", ] files = [ @@ -3675,6 +3763,7 @@ groups = ["dev"] dependencies = [ "distlib<1,>=0.3.7", "filelock<4,>=3.12.2", + "importlib-metadata>=6.6; python_version < \"3.8\"", "platformdirs<5,>=3.9.1", ] files = [ @@ -3687,6 +3776,9 @@ name = "wcwidth" version = "0.2.13" summary = "Measures the displayed width of unicode strings in a terminal" groups = ["default"] +dependencies = [ + "backports-functools-lru-cache>=1.2.1; python_version < \"3.2\"", +] files = [ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, @@ -3791,6 +3883,7 @@ groups = ["default"] dependencies = [ "idna>=2.0", "multidict>=4.0", + "typing-extensions>=3.7.4; python_version < \"3.8\"", ] files = [ {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a8c1df72eb746f4136fe9a2e72b0c9dc1da1cbd23b5372f94b5820ff8ae30e0e"}, diff --git a/pyproject.toml b/pyproject.toml index 2026f23..fa3bc7f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dependencies = [ "setuptools>=70.0.0", "io-collection>=0.10.2", "python-dotenv>=1.0.1", - "imageio", + "h5py>=3.11.0", ] [project.urls] diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index 615cf92..0000000 --- a/tests/conftest.py +++ /dev/null @@ -1,25 +0,0 @@ -# #!/usr/bin/env python - -# """ -# Configuration for tests! 
There are a whole list of hooks you can define in this file
-# to run before, after, or to mutate how tests run. Commonly for most of our work, we
-# use this file to define top level fixtures that may be needed for tests throughout
-# multiple test files.
-
-# In this case, while we aren't using this fixture in our tests, the prime use case for
-# something like this would be when we want to preload a file to be used in multiple
-# tests. File reading can take time, so instead of re-reading the file for each test,
-# read the file once then use the loaded content.
-
-# Docs: https://docs.pytest.org/en/latest/example/simple.html
-# https://docs.pytest.org/en/latest/plugins.html#requiring-loading-plugins-in-a-test-module-or-conftest-file
-# """
-
-# from pathlib import Path
-
-# import pytest
-
-
-# @pytest.fixture
-# def data_dir() -> Path:
-#     return Path(__file__).parent / "data"
diff --git a/tests/data/readdy/actin_ortho_filament_10_steps.h5 b/tests/data/readdy/actin_ortho_filament_10_steps.h5
deleted file mode 100644
index 4f53ab9660934d53aa532428775c4115b7977972..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 216961
[216961 bytes of base85-encoded binary data for the deleted HDF5 test fixture omitted]
z-U9yxN^J}C0r)e=HKvV4;2=MRRf6kcn=Ga_Dz3PciadvWm_U8}a}yZT#W{bO4T-R; zTR^*=?H-gH4=c%uB|O4^+w2%sros>O*t)v>wE1Y`0y3p*St)vug9_sjTZRsCG7C%h ze!)*@ChsjI*}Z=H7gnSso1`l*h@TTgG&39(7mJJ3HQsx-y}$ZY;To8fRH2$t$nQ zoQj8#J8r6pdSO=7D;Mer_C9o!WgR5kc%jD1>wg1KW7(s4q&c+ls^zp^!?M%Wo_J!& z+U?hs6Mg+$sAHl6%AAUKTDZTge$wTY4&Yg`)D2>P8h;oj%Oz>O|8qmE7{u4j?Jsg7 zjDuL(2k4|fx-vfdR6=a=oWhP%T3rTR9{PEYRgv=WD^9{Y@`jAk;!c>A+Wbe|1&l4! zgSeRms{9n-UujWm;j0D{9psDqdD$5sX745{IsnCnvDvCQp=~wtFg95!eQAMP z*frv28Cl+mATx zU{3DQW?lQ+u|!3BnUvAdr}4ud?QJ`GcKt+1m#gvxMnR8j&(~Q4)5r@h*Tm;93eW1{ zf6IKt1pMVu#{ZfaE8PQAhODpc)n23F>|!oGr|YA`;yYOLZM3zcl2A0+DQa*4CRM?` zj`n|?q1-=K=(@dJEe!fAB6z~AEK8NGLlqQ2*-f(>p}60J0OjYK4>`U40fxH7=u6Hs z_EM7jxZw2=u-STnun8vhPuq^28{pTebf%0vMI)Ot zQ1<>k#O`c_ZGLXrA(_5R6TtGR*b{{GG8I0Q?7x3An}%~{g2%6cv+TT`JbXT&Xm`xM zPL>MXB%h)UDn72_l?|nFY+XX=yOH3j^6@lEMj3J*jVk$0T&M0gK3_VrBI{L2<0@Ep1i%7P#87-w7=NjjBQ@cPNV8 zd1S;FD0fV(aeLcvH2?lM;rQTfA#w~50jc@NB&XpzN#BK!H(U(#=6)XB8ZQ+a=A91c zjK4AkGT+T-O0v$j-uZQ4<9#ByNodvJ>|NCOjva%Ow)LpI3P|B0qsiYI&N_aK3P?{N ze_ZmUlz^pmf*te4JZ5F&r!J70VZ&C>hx6^Fwi1o<^h2_H*Z!I*JjCWZ`((jeRuylk zU9~3`>(CBuP?C#Bh$MFq#yK3mxllOwk4WwYDpnkB z_s8|EL-IJN6moU~wx_#8Mz;SE?_D0B?9a4xZyns#h9HOG@!?AEI9NV7f&U}y+a5|E zf?RTURgyEve>eMgVfXmt>I~dK4W5mq!Irf{=)5~e+`;-xG1PrIzO%_eMJcd8)jSE? z`&UT_aj-HT0$YJYbrq~LeKxNmL$_yRh5cDgawSTF_&6)!HYu1-#f4`!=6xAsq<_R|&O80u#Ion1PF zP|zvVrOF)HhMa|n^SkEL3an#(2l4|VZlG4s>Ix*q-#9$H3w?UFzcy1gy}1j8h6*i^ zC2{Ka@WSrtou2Ug1aYuDRyMx`|EGh6*k51iY5GUn2OR^5qobwXw#A);Q>gg@h2CCC zn}=LW2&h*CW!@SCmHivgF(~xu$;Nz8+w=}p1G>6^LhnyE_CT`+&{Z!lpwP?1vrF*1 z%F@}{{%U{B(Dvb7U+5eG;0LRVJ+NK)@ipQGs>!Uc4d-kg9YB>WXk>e<)4##g)-E(p z#O28;q(>|soP!2&b%{8JVvqb@SUfoecP78HgN@bx>Urp@*U)qkNASgwF4)fD*~Qi6 zIe;H;t)|Yw4h}95(5z1nHpWY#bpXY_x;#1FoJ#{OQ#*IVhH@Y8&y{s=?w&zvK&YQ~ za=0=CO$H7P`(SmjU;?rd-mQVVSugj^?H(OppWg`**H_x|z}nG1)Ofl)Ion;GZkylU zhE@;M0)lRKGOKqES}x$~>iqQlU~#Y$wsLZE3N^XT57t-zw#`D#wQK0L&%ptFX|iq+ zYPnrpU0oc(x0bVD3kL@X#PvDiWPfwA7PRjko?V^XKr4A;-nU|K@8mAc<;l?=EW6<@ zGq}@gp1_Y*hZh!)?#fGN&}!{18(xQ6T+pn~Pr$)4tZQK(egfS(v_{sJn`_n&cOm5{ z6m)lWVrU+0L$ikJO3)lur^4H2cA;5AtNa|Q%k{!mPa(}C)M-N;tgZj;oP<{L)m=I4 z;AnBQarJH)US1)Nz|rPP9F+U;;`|12aeS~f64SbW7yJ71=wxH5z7_OC8w*;MSEulO z*ss5vd*^3&CAzbtqvfuJ<)fpMb-Dp$@ka0eXgopHCSs`Sg#93u>OZB5-v0;{G4LRw zhDrmqm|&Da&^f45FdP}^{3rjziB(WVY7&OgXG-`VHb)pTTI&7(cwmYKT?e|}e_r|y z@S!rnNFTtkWlXtYNdJ~Q{y!O{{=s<|PmB9c%SfLV9H7r$yi`w1{&TvwXa1H&7`cGo zRYJ`PV*IH6m@Kz{t2P-9`qkz>V*Z!Qgl@V& zO?>DCXkQxdO(deNOPa3UrvRQ=4|&>!>IWGr82PkW9N(gGJnY~dFh*ommXIUYaGU{3 zg@A@2Hm{*u-1m9zWW5H+?i)85^mwz1L_#6i-@JB!3tf+pylMgVwVtYMRl6jm@u{<` zmGFp30B;ThNuA6EKz{r@kxThSK;6s_P4;Ixksd$I=LQxZwJlc1S$U^CJ_3!&-zSl? 
z&R5v3Uy0#I=Hrm&r4UNGK(W1A2i*kLmBFJG!4C%=%u;%T_L=B6r4sI?KGS6U#k|(GmC7jB50P;n9P=xNpw*Iz>aCS z#^yQGm{FDyXO-Ki%%dr3&4Q=tIqflo^T9N)=Rq9YEePX`F`Pg}nh?sJQVyiaCWSuk z^c(kwv*gA10iHEd;R%GLGzxY7*01~IF6oV5)mVG0nSXkqPCIC#Vgqk$g^I2&;m&~B zwR`oaH{A!nA31-1yBtfap|?NC;QJJuVd-&nv!*^O9HMj(5Id-`ytUY;;xoV&QrhmL z3(VVtN+G&Ko$P!(0u?%fWu{AE=iY-zWts$A9u#>7!(pdFG~sU(4dvLf)wYiI>$-fY zU=|(%*AnE7kzpPPjIlXbssNP$uh(ACx&!PW;s12>dfGbWmSZ2{Sd9t0z9~VKB^a2 zQo&kaEJMV5(-884w#6O_hboAgm#BX`2JnXW?yLQ;_U<|=s%~unIE_emyD3pXN|9Dt zX+cUbC;^dBN(n_Ilu$_pQEX5|N)Q84LID8<>2~Ojp+oT7`>mqAZ&Zi~@R7^TsIy1C!=(QK_tOhgQZ<=UxyT5$S z*;Tkyw~b2k%E8#+w$E2q$V-%{`GdNFzFgKPq?bzqyOJ2s}6L(+RtRPQkH+cAj`Ax&STiblXiyBJ3 zVnu_ks?PVk=FL!+w7C&GF0BRTF{XZ>D#Iq!+fK{{{ji)ZO)q{^9)I9Mg&Ns(Ptnde zz~1!KNyn@|F=&RBvdWV2ygWAPDbo#-I{CgpXI*ne7Z7ltruX@-UzTczE!lI$i$=6N zFj;S;TZ>}JFgFU8nw2-aSJLx$`QX7UPQGAE^VabYiYOe;afTonH;#a zq#bHs;CW}xcAQI6Jj1o`dhHoPquLl(DP6Wi@cnrCp~KvD4J_P(Izt!b^5OdraRtZR zg(%@CeBP&(SHNiU+v5{zsksl*9~?}$@-EY>VqfX(YXk9P^jlTui|f3>y`r2}(ZCxS zChjZcb-OR0zLg!UMtVPPTM>oQDH6KD0x-@oYw`S(O5S&N59V9jg@wgmTskCen5LuY z0Ec~4BMF?y_`>Qa`nY8&1-oVWKFvhKYT}gKZIQ%~nv;g)_c&X?!?8ums=nEt2)$!r zJoNQ(T#{7JX}`~FM9EZ2zR_@{1uw|b;^ptINGQ?yybqw$rE&M6b*u1nUQDx2*_ zCj1a^N7us|k5s-$Pp=y_wmb0Anz#VlrI+wPfgxbmu9(*FIS2h263KC9WzG9^Fiv)KW{ z>blwF7*cwDC(bcR9~eDb^K4wUeYvM;4|Qu{Ni_HX{Bz!kNnx9R9xse*ov?lQ`Q{1W zE4{U{;Z2^`u%KX_79idlc%~{jp&(kO@tmXfn+gY4LvVDsH)3Y7=U4zBOy2o`pSDX$# zc_|1Qv}gQryLXFGtBBTHQ#MV9rCHOVPP=oImJjl^sbB5Y@_Qs`Qgs)kxBdDuFSpCj z`s23JX(ooDhq|RE6bZe(2PUUuwd|5}z+1lfi?Hn*k$rC`lb*s0@+3F%>!w2P+wUeQ znBUU5?GA3q=8ZAb+m8HN+Q+x;9?J=uP^CN1#LYZCxCtB;9G!Q9o?8+5%P~XPDW1+L zGJ_pKhY1Jbh);d_@OfElSKEtza-ddiLt$tCTGcD?W>99GouS0X3|-6+#@gVDt_|ZY z_E0cf-dg{O$B{okIMDor^@}S?GwG6B%hX+M+@%igJ}ce;ZYX^_AV~gJ`LMgwO2>yx z+3|5X*W5}t?3FL8zf86(0;#A2k8Qks_lRoWo4T1_dTo!b)O$iQ>zz7Xa~E~x#=ip@ zv9e^kU0HLYbKZBgwM@BPQ@=HG*htlcGUeXDa8^zAQ>{i*6N=W#B0xu3pLzcTmE z{t+_+*8qc?PhWQW!J_17;?Y2o@LlW_&XFXXJ!Ch|SMV%LF(;-TI3aeGCfo+}#tH;t z6^_wZ4~{VTJ~C~gFCltwv-sr1R?GZLY$8>uz*vp8;Dm<@4IH;#)E#=3g~d9xx@O+T zXQnWxIY&5*fH>t*Nz6MhF;A9M(*(|%-K+aWN7%xv9_HkEoOhM^qz$(0d(KV#b|9#7 zw%06wRiVw4PFMY*@hrpqe5H@5M?oM6IAUfmFF>MkoVht(MMj*Ih?YX}$$1xGxb;iE zjiM|8IM7k3gxMihlVQQw9@{1Dq}cH^B!;wDk}a=;gI+Q@ohVf3$F^t zNEPk7b&r4O>3bEm1rA&5{6Ro5{ZoCP=NC80bSvN7r|LEtx}lcMF85czycH>0W()fX zqD-xPW|Bvn+pqf3=4(pMkGNcWJWFHnz~uxJ^@RGte!v~6OOzTy<&f4;G~3Q z)X3LQ9`Pzab@l8Luwg*Uc0`xIC$R=zzbV<+O!&~JFGy4>E$&y~zi(pEA9t}v-S3XII7@73Ua1IH~WFtgyQ6UNbg z=-i`_(sAY+;Iw~!{_0yd&5y6-rxNHWnkyw#-4guRim3(?x@-(>`@q&*YeE5X?3J`F zafRp7?@**S#VQr%sJ_d8V@uw6?Db2qQ)F+YyuEVemTA`c>-0{=E@!R%D*FU^)cWdU zxy)2JfJ2Q=#CzA03zcNu>RFT7X_?obeLLl1V57Us0W{D#j& z`Qa=-yNUAoRH`z~k9pjyPsZ)ufja_63Zq>2)=nAaQW4x19kji$5*e|2O=RIx5q+Ak zDAdpJzJ^txU)h9FsyJ3>#vLSyD-^(XZhX1)imAB=Np%Ub zXj0pDsuiu8I?k0H9%BSGA_n@Jd-|R8&mGvCP)X0FrESM${?wq=WJb;Zv(;TC;2RKK zp!T4f+h`w2Ku2eBzYVe2_7F40;j3tCCd(qROwnDA(H_ zlt6!mF;xy@Y0kuQQR5XHcB9aObd{_ruv(nAwqsXK*9@oUx#rLAW_K`3TlDs9lj?Ho z+347xL!igoGi>_e5f>htm}9yZG<}q3V{<~^w`z1J4eqf0n06Yxi)f$`ZNDHKLRbUJ z{La?AxL$3x)aWs}D#u}7c&XwLh!9SEuUE1~$aO$wzJ!V-Qp^4G>xzjYx>q4h#pSv) z9YDydK;W>SLFVpHvd7>@nLfvcGn2l0qW9bgV08Iq7)*h6v#=hE;jN0%SR76tne}0ej-ZA;Lv?i;J;&8-`?2@%t^i5 zNqoE}Q!Ud-yUxf|?9RDIq3>-ZW;G0`GIyo38-mm*zLsxiWA~gkwB<>xTQR0OnD;s> z^n~nUp6IK1z6StE{OByjB*`!mo6-1>W}QBB{pZ`mgbq|Crn20n*v}Qwpox)Bc>Iiz z>!JgOoWRPGiQ|Ox{55^L8+UQTUJk7Xr89|4J4VxFs=C;i`o!NyXS(z1GrV0DrTp}q zv#8>pIxuUzV|A+I+Pky7=JNN4Y9}0x)4n`mq?GU@uGzZB(sCZe++y!mvszKx+46Cg z_Ca=eyhzpCo=ndg8g=F-r#^;dP;YkXg;UEo^`nld=ADAmB;W5Ud&b*zWV|%0>nQfk zg9qbFZG@|C%3E9RXXKh#+Ve1J>wr!8={Eu?e)^X^WQeu{!%?wEaZ6x7?Z8PHIBrfg zirJj37`^d20uCGd=4h%f2-7wIG}g| 
zE53SI^0ZQk9T?~4;!^524vSLFYf!hEz7b?k*CDG(n7M3ryK{B-D?Pw1FYpewX`_g( zseKpvC1z)z;j4uA&ku<*j$9BjGnP{UDKf^*2am`563AuLh|ROO)0DQ&=KwpM+gFdC z{ZRcp63E?Zd>In)T$O<&j5O|JMWjji`RJ4})@0VcN%pD;nq=@Xrp?h%&YR#zsJ_1u zO-iB4@jk9tj>C3J60Fqor^P&hPJvOW$<q zb$;wIa94lnO>o}0(!o5FPmb@Ug|wy5=8H>2d;sI&LEJa@a30+<&Ey)w3^93sz{X%zo!BbUMzP%a5 zx^UcFk;K0y%xJ@u2lhk@OX}yrlTj3Q0Lf7Q0@=%NXwH@l5pF#aka>}WrE#feiPNN< zNK@-O)*7^=m`RrxE>K>K_m!yWvcz*OY1&rI{+SzGnxgW_1-)29gD zyZNQIh2$J_mZnN9A@!e6+`$^H35tDv4F@p+`BXxmNV)F$U$Z4K;Sr5FiVcaH(qDXX zT?V93JR9f}kjqq#B^EwA@Tt~_N2|)6!^7ib2OPFg!8tJbVt{_xf-XALUGm#&y<6;e zCGNeMX89JPV6pw-8yBG-U}kbbBQ{v8w1WFpZsYc36Dl*d22SqT+0B|X=1td0wP*=sg=S~H6}}ffJ-&nL>p&8LLMb?Tdv&3VZ_Cu8 zqlojNj!u$Hrozd7?_6>6r}bZQochOs41cv^!+2dkwm$&$Q9ft=&UV;ZPT8ESGvI-4 z=qf=h_my_LzML@*G{TpMTMbXtHi z^0+hSV~Lb0jj3Ba{2oIWf41-Rz83K$cS^wcX&o^r$y`uOrvFIy)|H=^Gdp>qcFR@u z*33xwF+q8UmoXHqpr_TNZ7D*9Eq7pK53NsV#Yg#jt%I&`*h};ZL|>W{L8>h)*CFGU zBNJ)NDW|9(y%#6>AbM!hL$;Yit=ziAvjZqfjuT1j3XtHh+^$Kj=&AO?;Jm#SfyE%t zw@d0T=CJ92Q0QTgzkhnj>FJnhK)aAgNfaS|l|NzRWQG;Z#{%@kwA{)S182X95=BZbX) z4HP~|#JMduMoXVMYY`7W?=x%Yzz-M%0?Di-Uhhe@G94ka+Wn+ZEu(WKN#vuc<_NHQ zS*Q6e+w3BDUmBOY`jCO|D-AQ>uA^=R%j{ksA}$UB&AUs2&Pm~nW&86Y?1bc#Z>k;M zSD}%mo=Nn|ia?bz8pv>J7l5>&G(&dJTgM`)av!{$?oErIPcVH7hwbC!2YOtHqf-Gh z^QjQ!qtb0kZ&%}d-@Kfnc@|JxuD6YkI0n$@zY{!Ly+a|5Q#etGM31(}JeRcJC`)#k zN=|y=V9+yAy;sOqt+K1O$HUO_D5br6#ooT#p{m~=yUj#Q=Ic-?!7E!pGO;~oyn=;jWw+=Up^9TDT z8DdAUG9E(uI{NaVFn5j@aNN97I&FRdHv5CB%SpLGJBQD~oey9A$J>!XgyF`4^#*h0JQNErt~8Y@R)8ir{Q; zb{dr7TBf_Sb>ae97I*AD?S%J~6}OihCL4B(Tzu+JbBvfs{#3@nOg^c*nz-FN<9pxK zENsXJ8>C@}FmD=QF$dN&V4-QW9~09wG6i5aJXmPLPX73wQVVN!xMkI5XL|Y@>tWkA zSe;u2ux>F5`?kTBf3WDaG(Fx|d!@Mt3p>Is|Ae(GOaUlwgH7fZVfkowYOFS?xgXzV zZgFO6^ha51_Y|(Yu>cp)n(S}-Qa3yV%T7OG=fIJns-h0qxebnG5zNgDc4gExj$`Lx z*$wvA8^@&73{1meFE8UNVm+XuVQ6X^RXnd3B>x9JpEx zEaUa|wKQ~(&CI}iS(urgzzrMM_;0eeDytc{W*Ph?ShxD|xdL`Nghdxzv$@gQ)Fv

O=HtGX3d z!(jPuX@(p-+5X}?9J7@MSilZ-mo@ZFP2+AjKR?>r_O@+k5|;DeLR?dmt>4)5VCy`% zU>7XxO!o}7HS`S6z}39qAD zOlXC*vnAL=Z+7s4m()=wRUmXcx- z*|TLBR`Otpcxh&K0MpRg3kMDVAgnD9RaQ2`#pr%_a$J}m8aY$eFpOKx4{mJJW6cGX zu&4=ZY`6+CcCz8=r+&Cu;3}+hlVd%Vja_i7z%@vm1EYQ2Pn$+3W?+>M7J?@+UklrC z3(768EKFg02iqD!>-gLh><)?h`?$>luJ{JKA7Zf`HLsiD5`2rx9E)(=eVC$*)}C2h zlgY*DneqDc_Th2(ARu>tSKasSDJTs$8rV~_ro3fh5LO1^gP0y3iu%?F;D!x1^JUm? zvZbW>`{?AtEKKJnC+c6q&0=PD0p7=OPh}GZ3)>{Z`sw^oZ};ogiSZd&7=)Vz_D4+) zh86Arurj$cjqMx!QQI;(3u~(jaKQa=M~1BoNm_A5G%^|tDJN_+NJK+GfFYv9Fz)zC zDvKfY!H9EWNPPde8e^7?Avz6jNj`+({rwxl7&};~^T8x#ca!RM5y`^qnDJM1X~BBj zaSZ=W4C7b$uf*AyZ3Kh_e_dX#{j?rWu3fhtS%~mASdZmv`Fivv#$Uf4cah-p_1L+V zug54-{PpWml?$m#5#4KU8 z`*_9E+wo8-Z1`x2KZl=`1D~#)%D?XN&r#QJ^>>LJlMxnp#pok=nWm%o76*}Ae&_Zl z@o5}BU2?{!vH0}57e39!w?X*3sBL_luf&d8|5hY;xwb8;_~2nS-tzCBA72{s$4j-W zZEr=dk<>rhD*76e>+w>K=J-8RWe!RGBdI|KvC_3?_5YbbL`b+LU_Su??hE%!%LGSW z1ADz#6Tt2?M1(|y#6-kIB*Y}dq$H#yWTa%Ih1?5)CtyGj$ zl+;wzR5a8y)U-6TG<39dwDffJbPV(i^o$IQ3`~qnjLb~TOf1YS%&aV|ENrZ7tn6&; zY#i(y?3^5&99*1SoZMX8T-&&}af5AO8xPZD^=sGz8@PjR2({(bxRDeYI< zudJl3q@t{%tg51_qNb{*s;;K4rlGE(en8`Z#=!#z4`?3LJgB9qrKzo@t)-)_qpho> ztD~o@r>n20uXjlQkiNkogF}V}h6aZX4;vaCHacurPdhrDb~^3s5^ziiX^7QgN=XK8O{JHbzyw7`|_wn}e_Vw}g@$>cbz2JAj@8X4v7yK{! zU%cdh$v@yyz@^IpmjePX2VM>e3)O=-8Opn7G)u*qd=T<8Ix&bu<1} z{H=ueg!tPDw-XX?C*Ho3cqj4hox69E?k3&6mvk@b{=NJ69^8L$|KWp&4<0>y^ziYc z$B&*oe)2f^N%E7F-VqCr9 z)I%LVDGB>8o|E7XH?Pl0(A#QsXb2q|LWhRX;Z1aS6CK|C-#IN0BpoDI?b z_fc5_g7wIb8#cVG$Evk_Jzm0p#D!oz?p|BjV?Fk*p)S=bNv`PiyCwuSdbP^T*d?=-Tv4GPeDU?Di(i3(U_E9~;2*$xq@%>=>(QMG zpRdPyYJ9#PrD^dw`i=yU01`j~NB{{S0VIF~kN^@u0!RP}AOR$R1dsp{Kmter2_OL^ zfCP{L5mp? z-H>x`$iar3e?z_l=IDY1kN^@u0!RP}AOR$R1dsp{Kmter2_OL^fCP{L552+{}Mj?YoGk*>H07IUE)IZ*&}$x>|=Ntoi)WLNbz6Z2#8m+Cf2sA z|C#U-j&!~MOf&o$m#_DqA=!{`*^pCh$murZOdE2x4LNQT`}6PdY{&&R None: - post_processor = ReaddyPostProcessor( - trajectory=ReaddyLoader( - h5_file_path="subcell_analysis/tests/data/readdy/actin_ortho_filament_10_steps.h5", - timestep=0.1, - ).trajectory(), - box_size=np.array(3 * [600.0]), - periodic_boundary=False, - ) - test_control_points = post_processor.linear_fiber_control_points( - axis_positions=[[axis_positions]], - segment_length=segment_length, - )[0][0][0:10] - assert False not in np.isclose(test_control_points, expected_control_points) From 305b0d0d0d8db10a2a554874fe06c13f70c94e98 Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Thu, 25 Jul 2024 12:22:40 -0700 Subject: [PATCH 60/63] don't run tests on build since there aren't any --- .github/workflows/build.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index dcb9349..ad3f607 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -53,12 +53,6 @@ jobs: conda env update --file environment.yml --prune pdm sync - - name: Test with pytest - shell: bash -l {0} - run: | - pdm run pytest --cov --cov-report html - rm htmlcov/.gitignore - - name: Publish coverage report if: ${{ github.ref == 'refs/heads/main' && matrix.python-version == '3.10' }} uses: JamesIves/github-pages-deploy-action@v4 From 497bcb5953072f1381e0b0d0fde8c9a78121bb5f Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Thu, 25 Jul 2024 13:28:46 -0700 Subject: [PATCH 61/63] calculate total steps for readdy --- ..._process_readdy_compression_simulations.py | 12 ++------ ...ocess_readdy_no_compression_simulations.py | 7 ++--- subcell_pipeline/simulation/readdy/parser.py | 30 +++++++++++++++++-- 3 files changed, 31 insertions(+), 18 deletions(-) diff --git 
From 305b0d0d0d8db10a2a554874fe06c13f70c94e98 Mon Sep 17 00:00:00 2001
From: Blair Lyons
Date: Thu, 25 Jul 2024 12:22:40 -0700
Subject: [PATCH 60/63] don't run tests on build since there aren't any

---
 .github/workflows/build.yml | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index dcb9349..ad3f607 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -53,12 +53,6 @@ jobs:
           conda env update --file environment.yml --prune
           pdm sync
 
-      - name: Test with pytest
-        shell: bash -l {0}
-        run: |
-          pdm run pytest --cov --cov-report html
-          rm htmlcov/.gitignore
-
       - name: Publish coverage report
         if: ${{ github.ref == 'refs/heads/main' && matrix.python-version == '3.10' }}
         uses: JamesIves/github-pages-deploy-action@v4

From 497bcb5953072f1381e0b0d0fde8c9a78121bb5f Mon Sep 17 00:00:00 2001
From: Blair Lyons
Date: Thu, 25 Jul 2024 13:28:46 -0700
Subject: [PATCH 61/63] calculate total steps for readdy

---
 ..._process_readdy_compression_simulations.py | 12 ++------
 ...ocess_readdy_no_compression_simulations.py |  7 ++---
 subcell_pipeline/simulation/readdy/parser.py  | 30 +++++++++++++++++--
 3 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py b/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py
index 523d9f1..7463388 100644
--- a/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py
+++ b/subcell_pipeline/simulation/readdy/_process_readdy_compression_simulations.py
@@ -52,14 +52,6 @@
 # Number of monomer points per fiber
 n_monomer_points = 200
 
-# Total number of steps for each condition
-total_steps: dict[str, int] = {
-    "0047": int(3.2e8),
-    "0150": int(1e8),
-    "0470": int(3.2e7),
-    "1500": int(1e7),
-}
-
 # Temporary path to save downloaded trajectories
 temp_path: Path = Path(__file__).parents[3] / "aws_downloads"
 temp_path.mkdir(parents=True, exist_ok=True)
@@ -85,6 +77,6 @@
     n_replicates,
     n_timepoints,
     n_monomer_points,
-    total_steps,
-    str(temp_path),
+    compression=True,
+    temp_path=str(temp_path),
 )

diff --git a/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py b/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py
index 85e1fe0..18779fa 100644
--- a/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py
+++ b/subcell_pipeline/simulation/readdy/_process_readdy_no_compression_simulations.py
@@ -48,9 +48,6 @@
 # Number of monomer points per fiber
 n_monomer_points = 200
 
-# Total number of steps for each condition
-total_steps: dict[str, int] = {"": int(1e7)}
-
 # Temporary path to save downloaded trajectories
 temp_path: Path = Path(__file__).parents[3] / "aws_downloads"
 temp_path.mkdir(parents=True, exist_ok=True)
@@ -76,6 +73,6 @@
     n_replicates,
     n_timepoints,
     n_monomer_points,
-    total_steps,
-    str(temp_path),
+    compression=False,
+    temp_path=str(temp_path),
 )

diff --git a/subcell_pipeline/simulation/readdy/parser.py b/subcell_pipeline/simulation/readdy/parser.py
index 327edc5..79a8ffc 100644
--- a/subcell_pipeline/simulation/readdy/parser.py
+++ b/subcell_pipeline/simulation/readdy/parser.py
@@ -1,6 +1,7 @@
 """Methods for parsing ReaDDy simulations."""
 
 import os
+from math import floor, log10
 from typing import Optional, Union
 
 import boto3
@@ -47,6 +48,9 @@
 BOX_SIZE: np.ndarray = np.array(3 * [600.0])
 """Default simulation volume dimensions (x, y, z)."""
 
+COMPRESSION_DISTANCE: float = 150.0
+"""Total distance the fiber end was displaced in nm."""
+
 
 def _download_s3_file(bucket: str, key: str, dest_path: str) -> Optional[str]:
     """
@@ -205,6 +209,15 @@
     return dataframe
 
 
+def round_2_sig_figs(x: float) -> int:
+    return int(round(x, -int(floor(log10(abs(0.1 * x))))))
+
+
+def velocity_for_cond(condition_key: str) -> float:
+    """Convert a condition key 'NNNN' to a velocity NNN.N."""
+    return float(condition_key[:3] + "." + condition_key[-1])
+
+
 def parse_readdy_simulation_data(
     bucket: str,
     series_name: str,
@@ -212,7 +225,7 @@
     n_replicates: int,
     n_timepoints: int,
     n_monomer_points: int,
-    total_steps: dict[str, int],
+    compression: bool,
     temp_path: str,
 ) -> None:
     """
@@ -232,11 +245,22 @@
         Number of equally spaced timepoints to sample.
     n_monomer_points
         Number of equally spaced monomer points to sample.
-    total_steps
-        Total number of steps for each simulation key.
+    compression
+        If True, parse compressed trajectories;
+        if False, parse baseline uncompressed trajectories.
     temp_path
         Path for saving temporary h5 files.
     """
+    total_steps: dict[str, int] = {}
+    if compression:
+        total_steps = {
+            cond: round_2_sig_figs(
+                (COMPRESSION_DISTANCE * 1e-3 / velocity_for_cond(cond)) * 1e10
+            )
+            for cond in condition_keys
+        }
+    else:
+        total_steps = {"": int(1e7)}
 
     for condition_key in condition_keys:
         series_key = f"{series_name}_{condition_key}" if condition_key else series_name
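The helpers added to parser.py reproduce the step counts the two driver scripts previously hard-coded: the condition key "NNNN" encodes a compression velocity of NNN.N μm/s, and the number of steps is the compression duration (150 nm traveled at that velocity) scaled by 1e10 steps per simulated second, rounded to two significant figures. The steps-per-second factor is an inference from the removed values, not something stated in the patch. A standalone sanity check, assuming only the definitions from this patch:

    from math import floor, log10

    COMPRESSION_DISTANCE = 150.0  # nm, total displacement of the fiber end


    def round_2_sig_figs(x: float) -> int:
        # Round to two significant figures, e.g. 3.19e8 -> 3.2e8.
        return int(round(x, -int(floor(log10(abs(0.1 * x))))))


    def velocity_for_cond(condition_key: str) -> float:
        # "0047" -> 4.7, "1500" -> 150.0 (um/s).
        return float(condition_key[:3] + "." + condition_key[-1])


    # Hard-coded values removed from _process_readdy_compression_simulations.py.
    expected = {
        "0047": int(3.2e8),
        "0150": int(1e8),
        "0470": int(3.2e7),
        "1500": int(1e7),
    }

    for cond, steps in expected.items():
        # duration (s) = distance (um) / velocity (um/s); steps = duration * 1e10
        computed = round_2_sig_figs(
            (COMPRESSION_DISTANCE * 1e-3 / velocity_for_cond(cond)) * 1e10
        )
        assert computed == steps, (cond, computed, steps)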
""" + total_steps: dict[str, int] = {} + if compression: + total_steps = { + cond: round_2_sig_figs( + (COMPRESSION_DISTANCE * 1e-3 / velocity_for_cond(cond)) * 1e10 + ) + for cond in condition_keys + } + else: + total_steps = {"": int(1e7)} for condition_key in condition_keys: series_key = f"{series_name}_{condition_key}" if condition_key else series_name From d0328b57996de3e5eed7d94691a9b936175f35a8 Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Mon, 29 Jul 2024 15:32:04 -0700 Subject: [PATCH 62/63] remove .pdm-python and add to gitignore --- .gitignore | 1 + .pdm-python | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 .pdm-python diff --git a/.gitignore b/.gitignore index b5be84e..67deb6d 100644 --- a/.gitignore +++ b/.gitignore @@ -124,3 +124,4 @@ ENV/ *.simularium **/analysis_outputs/** *.h5 +*.pdm-python diff --git a/.pdm-python b/.pdm-python deleted file mode 100644 index e471425..0000000 --- a/.pdm-python +++ /dev/null @@ -1 +0,0 @@ -/Users/blairl/Documents/Dev/subcell-pipeline/.venv/bin/python \ No newline at end of file From f2dfd595aa0fa1c22291701e582c464096478726 Mon Sep 17 00:00:00 2001 From: Blair Lyons Date: Mon, 29 Jul 2024 15:54:02 -0700 Subject: [PATCH 63/63] more changes from review --- .../analysis/tomography_data/tomography_data.py | 7 +++++-- subcell_pipeline/simulation/readdy/parser.py | 4 ++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/subcell_pipeline/analysis/tomography_data/tomography_data.py b/subcell_pipeline/analysis/tomography_data/tomography_data.py index c400d55..8d4a8e4 100644 --- a/subcell_pipeline/analysis/tomography_data/tomography_data.py +++ b/subcell_pipeline/analysis/tomography_data/tomography_data.py @@ -1,3 +1,5 @@ +import os + import matplotlib.pyplot as plt import numpy as np import pandas as pd @@ -296,9 +298,9 @@ def plot_tomography_data_by_dataset( output_key File key for results. """ - figure, ax = plt.subplots(1, 3, figsize=(6, 2)) for dataset, group in data.groupby("dataset"): + figure, ax = plt.subplots(1, 3, figsize=(6, 2)) ax[1].set_title(dataset) views = ["XY", "XZ", "YZ"] @@ -313,4 +315,5 @@ def plot_tomography_data_by_dataset( ax[1].plot(fiber["xpos"], fiber["zpos"], marker="o", ms=1, lw=1) ax[2].plot(fiber["ypos"], fiber["zpos"], marker="o", ms=1, lw=1) - save_figure(bucket, output_key, figure) + base_name, ext = os.path.splitext(output_key) + save_figure(bucket, f"{base_name}_{dataset}.{ext}", figure) diff --git a/subcell_pipeline/simulation/readdy/parser.py b/subcell_pipeline/simulation/readdy/parser.py index 79a8ffc..6582311 100644 --- a/subcell_pipeline/simulation/readdy/parser.py +++ b/subcell_pipeline/simulation/readdy/parser.py @@ -209,7 +209,7 @@ def parse_readdy_simulation_single_fiber_trajectory( return dataframe -def round_2_sig_figs(x: float) -> int: +def _round_2_sig_figs(x: float) -> int: return int(round(x, -int(floor(log10(abs(0.1 * x)))))) @@ -254,7 +254,7 @@ def parse_readdy_simulation_data( total_steps: dict[str, int] = {} if compression: total_steps = { - cond: round_2_sig_figs( + cond: _round_2_sig_figs( (COMPRESSION_DISTANCE * 1e-3 / velocity_for_cond(cond)) * 1e10 ) for cond in condition_keys