Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions docs/PREPROCESSING.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,17 @@ Preprocessing is divided into three steps that use two different virtual environ

# Punctate structures: Generate pointclouds

Edit the data paths in the following file to match the location of the outputs of the alignment, masking, and registration step, then run it.
Use the preprocessed data manifest generated by the alignment, masking, and registration steps of image preprocessing as input to the point cloud generation step.

```
src
└── br
└── data
   └── preprocessing
      └── pc_preprocessing
         └── punctate_cyto.py <- Point cloud sampling from raw images for punctate structures here
         └── pcna.py <- Point cloud sampling from raw images for DNA replication foci dataset here
         └── punctate_nuc.py <- Point cloud sampling from raw images of nuclear structures from the WTC-11 hIPS single cell image dataset here
         └── punctate_cyto.py <- Point cloud sampling from raw images of cytoplasmic structures from the WTC-11 hIPS single cell image dataset here
```

# Polymorphic structures: Generate SDFs
Expand Down
12 changes: 7 additions & 5 deletions src/br/analysis/visualize_pointclouds.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def main(args):
for _, this_image in orig_image_df.iterrows():
cell_id = this_image["CellId"]
if not strat:
strat_val = this_image['structure_name']
strat_val = this_image["structure_name"]

if args.dataset_name == "pcna":
points_all, _, img, center = compute_labels_pcna(this_image, False)
Expand Down Expand Up @@ -108,10 +108,12 @@ def main(args):
if mem_ind is not None:
img_mem = img[mem_ind]

if (args.dataset_name == 'other_punctate') and (strat_val in ["CETN2", "RAB5A", "SLC25A17"]):
img_raw = np.where(img_mem, img_raw, 0) # mask by mem/nuc seg
if (args.dataset_name == "other_punctate") and (
strat_val in ["CETN2", "RAB5A", "SLC25A17"]
):
img_raw = np.where(img_mem, img_raw, 0) # mask by mem/nuc seg
else:
img_raw = np.where(img_nuc, img_raw, 0) # mask by mem/nuc seg
img_raw = np.where(img_nuc, img_raw, 0) # mask by mem/nuc seg

# Sample sparse point cloud and get images
probs2 = points_all["s"].values
Expand Down Expand Up @@ -168,7 +170,7 @@ def main(args):
center_slice=center_slice,
)
ax_array[2].set_title("Sampling sparse PC")
print(f'Saving {name}.png')
print(f"Saving {name}.png")
fig.savefig(Path(args.save_path) / Path(f"{name}.png"), bbox_inches="tight", dpi=300)


Expand Down
50 changes: 44 additions & 6 deletions src/br/data/preprocessing/pc_preprocessing/pcna.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import argparse
from multiprocessing import Pool
from pathlib import Path

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -64,26 +66,34 @@ def compute_labels(row, save=True):

cell_id = str(row["CellId"])

save_path = path_prefix + cell_id + ".ply"
save_path = Path(path_prefix) / Path(cell_id + ".ply")

new_cents = new_cents.astype(float)

cloud = PyntCloud(new_cents)
cloud.to_file(save_path)
cloud.to_file(str(save_path))


def get_center_of_mass(img):
    """Return the rounded centroid of the positive elements of ``img``.

    Every element satisfying ``img > 0`` contributes with equal weight; the
    intensity values themselves are not used as weights. The mean index along
    each axis is rounded half up (``floor(x + 0.5)``) and cast to ``int``.

    Args:
        img: Array-like image of any dimensionality.

    Returns:
        1-D integer ``numpy.ndarray`` holding one index per axis of ``img``.

    Raises:
        ValueError: If ``img`` contains no positive element. Without this
            check the mean of an empty selection is NaN, and casting NaN to
            ``int`` yields an undefined value instead of a usable center.
    """
    # One row per positive element, one column per image axis.
    coords = np.argwhere(np.asarray(img) > 0)
    if len(coords) == 0:
        raise ValueError("Cannot compute center of mass: image has no positive values")
    # Adding 0.5 before flooring rounds halves up (np.round rounds to even).
    return np.floor(coords.mean(axis=0) + 0.5).astype(int)


if __name__ == "__main__":
df = pd.read_csv(PCNA_SINGLE_CELL_PATH)
def main(args):

# make save path directory
Path(args.save_path).mkdir(parents=True, exist_ok=True)

df = pd.read_parquet(args.preprocessed_manifest)

if args.global_path:
df["registered_path"] = df["registered_path"].apply(lambda x: args.global_path + x)

path_prefix = SAVE_LOCATION
global path_prefix
path_prefix = args.save_path

all_rows = []
for ind, row in tqdm(df.iterrows(), total=len(df)):
for _, row in tqdm(df.iterrows(), total=len(df)):
all_rows.append(row)

with Pool(40) as p:
Expand All @@ -97,3 +107,31 @@ def get_center_of_mass(img):
desc="compute_everything",
)
)


def build_arg_parser():
    """Create the command-line parser for the PCNA point cloud script.

    Returns:
        argparse.ArgumentParser: Parser exposing ``--save_path`` (required),
        ``--preprocessed_manifest`` (required) and ``--global_path``
        (optional, defaults to ``None``).
    """
    parser = argparse.ArgumentParser(
        description="Script for computing point clouds for PCNA dataset"
    )
    parser.add_argument("--save_path", type=str, required=True, help="Path to save results.")
    parser.add_argument(
        "--global_path",
        type=str,
        default=None,
        required=False,
        # main() concatenates this value in front of each "registered_path"
        # entry, so it is a prefix rather than a suffix.
        help="Path to prepend to relative paths in preprocessed manifest",
    )
    parser.add_argument(
        "--preprocessed_manifest",
        type=str,
        required=True,
        help="Path to processed single cell image manifest.",
    )
    return parser


if __name__ == "__main__":
    args = build_arg_parser().parse_args()
    main(args)

"""
Example run:

python src/br/data/preprocessing/pc_preprocessing/pcna.py --save_path "./make_pcs_test" --preprocessed_manifest "./subpackages/image_preprocessing/tmp_output_pcna/processed/manifest.parquet" --global_path "./subpackages/image_preprocessing/"
"""
51 changes: 45 additions & 6 deletions src/br/data/preprocessing/pc_preprocessing/punctate_cyto.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import argparse
import warnings
from multiprocessing import Pool
from pathlib import Path

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -96,25 +98,34 @@ def compute_labels(row, save=True):

cell_id = str(row["CellId"])

save_path = path_prefix + cell_id + ".ply"
save_path = Path(path_prefix) / Path(cell_id + ".ply")

new_cents = new_cents.astype(float)
cloud = PyntCloud(new_cents)
cloud.to_file(save_path)
cloud.to_file(str(save_path))


def get_center_of_mass(img):
    """Return the rounded centroid of the positive elements of ``img``.

    Every element satisfying ``img > 0`` contributes with equal weight; the
    intensity values themselves are not used as weights. The mean index along
    each axis is rounded half up (``floor(x + 0.5)``) and cast to ``int``.

    Args:
        img: Array-like image of any dimensionality.

    Returns:
        1-D integer ``numpy.ndarray`` holding one index per axis of ``img``.

    Raises:
        ValueError: If ``img`` contains no positive element. Without this
            check the mean of an empty selection is NaN, and casting NaN to
            ``int`` yields an undefined value instead of a usable center.
    """
    # One row per positive element, one column per image axis.
    coords = np.argwhere(np.asarray(img) > 0)
    if len(coords) == 0:
        raise ValueError("Cannot compute center of mass: image has no positive values")
    # Adding 0.5 before flooring rounds halves up (np.round rounds to even).
    return np.floor(coords.mean(axis=0) + 0.5).astype(int)


if __name__ == "__main__":
df = pd.read_parquet(SINGLE_CELL_IMAGE_PATH)
def main(args):

# make save path directory
Path(args.save_path).mkdir(parents=True, exist_ok=True)

df = pd.read_parquet(args.preprocessed_manifest)
df = df.loc[df["structure_name"].isin(SKEW_EXP_DICT.keys())]

path_prefix = SAVE_LOCATION
if args.global_path:
df["registered_path"] = df["registered_path"].apply(lambda x: args.global_path + x)

global path_prefix
path_prefix = args.save_path

all_rows = []
for ind, row in tqdm(df.iterrows(), total=len(df)):
for _, row in tqdm(df.iterrows(), total=len(df)):
all_rows.append(row)

with Pool(40) as p:
Expand All @@ -128,3 +139,31 @@ def get_center_of_mass(img):
desc="compute_everything",
)
)


def build_arg_parser():
    """Create the command-line parser for the cytoplasmic point cloud script.

    Returns:
        argparse.ArgumentParser: Parser exposing ``--save_path`` (required),
        ``--preprocessed_manifest`` (required) and ``--global_path``
        (optional, defaults to ``None``).
    """
    parser = argparse.ArgumentParser(
        description="Script for computing point clouds for cytoplasmic structures from WTC-11 hIPS single cell image dataset"
    )
    parser.add_argument("--save_path", type=str, required=True, help="Path to save results.")
    parser.add_argument(
        "--global_path",
        type=str,
        default=None,
        required=False,
        # main() concatenates this value in front of each "registered_path"
        # entry, so it is a prefix rather than a suffix.
        help="Path to prepend to relative paths in preprocessed manifest",
    )
    parser.add_argument(
        "--preprocessed_manifest",
        type=str,
        required=True,
        help="Path to processed single cell image manifest.",
    )
    return parser


if __name__ == "__main__":
    args = build_arg_parser().parse_args()
    main(args)

"""
Example run:

python src/br/data/preprocessing/pc_preprocessing/punctate_cyto.py --save_path "./make_pcs_test" --preprocessed_manifest "./subpackages/image_preprocessing/tmp_output_variance/processed/manifest.parquet" --global_path "./subpackages/image_preprocessing/"
"""
60 changes: 49 additions & 11 deletions src/br/data/preprocessing/pc_preprocessing/punctate_nuc.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
import argparse
from multiprocessing import Pool
from pathlib import Path

import numpy as np
import pandas as pd
from pyntcloud import PyntCloud
from scipy.ndimage import binary_dilation
from skimage.io import imread
from tqdm import tqdm

STRUCTS = ["HIST1H2BJ", "NUP153", "SMC1A", "SON"]


def compute_labels(row, save=True):
path = row["registered_path"]
Expand Down Expand Up @@ -64,31 +70,35 @@ def compute_labels(row, save=True):

cell_id = str(row["CellId"])

save_path = path_prefix + cell_id + ".ply"
save_path = Path(path_prefix) / Path(cell_id + ".ply")

new_cents = new_cents.astype(float)
cloud = PyntCloud(new_cents)
cloud.to_file(save_path)
cloud.to_file(str(save_path))


def get_center_of_mass(img):
    """Return the rounded centroid of the positive elements of ``img``.

    Every element satisfying ``img > 0`` contributes with equal weight; the
    intensity values themselves are not used as weights. The mean index along
    each axis is rounded half up (``floor(x + 0.5)``) and cast to ``int``.

    Args:
        img: Array-like image of any dimensionality.

    Returns:
        1-D integer ``numpy.ndarray`` holding one index per axis of ``img``.

    Raises:
        ValueError: If ``img`` contains no positive element. Without this
            check the mean of an empty selection is NaN, and casting NaN to
            ``int`` yields an undefined value instead of a usable center.
    """
    # One row per positive element, one column per image axis.
    coords = np.argwhere(np.asarray(img) > 0)
    if len(coords) == 0:
        raise ValueError("Cannot compute center of mass: image has no positive values")
    # Adding 0.5 before flooring rounds halves up (np.round rounds to even).
    return np.floor(coords.mean(axis=0) + 0.5).astype(int)


if __name__ == "__main__":
df = pd.read_parquet(SINGLE_CELL_IMAGE_PATH)
def main(args):

# make save path directory
Path(args.save_path).mkdir(parents=True, exist_ok=True)

df = pd.read_parquet(args.preprocessed_manifest)
df = df.loc[df["structure_name"].isin(STRUCTS)]

path_prefix = SAVE_LOCATION
if args.global_path:
df["registered_path"] = df["registered_path"].apply(lambda x: args.global_path + x)

global path_prefix
path_prefix = args.save_path

all_rows = []
for ind, row in tqdm(df.iterrows(), total=len(df)):
for _, row in tqdm(df.iterrows(), total=len(df)):
all_rows.append(row)
# if str(row['CellId']) == '660844':
# print('yes')
# compute_labels(row)

from multiprocessing import Pool

with Pool(40) as p:
_ = tuple(
Expand All @@ -101,3 +111,31 @@ def get_center_of_mass(img):
desc="compute_everything",
)
)


def build_arg_parser():
    """Create the command-line parser for the nuclear point cloud script.

    Returns:
        argparse.ArgumentParser: Parser exposing ``--save_path`` (required),
        ``--preprocessed_manifest`` (required) and ``--global_path``
        (optional, defaults to ``None``).
    """
    parser = argparse.ArgumentParser(
        description="Script for computing point clouds for nuclear structures from WTC-11 hIPS single cell image dataset"
    )
    parser.add_argument("--save_path", type=str, required=True, help="Path to save results.")
    parser.add_argument(
        "--global_path",
        type=str,
        default=None,
        required=False,
        # main() concatenates this value in front of each "registered_path"
        # entry, so it is a prefix rather than a suffix.
        help="Path to prepend to relative paths in preprocessed manifest",
    )
    parser.add_argument(
        "--preprocessed_manifest",
        type=str,
        required=True,
        help="Path to processed single cell image manifest.",
    )
    return parser


if __name__ == "__main__":
    args = build_arg_parser().parse_args()
    main(args)

"""
Example run:

python src/br/data/preprocessing/pc_preprocessing/punctate_nuc.py --save_path "./make_pcs_test" --preprocessed_manifest "./subpackages/image_preprocessing/tmp_output_variance/processed/manifest.parquet" --global_path "./subpackages/image_preprocessing/"
"""
Loading