Skip to content
24 changes: 2 additions & 22 deletions .codeocean/datasets.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,8 @@
"version": 1,
"attached_datasets": [
{
"id": "02f566df-130c-489c-bcdf-a21242e333fc",
"mount": "HCR_807074_2025-07-30_13-45-00_processed_2025-10-02_06-30-15"
},
{
"id": "15f2c27b-3f03-4765-b2c7-0a98a0098fea",
"mount": "HCR_000000-s49_2025-08-13_13-00-00_processed_2025-09-10_22-57-56"
},
{
"id": "20240ace-9ff4-4f2f-8657-5be3757fb696",
"mount": "HCR_000000-s43_2025-07-24_13-00-00_processed_2025-09-03_17-16-44_1"
},
{
"id": "29d33eaa-b409-4e07-8c7e-5665ca27b8cb",
"mount": "HCR_807074_2025-08-26_15-45-00_processed_2025-10-02_22-49-01"
},
{
"id": "3f6bc1aa-89db-439d-b4f9-087ca4833705",
"mount": "HCR_000000-s43_2025-07-24_13-00-00_processed_2025-09-03_17-16-44"
},
{
"id": "52fe3938-46b5-4ddc-a2df-3c9ca3256a13",
"mount": "HCR_807074_2025-08-27_11-45-00_processed_2025-10-02_06-30-17"
"id": "0964fa71-ddd4-4ce3-9d9c-7446b2a8f004",
"mount": "HCR_823476-s5-ls2_2025-12-24_00-00-00_processed_2026-01-03_01-30-29"
}
]
}
62 changes: 60 additions & 2 deletions code/aind_proteomics_stitch/bigstitcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,52 @@ def get_data_config(

return derivatives_dict, proteomics_dataset, acquisition_dict

def get_stitching_dict_proteomics(
    specimen_id: str,
    dataset_xml_path: str,
    downsample: Optional[int] = 2,
    relative_optimization_threshold: float = 2.5,
    absolute_optimization_threshold: float = 3.5,
    max_error: int = 3,
    memgb: int = 100,
) -> dict:
    """
    Build the stitching dictionary used to create the JSON file that
    provides parameters for a proteomics BigStitcher stitching run.

    Proteomics runs use phase correlation only: interest-point detection
    and registration are disabled.

    Parameters
    ----------
    specimen_id: str
        Specimen ID
    dataset_xml_path: str
        Path where the xml is located
    downsample: Optional[int]
        Image multiscale used for stitching. Default: 2
    relative_optimization_threshold: float
        Relative threshold passed to the phase-correlation global
        optimization. Default: 2.5
    absolute_optimization_threshold: float
        Absolute threshold passed to the phase-correlation global
        optimization. Default: 3.5
    max_error: int
        Maximum allowed error for phase correlation. Default: 3
    memgb: int
        Memory budget (GB) handed to BigStitcher. Default: 100

    Returns
    -------
    dict
        Dictionary with the stitching parameters
        used for bigstitcher
    """
    # NOTE(review): the original asserted dataset_xml_path exists; kept
    # disabled to preserve behavior — consider validating upstream.
    # assert pathlib.Path(dataset_xml_path).exists()

    stitching_dict = {
        "session_id": str(specimen_id),
        "memgb": memgb,
        # Worker count is capped by the Code Ocean CPU limit
        "parallel": utils.get_code_ocean_cpu_limit(),
        "dataset_xml": str(dataset_xml_path),
        "do_phase_correlation": True,
        "proteomics_dataset": True,
        "do_detection": False,
        "do_registrations": False,
        "phase_correlation_params": {
            "downsample": downsample,
            "relative_optimization_threshold": relative_optimization_threshold,
            "absolute_optimization_threshold": absolute_optimization_threshold,
            "max_error": max_error,
        },
    }
    return stitching_dict

def get_stitching_dict(
specimen_id: str, dataset_xml_path: str, downsample: Optional[int] = 2
Expand Down Expand Up @@ -229,11 +275,23 @@ def main(
scale_for_transforms = int(scale_for_transforms)

# print(f"Voxel resolution: {voxel_resolution} - Estimating transforms in res: {res_for_transforms} - Scale: {scale_for_transforms}")
proteomics_stitching_params = get_stitching_dict(
project_name = utils.get_project_name()
if project_name == "PLACE":
# print(f'Project name {project_name}: generating proteomics stitching dict...')
#use different parameters
proteomics_stitching_params = get_stitching_dict_proteomics(
specimen_id=proteomics_dataset_name,
dataset_xml_path=output_big_stitcher_xml,
downsample=scale_for_transforms,
)
)
else:
# print(f'Project name {project_name}: generating HCR stitching dict...')

proteomics_stitching_params = get_stitching_dict(
specimen_id=proteomics_dataset_name,
dataset_xml_path=output_big_stitcher_xml,
downsample=scale_for_transforms,
)
end_time = time()

output_big_stitcher_json = f"{results_folder}/{proteomics_dataset_name}_stitch_channel_{channel_wavelength}_params.json"
Expand Down
59 changes: 59 additions & 0 deletions code/aind_proteomics_stitch/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import time
from datetime import datetime
from pathlib import Path
import glob
from typing import Any, List, Optional, Tuple, Union

import matplotlib.pyplot as plt
Expand All @@ -25,6 +26,64 @@
# IO types
PathLike = Union[str, Path]

def get_project_name() -> str:
    """
    Load data_description.json from the /data directory and return its
    'project_name' field.

    Searches for data_description.json in multiple possible locations,
    in order:
    1. /data/output_aind_metadata/data_description.json
    2. /data/data_description.json
    3. /data/{any_subdirectory}/data_description.json

    Returns
    -------
    str
        The project name extracted from the 'project_name' field in
        data_description.json

    Raises
    ------
    FileNotFoundError
        If no data_description.json file is found in any of the search
        locations
    RuntimeError
        If error occurs while loading or parsing the JSON configuration
        (including a missing 'project_name' field)
    """
    base_data_dir = Path("/data")

    # Fixed locations first, then any first-level subdirectory.
    # BUG FIX: the original indexed glob.glob(...)[0] while *building*
    # this list, which raised IndexError whenever no subdirectory held a
    # data_description.json — even if one of the fixed locations existed.
    # Splatting all glob matches keeps the search order and never raises.
    search_patterns = [
        base_data_dir / "output_aind_metadata" / "data_description.json",
        base_data_dir / "data_description.json",
        *glob.glob(f"{base_data_dir.as_posix()}/*/data_description.json"),
    ]

    # Find the first existing file
    json_file_path = None
    for json_path in search_patterns:
        if Path(json_path).exists():
            json_file_path = json_path
            break

    if json_file_path is None:
        raise FileNotFoundError(
            f"No data_description.json file found in {base_data_dir} "
            "or any of its subdirectories"
        )

    try:
        with open(json_file_path, "r") as f:
            config = json.load(f)
        project_name = config.get("project_name")
        if not project_name:
            raise ValueError(
                "'project_name' field not found in data_description.json"
            )
        return project_name
    except Exception as e:
        # Chain the original exception so the root cause stays visible.
        raise RuntimeError(
            f"Error loading data_description.json: {str(e)}"
        ) from e


def get_code_ocean_cpu_limit():
"""
Expand Down
37 changes: 30 additions & 7 deletions code/run_manually.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,33 @@ def run_offpipeline():
# # Printing to get output on batch script
# print(output_big_stitcher_json)

def run_bigstitcher():
processed_asset_name = f'HCR_807074_2025-08-26_15-45-00_processed_2025-10-02_22-49-01'

def run_HCR_bigstitcher():
    """
    Manually run BigStitcher stitching for one hard-coded HCR processed
    asset: points at the S3 radial-corrected images and the local
    acquisition.json, then delegates to bigstitcher.main.
    """
    # Plain strings: the originals used f'' prefixes with no placeholders
    processed_asset_name = "HCR_000000-s43_2025-07-24_13-00-00_processed_2026-01-20_18-33-48"
    stitching_channel = 405
    path_to_data = f"s3://aind-open-data/{processed_asset_name}/image_radial_correction"
    # Voxel size in microns — presumably (z, y, x); confirm against acquisition.json
    voxel_resolution = (1, 0.2466335423895654, 0.2466335423895654)
    results_folder = Path("/results")
    acquisition_path = f"/data/{processed_asset_name}/acquisition.json"

    bigstitcher.main(
        path_to_data=path_to_data,
        channel_wavelength=stitching_channel,
        acquisition_path=acquisition_path,
        voxel_resolution=voxel_resolution,
        results_folder=results_folder,
        proteomics_dataset_name=processed_asset_name,
        # res_for_transforms=(0.76, 0.76, 3.4),
        # If scale_for_transforms is provided, res_for_transforms is ignored
        scale_for_transforms=4,
    )

def run_bigstitcher():
processed_asset_name = f'HCR_823476-s5-ls2_2025-12-24_00-00-00_processed_2026-01-03_01-30-29'
stitching_channel=488
path_to_data = f"s3://aind-open-data/{processed_asset_name}/image_radial_correction"
voxel_resolution = (0.459, 0.0920179382864407, 0.0920179382864407)
results_folder = Path('/results')
acquisition_path = f"/data/{processed_asset_name}/acquisition.json"

Expand All @@ -58,8 +80,8 @@ def run_bigstitcher():
voxel_resolution=voxel_resolution,
results_folder=results_folder,
proteomics_dataset_name=processed_asset_name,
res_for_transforms=(0.76, 0.76, 3.4),
scale_for_transforms=2,
# res_for_transforms=(0.76, 0.76, 3.4),
scale_for_transforms=4,
# If this is provided, res for
# transforms is ignored
)
Expand All @@ -83,5 +105,6 @@ def combine_all_xmls():
print(f'Error combining xmls')

if __name__ == "__main__":
# run_bigstitcher()
combine_all_xmls()
run_bigstitcher()
# run_HCR_bigstitcher()
# combine_all_xmls()
2 changes: 1 addition & 1 deletion environment/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ RUN conda install -y -c conda-forge -c bioconda \
conda clean -ya

# --- Create Conda Env: proteomics_stitch ---
RUN conda create -y -n proteomics_stitch python=3.9
RUN conda create -y -n proteomics_stitch python=3.9

# --- Activate Environment and Install Python Packages ---
SHELL ["conda", "run", "-n", "proteomics_stitch", "/bin/bash", "-c"]
Expand Down