def get_stitching_dict_proteomics(
    specimen_id: str,
    dataset_xml_path: str,
    downsample: Optional[int] = 2,
    relative_optimization_threshold: float = 2.5,
    absolute_optimization_threshold: float = 3.5,
    max_error: int = 3
) -> dict:
    """
    Assemble the parameter dictionary for a proteomics BigStitcher
    stitching run. The dictionary is meant to be dumped to the json
    file that configures the BigStitcher stitching step.

    Parameters
    ----------
    specimen_id: str
        Specimen ID, stored as a string under "session_id"
    dataset_xml_path: str
        Path where the BigStitcher dataset xml is located
    downsample: Optional[int] = 2
        Image multiscale used for stitching
    relative_optimization_threshold: float = 2.5
        Value stored under the phase correlation
        "relative_optimization_threshold" key
    absolute_optimization_threshold: float = 3.5
        Value stored under the phase correlation
        "absolute_optimization_threshold" key
    max_error: int = 3
        Value stored under the phase correlation
        "max_error" key

    Returns
    -------
    dict
        Dictionary with the stitching parameters
        used for bigstitcher
    """
    # Tunable settings of the phase correlation step
    phase_correlation_params = {
        "downsample": downsample,
        "relative_optimization_threshold": relative_optimization_threshold,
        "absolute_optimization_threshold": absolute_optimization_threshold,
        "max_error": max_error,
    }

    # Phase correlation is switched on; detection and registrations
    # are switched off for this configuration
    return {
        "session_id": str(specimen_id),
        "memgb": 100,
        "parallel": utils.get_code_ocean_cpu_limit(),
        "dataset_xml": str(dataset_xml_path),
        "do_phase_correlation": True,
        "proteomics_dataset": True,
        "do_detection": False,
        "do_registrations": False,
        "phase_correlation_params": phase_correlation_params,
    }
def get_project_name(base_data_dir: Union[str, Path] = "/data") -> str:
    """
    Load data_description.json from the data directory and extract
    its 'project_name' field.

    The file is looked up in the following locations, in this order;
    the first one that exists is used:
    1. {base_data_dir}/output_aind_metadata/data_description.json
    2. {base_data_dir}/data_description.json
    3. {base_data_dir}/{any_subdirectory}/data_description.json
       (matches are sorted so the choice is deterministic)

    Parameters
    ----------
    base_data_dir: Union[str, Path] = "/data"
        Root folder to search in

    Returns
    -------
    str
        The project name extracted from the 'project_name' field
        in data_description.json

    Raises
    ------
    FileNotFoundError
        If no data_description.json file is found in any of the
        search locations
    RuntimeError
        If an error occurs while loading or parsing the json file,
        or if the 'project_name' field is missing or empty
    """
    base_data_dir = Path(base_data_dir)

    candidates = [
        base_data_dir / "output_aind_metadata" / "data_description.json",
        base_data_dir / "data_description.json",
    ]

    # Extend with every subdirectory match instead of indexing the glob
    # result with [0]: an empty match list must fall through to the
    # FileNotFoundError below rather than raise IndexError. The base
    # path is escaped so glob metacharacters in it are taken literally.
    subdir_pattern = (
        f"{glob.escape(base_data_dir.as_posix())}/*/data_description.json"
    )
    candidates.extend(Path(match) for match in sorted(glob.glob(subdir_pattern)))

    # Find the first existing file
    json_file_path = next(
        (candidate for candidate in candidates if candidate.exists()), None
    )

    if json_file_path is None:
        raise FileNotFoundError(
            f"No data_description.json file found in {base_data_dir} or any of its subdirectories"
        )

    try:
        with open(json_file_path, "r", encoding="utf-8") as f:
            config = json.load(f)
        project_name = config.get("project_name")
        if not project_name:
            raise ValueError("'project_name' field not found in data_description.json")
        return project_name
    except Exception as e:
        # Every failure while reading the file is reported as RuntimeError
        # (the documented contract); chaining keeps the root cause visible
        # in the traceback.
        raise RuntimeError(f"Error loading data_description.json: {str(e)}") from e
proteomics_dataset_name=processed_asset_name, - res_for_transforms=(0.76, 0.76, 3.4), - scale_for_transforms=2, + # res_for_transforms=(0.76, 0.76, 3.4), + scale_for_transforms=4, # If this is provided, res for # transforms is ignored ) @@ -83,5 +105,6 @@ def combine_all_xmls(): print(f'Error combining xmls') if __name__ == "__main__": - # run_bigstitcher() - combine_all_xmls() \ No newline at end of file + run_bigstitcher() + # run_HCR_bigstitcher() + # combine_all_xmls() \ No newline at end of file diff --git a/environment/Dockerfile b/environment/Dockerfile index e0c9376..a76bd62 100644 --- a/environment/Dockerfile +++ b/environment/Dockerfile @@ -23,7 +23,7 @@ RUN conda install -y -c conda-forge -c bioconda \ conda clean -ya # --- Create Conda Env: proteomics_stitch --- -RUN conda create -y -n proteomics_stitch python=3.9 +RUN conda create -y -n proteomics_stitch python=3.9 # --- Activate Environment and Install Python Packages --- SHELL ["conda", "run", "-n", "proteomics_stitch", "/bin/bash", "-c"]