From 01ff42ddb31e86d2462f97131ce9301b41329018 Mon Sep 17 00:00:00 2001 From: cbrrrry Date: Wed, 21 Jan 2026 21:40:41 +0000 Subject: [PATCH 1/9] add proteomics switch to bigstitcher.py for removing max_shift and min_correlation from phase correlation params --- code/aind_proteomics_stitch/bigstitcher.py | 51 ++++++++++++++++++- code/aind_proteomics_stitch/utils/utils.py | 58 ++++++++++++++++++++++ 2 files changed, 107 insertions(+), 2 deletions(-) diff --git a/code/aind_proteomics_stitch/bigstitcher.py b/code/aind_proteomics_stitch/bigstitcher.py index 52fcf25..22b68cc 100644 --- a/code/aind_proteomics_stitch/bigstitcher.py +++ b/code/aind_proteomics_stitch/bigstitcher.py @@ -99,6 +99,43 @@ def get_data_config( return derivatives_dict, proteomics_dataset, acquisition_dict +def get_stitching_dict_proteomics( + specimen_id: str, dataset_xml_path: str, downsample: Optional[int] = 2 +) -> dict: + """ + A function that writes a stitching dictioonary that will be used for + creating a json file that gives parmaters to bigstitcher sittching run + + Parameters + ---------- + specimen_id: str + Specimen ID + dataset_xml_path: str + Path where the xml is located + downsample: Optional[int] = 2 + Image multiscale used for stitching + + Returns + ------- + dict + Dictionary with the stitching parameters + used for bigstitcher + """ + # assert pathlib.Path(dataset_xml_path).exists() + + stitching_dict = { + "session_id": str(specimen_id), + "memgb": 100, + "parallel": utils.get_code_ocean_cpu_limit(), + "dataset_xml": str(dataset_xml_path), + "do_phase_correlation": True, + "do_detection": False, + "do_registrations": False, + "phase_correlation_params": { + "downsample": downsample, + }, + } + return stitching_dict def get_stitching_dict( specimen_id: str, dataset_xml_path: str, downsample: Optional[int] = 2 @@ -229,11 +266,21 @@ def main( scale_for_transforms = int(scale_for_transforms) # print(f"Voxel resolution: {voxel_resolution} - Estimating transforms in res: {res_for_transforms} - Scale: {scale_for_transforms}") - proteomics_stitching_params = get_stitching_dict( + project_name = utils.get_project_name() + if project_name == "PLACE": + #use different parameters + proteomics_stitching_params = get_stitching_dict_proteomics( specimen_id=proteomics_dataset_name, dataset_xml_path=output_big_stitcher_xml, downsample=scale_for_transforms, - ) + ) + else: + + proteomics_stitching_params = get_stitching_dict( + specimen_id=proteomics_dataset_name, + dataset_xml_path=output_big_stitcher_xml, + downsample=scale_for_transforms, + ) end_time = time() output_big_stitcher_json = f"{results_folder}/{proteomics_dataset_name}_stitch_channel_{channel_wavelength}_params.json" diff --git a/code/aind_proteomics_stitch/utils/utils.py b/code/aind_proteomics_stitch/utils/utils.py index 13fc01b..e4ca445 100644 --- a/code/aind_proteomics_stitch/utils/utils.py +++ b/code/aind_proteomics_stitch/utils/utils.py @@ -25,6 +25,64 @@ # IO types PathLike = Union[str, Path] +def get_project_name() ->str: + """ + Load the data_description.json file from the data directory and extract project_name . + + Searches for data_description.json in multiple possible locations using glob patterns: + 1. ../data/output_aind_metadata/ + 2. ../data/ + 3. ../data/{any_subdirectory}/ + + Returns + ------- + str + The project name extracted from the 'project_name' field in data_description.json + + Raises + ------ + FileNotFoundError + If no data_description.json file is found in any of the search locations + RuntimeError + If error occurs while loading or parsing the JSON configuration + """ + base_data_dir = pathlib.Path("/data") + + # Use glob to search for data_description.json in all possible locations + search_patterns = [ + base_data_dir / "output_aind_metadata" / "data_description.json", + base_data_dir / "data_description.json", + # glob.glob(f"{base_data_dir.as_posix()}/data_description.json")[0], # Any subdirectory + glob.glob(f"{base_data_dir.as_posix()}/*/data_description.json")[0], + + ] + + # Find the first existing file + json_file_path = None + for json_path in search_patterns: + if pathlib.Path(json_path).exists(): + json_file_path = json_path + logger.info(f"Found data_description.json at: {json_file_path}") + break + + if json_file_path is None: + raise FileNotFoundError( + f"No data_description.json file found in {base_data_dir} or any of its subdirectories" + ) + + logger.info(f"Loading configuration from {json_file_path}") + + try: + with open(json_file_path, 'r') as f: + config = json.load(f) + project_name = config.get('project_name') + if not project_name: + raise ValueError("'project_name' field not found in data_description.json") + logger.info(f"Loaded project_name : {project_name}") + return project_name + except Exception as e: + raise RuntimeError(f"Error loading data_description.json: {str(e)}") + def get_code_ocean_cpu_limit(): """ From effdc3b75e78734fd96604c679b17803246b254f Mon Sep 17 00:00:00 2001 From: cbrrrry Date: Wed, 21 Jan 2026 21:56:07 +0000 Subject: [PATCH 2/9] add max_error, relative and absolute thresholds --- code/aind_proteomics_stitch/bigstitcher.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/code/aind_proteomics_stitch/bigstitcher.py b/code/aind_proteomics_stitch/bigstitcher.py index 22b68cc..3becb7c 100644 --- a/code/aind_proteomics_stitch/bigstitcher.py +++ b/code/aind_proteomics_stitch/bigstitcher.py @@ -100,7 +100,12 @@ def get_data_config( return derivatives_dict, proteomics_dataset, acquisition_dict def get_stitching_dict_proteomics( - specimen_id: str, dataset_xml_path: str, downsample: Optional[int] = 2 + specimen_id: str, + dataset_xml_path: str, + downsample: Optional[int] = 2, + relative_optimization_threshold: float = 2.5, + absolute_optimization_threshold: float = 3.5, + max_error: int = 3 ) -> dict: """ A function that writes a stitching dictioonary that will be used for @@ -133,6 +138,9 @@ def get_stitching_dict_proteomics( "do_registrations": False, "phase_correlation_params": { "downsample": downsample, + "relative_optimization_threshold": relative_optimization_threshold, + "absolute_optimization_threshold": absolute_optimization_threshold, + "max_error": max_error }, } return stitching_dict From 6f63cbcb9dcbfcc11d18d24ff6d8b819f4dec643 Mon Sep 17 00:00:00 2001 From: cbrrrry Date: Wed, 21 Jan 2026 22:06:30 +0000 Subject: [PATCH 3/9] add switch for proteomics phase correlation json --- code/aind_proteomics_stitch/bigstitcher.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/code/aind_proteomics_stitch/bigstitcher.py b/code/aind_proteomics_stitch/bigstitcher.py index 3becb7c..18e2910 100644 --- a/code/aind_proteomics_stitch/bigstitcher.py +++ b/code/aind_proteomics_stitch/bigstitcher.py @@ -134,6 +134,7 @@ def get_stitching_dict_proteomics( "parallel": utils.get_code_ocean_cpu_limit(), "dataset_xml": str(dataset_xml_path), "do_phase_correlation": True, + "proteomics_dataset": True, "do_detection": False, "do_registrations": False, "phase_correlation_params": { @@ -276,6 +277,7 @@ def main( # print(f"Voxel resolution: {voxel_resolution} - Estimating transforms in res: {res_for_transforms} - Scale: {scale_for_transforms}") project_name = utils.get_project_name() if project_name == "PLACE": + print(f'Project name {project_name}: generating proteomics stitching dict...') #use different parameters proteomics_stitching_params = get_stitching_dict_proteomics( specimen_id=proteomics_dataset_name, @@ -283,6 +285,7 @@ def main( downsample=scale_for_transforms, ) else: + print(f'Project name {project_name}: generating HCR stitching dict...') proteomics_stitching_params = get_stitching_dict( specimen_id=proteomics_dataset_name, From 442a4dbe0e0e52288a8f481da9951e48a213bd85 Mon Sep 17 00:00:00 2001 From: cbrrrry Date: Wed, 21 Jan 2026 22:50:56 +0000 Subject: [PATCH 4/9] remove print statements for production --- code/aind_proteomics_stitch/bigstitcher.py | 4 ++-- code/aind_proteomics_stitch/utils/utils.py | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/code/aind_proteomics_stitch/bigstitcher.py b/code/aind_proteomics_stitch/bigstitcher.py index 18e2910..9327ef2 100644 --- a/code/aind_proteomics_stitch/bigstitcher.py +++ b/code/aind_proteomics_stitch/bigstitcher.py @@ -277,7 +277,7 @@ def main( # print(f"Voxel resolution: {voxel_resolution} - Estimating transforms in res: {res_for_transforms} - Scale: {scale_for_transforms}") project_name = utils.get_project_name() if project_name == "PLACE": - print(f'Project name {project_name}: generating proteomics stitching dict...') + # print(f'Project name {project_name}: generating proteomics stitching dict...') #use different parameters proteomics_stitching_params = get_stitching_dict_proteomics( specimen_id=proteomics_dataset_name, @@ -285,7 +285,7 @@ def main( downsample=scale_for_transforms, ) else: - print(f'Project name {project_name}: generating HCR stitching dict...') + # print(f'Project name {project_name}: generating HCR stitching dict...') proteomics_stitching_params = get_stitching_dict( specimen_id=proteomics_dataset_name, diff --git a/code/aind_proteomics_stitch/utils/utils.py b/code/aind_proteomics_stitch/utils/utils.py index e4ca445..60f31f6 100644 --- a/code/aind_proteomics_stitch/utils/utils.py +++ b/code/aind_proteomics_stitch/utils/utils.py @@ -13,6 +13,7 @@ import time from datetime import datetime from pathlib import Path +import glob from typing import Any, List, Optional, Tuple, Union import matplotlib.pyplot as plt @@ -46,7 +47,7 @@ def get_project_name() ->str: RuntimeError If error occurs while loading or parsing the JSON configuration """ - base_data_dir = pathlib.Path("/data") + base_data_dir = Path("/data") # Use glob to search for data_description.json in all possible locations search_patterns = [ @@ -60,9 +61,9 @@ def get_project_name() ->str: # Find the first existing file json_file_path = None for json_path in search_patterns: - if pathlib.Path(json_path).exists(): + if Path(json_path).exists(): json_file_path = json_path - logger.info(f"Found data_description.json at: {json_file_path}") + # print(f"Found data_description.json at: {json_file_path}") break if json_file_path is None: @@ -70,7 +71,7 @@ def get_project_name() ->str: f"No data_description.json file found in {base_data_dir} or any of its subdirectories" ) - logger.info(f"Loading configuration from {json_file_path}") + # print(f"Loading configuration from {json_file_path}") try: with open(json_file_path, 'r') as f: @@ -78,7 +79,7 @@ def get_project_name() ->str: project_name = config.get('project_name') if not project_name: raise ValueError("'project_name' field not found in data_description.json") - logger.info(f"Loaded project_name : {project_name}") + # print(f"Loaded project_name : {project_name}") return project_name except Exception as e: raise RuntimeError(f"Error loading data_description.json: {str(e)}") From da9294e09c1d66dd04365e7c9f7c2dcb00920b1c Mon Sep 17 00:00:00 2001 From: cbrrrry Date: Wed, 21 Jan 2026 23:07:56 +0000 Subject: [PATCH 5/9] changed datasets and tested manually --- .codeocean/datasets.json | 24 ++---------------------- code/run_manually.py | 37 ++++++++++++++++++++++++++++++------- 2 files changed, 32 insertions(+), 29 deletions(-) diff --git a/.codeocean/datasets.json b/.codeocean/datasets.json index f6bacca..258900b 100644 --- a/.codeocean/datasets.json +++ b/.codeocean/datasets.json @@ -2,28 +2,8 @@ "version": 1, "attached_datasets": [ { - "id": "02f566df-130c-489c-bcdf-a21242e333fc", - "mount": "HCR_807074_2025-07-30_13-45-00_processed_2025-10-02_06-30-15" - }, - { - "id": "15f2c27b-3f03-4765-b2c7-0a98a0098fea", - "mount": "HCR_000000-s49_2025-08-13_13-00-00_processed_2025-09-10_22-57-56" - }, - { - "id": "20240ace-9ff4-4f2f-8657-5be3757fb696", - "mount": "HCR_000000-s43_2025-07-24_13-00-00_processed_2025-09-03_17-16-44_1" - }, - { - "id": "29d33eaa-b409-4e07-8c7e-5665ca27b8cb", - "mount": "HCR_807074_2025-08-26_15-45-00_processed_2025-10-02_22-49-01" - }, - { - "id": "3f6bc1aa-89db-439d-b4f9-087ca4833705", - "mount": "HCR_000000-s43_2025-07-24_13-00-00_processed_2025-09-03_17-16-44" - }, - { - "id": "52fe3938-46b5-4ddc-a2df-3c9ca3256a13", - "mount": "HCR_807074_2025-08-27_11-45-00_processed_2025-10-02_06-30-17" + "id": "0964fa71-ddd4-4ce3-9d9c-7446b2a8f004", + "mount": "HCR_823476-s5-ls2_2025-12-24_00-00-00_processed_2026-01-03_01-30-29" } ] } \ No newline at end of file diff --git a/code/run_manually.py b/code/run_manually.py index d3c6af1..f52348c 100644 --- a/code/run_manually.py +++ b/code/run_manually.py @@ -43,11 +43,33 @@ def run_offpipeline(): # # Printing to get output on batch script # print(output_big_stitcher_json) -def run_bigstitcher(): - processed_asset_name = f'HCR_807074_2025-08-26_15-45-00_processed_2025-10-02_22-49-01' + +def run_HCR_bigstitcher(): + processed_asset_name = f'HCR_000000-s43_2025-07-24_13-00-00_processed_2026-01-20_18-33-48' stitching_channel=405 path_to_data = f"s3://aind-open-data/{processed_asset_name}/image_radial_correction" - voxel_resolution = (1, 0.3880046677791278, 0.3880046677791278) + voxel_resolution = (1, 0.2466335423895654, 0.2466335423895654) + results_folder = Path('/results') + acquisition_path = f"/data/{processed_asset_name}/acquisition.json" + + bigstitcher.main( + path_to_data=path_to_data, + channel_wavelength=stitching_channel, + acquisition_path=acquisition_path, + voxel_resolution=voxel_resolution, + results_folder=results_folder, + proteomics_dataset_name=processed_asset_name, + # res_for_transforms=(0.76, 0.76, 3.4), + scale_for_transforms=4, + # If this is provided, res for + # transforms is ignored + ) + +def run_bigstitcher(): + processed_asset_name = f'HCR_823476-s5-ls2_2025-12-24_00-00-00_processed_2026-01-03_01-30-29' + stitching_channel=488 + path_to_data = f"s3://aind-open-data/{processed_asset_name}/image_radial_correction" + voxel_resolution = (0.459, 0.0920179382864407, 0.0920179382864407) results_folder = Path('/results') acquisition_path = f"/data/{processed_asset_name}/acquisition.json" @@ -58,8 +80,8 @@ def run_bigstitcher(): voxel_resolution=voxel_resolution, results_folder=results_folder, proteomics_dataset_name=processed_asset_name, - res_for_transforms=(0.76, 0.76, 3.4), - scale_for_transforms=2, + # res_for_transforms=(0.76, 0.76, 3.4), + scale_for_transforms=4, # If this is provided, res for # transforms is ignored ) @@ -83,5 +105,6 @@ def combine_all_xmls(): print(f'Error combining xmls') if __name__ == "__main__": - # run_bigstitcher() - combine_all_xmls() \ No newline at end of file + run_bigstitcher() + # run_HCR_bigstitcher() + # combine_all_xmls() \ No newline at end of file From f5a9adf4f1edbb0c5e6e2499693af9728fa81ae9 Mon Sep 17 00:00:00 2001 From: cbrrrry Date: Wed, 21 Jan 2026 23:09:47 +0000 Subject: [PATCH 6/9] Edited Dockerfile --- environment/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment/Dockerfile b/environment/Dockerfile index e0c9376..a76bd62 100644 --- a/environment/Dockerfile +++ b/environment/Dockerfile @@ -23,7 +23,7 @@ RUN conda install -y -c conda-forge -c bioconda \ conda clean -ya # --- Create Conda Env: proteomics_stitch --- -RUN conda create -y -n proteomics_stitch python=3.9 +RUN conda create -y -n proteomics_stitch python=3.9 # --- Activate Environment and Install Python Packages --- SHELL ["conda", "run", "-n", "proteomics_stitch", "/bin/bash", "-c"] From fc93dcbab45760fabb534863a8e44b3e76cbe05b Mon Sep 17 00:00:00 2001 From: seanfite-alleninstitute Date: Thu, 12 Feb 2026 19:45:26 +0000 Subject: [PATCH 7/9] Updated get_stitching_dict_proteomics func by changing absolute opt threshold from 3.5 to 2.5 --- code/aind_proteomics_stitch/bigstitcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/aind_proteomics_stitch/bigstitcher.py b/code/aind_proteomics_stitch/bigstitcher.py index 9327ef2..0302a62 100644 --- a/code/aind_proteomics_stitch/bigstitcher.py +++ b/code/aind_proteomics_stitch/bigstitcher.py @@ -104,7 +104,7 @@ def get_stitching_dict_proteomics( dataset_xml_path: str, downsample: Optional[int] = 2, relative_optimization_threshold: float = 2.5, - absolute_optimization_threshold: float = 3.5, + absolute_optimization_threshold: float = 2.5, max_error: int = 3 ) -> dict: """ From 3aefcfeef98293f7046a59a09e79ca1fd96ddb37 Mon Sep 17 00:00:00 2001 From: seanfite-alleninstitute Date: Thu, 12 Feb 2026 20:40:59 +0000 Subject: [PATCH 8/9] Updated get_stitching_dict_proteomics func params to set all thresholds and max error to 2 --- code/aind_proteomics_stitch/bigstitcher.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/code/aind_proteomics_stitch/bigstitcher.py b/code/aind_proteomics_stitch/bigstitcher.py index 0302a62..1eb0a8c 100644 --- a/code/aind_proteomics_stitch/bigstitcher.py +++ b/code/aind_proteomics_stitch/bigstitcher.py @@ -103,9 +103,9 @@ def get_stitching_dict_proteomics( specimen_id: str, dataset_xml_path: str, downsample: Optional[int] = 2, - relative_optimization_threshold: float = 2.5, - absolute_optimization_threshold: float = 2.5, - max_error: int = 3 + relative_optimization_threshold: float = 2, + absolute_optimization_threshold: float = 2, + max_error: int = 2 ) -> dict: """ A function that writes a stitching dictioonary that will be used for From ad9eb95e39d4c0e29196d071faa88d3016fff5f1 Mon Sep 17 00:00:00 2001 From: seanfite-alleninstitute Date: Thu, 12 Feb 2026 21:25:19 +0000 Subject: [PATCH 9/9] Setting get_stitching_dict_proteomics func params back to original values. Pruning was consistent across changes. --- code/aind_proteomics_stitch/bigstitcher.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/code/aind_proteomics_stitch/bigstitcher.py b/code/aind_proteomics_stitch/bigstitcher.py index 1eb0a8c..9327ef2 100644 --- a/code/aind_proteomics_stitch/bigstitcher.py +++ b/code/aind_proteomics_stitch/bigstitcher.py @@ -103,9 +103,9 @@ def get_stitching_dict_proteomics( specimen_id: str, dataset_xml_path: str, downsample: Optional[int] = 2, - relative_optimization_threshold: float = 2, - absolute_optimization_threshold: float = 2, - max_error: int = 2 + relative_optimization_threshold: float = 2.5, + absolute_optimization_threshold: float = 3.5, + max_error: int = 3 ) -> dict: """ A function that writes a stitching dictioonary that will be used for