From 01ff42ddb31e86d2462f97131ce9301b41329018 Mon Sep 17 00:00:00 2001
From: cbrrrry <carson.berry@alleninstitute.org>
Date: Wed, 21 Jan 2026 21:40:41 +0000
Subject: [PATCH 1/9] add proteomics switch to bigstitcher.py for removing
 max_shift and min_correlation from phase correlation params

---
 code/aind_proteomics_stitch/bigstitcher.py | 51 ++++++++++++++++++-
 code/aind_proteomics_stitch/utils/utils.py | 58 ++++++++++++++++++++++
 2 files changed, 107 insertions(+), 2 deletions(-)

diff --git a/code/aind_proteomics_stitch/bigstitcher.py b/code/aind_proteomics_stitch/bigstitcher.py
index 52fcf25..22b68cc 100644
--- a/code/aind_proteomics_stitch/bigstitcher.py
+++ b/code/aind_proteomics_stitch/bigstitcher.py
@@ -99,6 +99,43 @@ def get_data_config(
 
     return derivatives_dict, proteomics_dataset, acquisition_dict
 
+def get_stitching_dict_proteomics(
+    specimen_id: str, dataset_xml_path: str, downsample: Optional[int] = 2
+) -> dict:
+    """
+    A function that writes a stitching dictioonary that will be used for
+    creating a json file that gives parameters to the bigstitcher stitching run
+
+    Parameters
+    ----------
+    specimen_id: str
+        Specimen ID
+    dataset_xml_path: str
+        Path where the xml is located
+    downsample: Optional[int] = 2
+        Image multiscale used for stitching
+
+    Returns
+    -------
+    dict
+        Dictionary with the stitching parameters
+        used for bigstitcher
+    """
+    # assert pathlib.Path(dataset_xml_path).exists()
+
+    stitching_dict = {
+        "session_id": str(specimen_id),
+        "memgb": 100,
+        "parallel": utils.get_code_ocean_cpu_limit(),
+        "dataset_xml": str(dataset_xml_path),
+        "do_phase_correlation": True,
+        "do_detection": False,
+        "do_registrations": False,
+        "phase_correlation_params": {
+            "downsample": downsample,
+        },
+    }
+    return stitching_dict
 
 def get_stitching_dict(
     specimen_id: str, dataset_xml_path: str, downsample: Optional[int] = 2
@@ -229,11 +266,21 @@ def main(
     scale_for_transforms = int(scale_for_transforms)
 
     # print(f"Voxel resolution: {voxel_resolution} - Estimating transforms in res: {res_for_transforms} - Scale: {scale_for_transforms}")
-    proteomics_stitching_params = get_stitching_dict(
+    project_name = utils.get_project_name()
+    if project_name == "PLACE": 
+        #use different parameters
+        proteomics_stitching_params = get_stitching_dict_proteomics(
         specimen_id=proteomics_dataset_name,
         dataset_xml_path=output_big_stitcher_xml,
         downsample=scale_for_transforms,
-    )
+        )
+    else: 
+
+        proteomics_stitching_params = get_stitching_dict(
+            specimen_id=proteomics_dataset_name,
+            dataset_xml_path=output_big_stitcher_xml,
+            downsample=scale_for_transforms,
+        )
     end_time = time()
 
     output_big_stitcher_json = f"{results_folder}/{proteomics_dataset_name}_stitch_channel_{channel_wavelength}_params.json"
diff --git a/code/aind_proteomics_stitch/utils/utils.py b/code/aind_proteomics_stitch/utils/utils.py
index 13fc01b..e4ca445 100644
--- a/code/aind_proteomics_stitch/utils/utils.py
+++ b/code/aind_proteomics_stitch/utils/utils.py
@@ -25,6 +25,64 @@
 # IO types
 PathLike = Union[str, Path]
 
+def get_project_name() ->str: 
+    """
+    Load the data_description.json file from the data directory and extract project_name.
+    
+    Searches for data_description.json in the following locations, in order:
+    1. /data/output_aind_metadata/
+    2. /data/
+    3. /data/{any_subdirectory}/ (first glob match only)
+    
+    Returns
+    -------
+    str
+        The project name extracted from the 'project_name' field in data_description.json
+        
+    Raises
+    ------
+    FileNotFoundError
+        If no data_description.json file is found in any of the search locations
+    RuntimeError
+        If error occurs while loading or parsing the JSON configuration
+    """
+    base_data_dir = pathlib.Path("/data")
+    
+    # Use glob to search for data_description.json in all possible locations
+    search_patterns = [
+        base_data_dir / "output_aind_metadata" / "data_description.json",
+        base_data_dir / "data_description.json",
+        # glob.glob(f"{base_data_dir.as_posix()}/data_description.json")[0],  # Any subdirectory
+        glob.glob(f"{base_data_dir.as_posix()}/*/data_description.json")[0], 
+        
+    ]
+    
+    # Find the first existing file
+    json_file_path = None
+    for json_path in search_patterns:
+        if pathlib.Path(json_path).exists():
+            json_file_path = json_path
+            logger.info(f"Found data_description.json at: {json_file_path}")
+            break
+    
+    if json_file_path is None:
+        raise FileNotFoundError(
+            f"No data_description.json file found in {base_data_dir} or any of its subdirectories"
+        )
+    
+    logger.info(f"Loading configuration from {json_file_path}")
+    
+    try:
+        with open(json_file_path, 'r') as f:
+            config = json.load(f)
+            project_name = config.get('project_name')
+            if not project_name:
+                raise ValueError("'project_name' field not found in data_description.json")
+            logger.info(f"Loaded project_name : {project_name}")
+            return project_name
+    except Exception as e:
+        raise RuntimeError(f"Error loading data_description.json: {str(e)}")
+
 
 def get_code_ocean_cpu_limit():
     """

From effdc3b75e78734fd96604c679b17803246b254f Mon Sep 17 00:00:00 2001
From: cbrrrry <carson.berry@alleninstitute.org>
Date: Wed, 21 Jan 2026 21:56:07 +0000
Subject: [PATCH 2/9] add max_error, relative and absolute thresholds

---
 code/aind_proteomics_stitch/bigstitcher.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/code/aind_proteomics_stitch/bigstitcher.py b/code/aind_proteomics_stitch/bigstitcher.py
index 22b68cc..3becb7c 100644
--- a/code/aind_proteomics_stitch/bigstitcher.py
+++ b/code/aind_proteomics_stitch/bigstitcher.py
@@ -100,7 +100,12 @@ def get_data_config(
     return derivatives_dict, proteomics_dataset, acquisition_dict
 
 def get_stitching_dict_proteomics(
-    specimen_id: str, dataset_xml_path: str, downsample: Optional[int] = 2
+    specimen_id: str, 
+    dataset_xml_path: str,
+    downsample: Optional[int] = 2, 
+    relative_optimization_threshold: float = 2.5, 
+    absolute_optimization_threshold: float = 3.5, 
+    max_error: int = 3
 ) -> dict:
     """
     A function that writes a stitching dictioonary that will be used for
@@ -133,6 +138,9 @@ def get_stitching_dict_proteomics(
         "do_registrations": False,
         "phase_correlation_params": {
             "downsample": downsample,
+            "relative_optimization_threshold": relative_optimization_threshold, 
+            "absolute_optimization_threshold": absolute_optimization_threshold, 
+            "max_error": max_error
         },
     }
     return stitching_dict

From 6f63cbcb9dcbfcc11d18d24ff6d8b819f4dec643 Mon Sep 17 00:00:00 2001
From: cbrrrry <carson.berry@alleninstitute.org>
Date: Wed, 21 Jan 2026 22:06:30 +0000
Subject: [PATCH 3/9] add switch for proteomics phase correlation json

---
 code/aind_proteomics_stitch/bigstitcher.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/code/aind_proteomics_stitch/bigstitcher.py b/code/aind_proteomics_stitch/bigstitcher.py
index 3becb7c..18e2910 100644
--- a/code/aind_proteomics_stitch/bigstitcher.py
+++ b/code/aind_proteomics_stitch/bigstitcher.py
@@ -134,6 +134,7 @@ def get_stitching_dict_proteomics(
         "parallel": utils.get_code_ocean_cpu_limit(),
         "dataset_xml": str(dataset_xml_path),
         "do_phase_correlation": True,
+        "proteomics_dataset": True,
         "do_detection": False,
         "do_registrations": False,
         "phase_correlation_params": {
@@ -276,6 +277,7 @@ def main(
     # print(f"Voxel resolution: {voxel_resolution} - Estimating transforms in res: {res_for_transforms} - Scale: {scale_for_transforms}")
     project_name = utils.get_project_name()
     if project_name == "PLACE": 
+        print(f'Project name {project_name}: generating proteomics stitching dict...')
         #use different parameters
         proteomics_stitching_params = get_stitching_dict_proteomics(
         specimen_id=proteomics_dataset_name,
@@ -283,6 +285,7 @@ def main(
         downsample=scale_for_transforms,
         )
     else: 
+        print(f'Project name {project_name}: generating HCR stitching dict...')
 
         proteomics_stitching_params = get_stitching_dict(
             specimen_id=proteomics_dataset_name,

From 442a4dbe0e0e52288a8f481da9951e48a213bd85 Mon Sep 17 00:00:00 2001
From: cbrrrry <carson.berry@alleninstitute.org>
Date: Wed, 21 Jan 2026 22:50:56 +0000
Subject: [PATCH 4/9] remove print statements for production

---
 code/aind_proteomics_stitch/bigstitcher.py |  4 ++--
 code/aind_proteomics_stitch/utils/utils.py | 11 ++++++-----
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/code/aind_proteomics_stitch/bigstitcher.py b/code/aind_proteomics_stitch/bigstitcher.py
index 18e2910..9327ef2 100644
--- a/code/aind_proteomics_stitch/bigstitcher.py
+++ b/code/aind_proteomics_stitch/bigstitcher.py
@@ -277,7 +277,7 @@ def main(
     # print(f"Voxel resolution: {voxel_resolution} - Estimating transforms in res: {res_for_transforms} - Scale: {scale_for_transforms}")
     project_name = utils.get_project_name()
     if project_name == "PLACE": 
-        print(f'Project name {project_name}: generating proteomics stitching dict...')
+        # print(f'Project name {project_name}: generating proteomics stitching dict...')
         #use different parameters
         proteomics_stitching_params = get_stitching_dict_proteomics(
         specimen_id=proteomics_dataset_name,
@@ -285,7 +285,7 @@ def main(
         downsample=scale_for_transforms,
         )
     else: 
-        print(f'Project name {project_name}: generating HCR stitching dict...')
+        # print(f'Project name {project_name}: generating HCR stitching dict...')
 
         proteomics_stitching_params = get_stitching_dict(
             specimen_id=proteomics_dataset_name,
diff --git a/code/aind_proteomics_stitch/utils/utils.py b/code/aind_proteomics_stitch/utils/utils.py
index e4ca445..60f31f6 100644
--- a/code/aind_proteomics_stitch/utils/utils.py
+++ b/code/aind_proteomics_stitch/utils/utils.py
@@ -13,6 +13,7 @@
 import time
 from datetime import datetime
 from pathlib import Path
+import glob
 from typing import Any, List, Optional, Tuple, Union
 
 import matplotlib.pyplot as plt
@@ -46,7 +47,7 @@ def get_project_name() ->str:
     RuntimeError
         If error occurs while loading or parsing the JSON configuration
     """
-    base_data_dir = pathlib.Path("/data")
+    base_data_dir = Path("/data")
     
     # Use glob to search for data_description.json in all possible locations
     search_patterns = [
@@ -60,9 +61,9 @@ def get_project_name() ->str:
     # Find the first existing file
     json_file_path = None
     for json_path in search_patterns:
-        if pathlib.Path(json_path).exists():
+        if Path(json_path).exists():
             json_file_path = json_path
-            logger.info(f"Found data_description.json at: {json_file_path}")
+            # print(f"Found data_description.json at: {json_file_path}")
             break
     
     if json_file_path is None:
@@ -70,7 +71,7 @@ def get_project_name() ->str:
             f"No data_description.json file found in {base_data_dir} or any of its subdirectories"
         )
     
-    logger.info(f"Loading configuration from {json_file_path}")
+    # print(f"Loading configuration from {json_file_path}")
     
     try:
         with open(json_file_path, 'r') as f:
@@ -78,7 +79,7 @@ def get_project_name() ->str:
             project_name = config.get('project_name')
             if not project_name:
                 raise ValueError("'project_name' field not found in data_description.json")
-            logger.info(f"Loaded project_name : {project_name}")
+            # print(f"Loaded project_name : {project_name}")
             return project_name
     except Exception as e:
         raise RuntimeError(f"Error loading data_description.json: {str(e)}")

From da9294e09c1d66dd04365e7c9f7c2dcb00920b1c Mon Sep 17 00:00:00 2001
From: cbrrrry <carson.berry@alleninstitute.org>
Date: Wed, 21 Jan 2026 23:07:56 +0000
Subject: [PATCH 5/9] changed datasets and tested manually

---
 .codeocean/datasets.json | 24 ++----------------------
 code/run_manually.py     | 37 ++++++++++++++++++++++++++++++-------
 2 files changed, 32 insertions(+), 29 deletions(-)

diff --git a/.codeocean/datasets.json b/.codeocean/datasets.json
index f6bacca..258900b 100644
--- a/.codeocean/datasets.json
+++ b/.codeocean/datasets.json
@@ -2,28 +2,8 @@
 	"version": 1,
 	"attached_datasets": [
 		{
-			"id": "02f566df-130c-489c-bcdf-a21242e333fc",
-			"mount": "HCR_807074_2025-07-30_13-45-00_processed_2025-10-02_06-30-15"
-		},
-		{
-			"id": "15f2c27b-3f03-4765-b2c7-0a98a0098fea",
-			"mount": "HCR_000000-s49_2025-08-13_13-00-00_processed_2025-09-10_22-57-56"
-		},
-		{
-			"id": "20240ace-9ff4-4f2f-8657-5be3757fb696",
-			"mount": "HCR_000000-s43_2025-07-24_13-00-00_processed_2025-09-03_17-16-44_1"
-		},
-		{
-			"id": "29d33eaa-b409-4e07-8c7e-5665ca27b8cb",
-			"mount": "HCR_807074_2025-08-26_15-45-00_processed_2025-10-02_22-49-01"
-		},
-		{
-			"id": "3f6bc1aa-89db-439d-b4f9-087ca4833705",
-			"mount": "HCR_000000-s43_2025-07-24_13-00-00_processed_2025-09-03_17-16-44"
-		},
-		{
-			"id": "52fe3938-46b5-4ddc-a2df-3c9ca3256a13",
-			"mount": "HCR_807074_2025-08-27_11-45-00_processed_2025-10-02_06-30-17"
+			"id": "0964fa71-ddd4-4ce3-9d9c-7446b2a8f004",
+			"mount": "HCR_823476-s5-ls2_2025-12-24_00-00-00_processed_2026-01-03_01-30-29"
 		}
 	]
 }
\ No newline at end of file
diff --git a/code/run_manually.py b/code/run_manually.py
index d3c6af1..f52348c 100644
--- a/code/run_manually.py
+++ b/code/run_manually.py
@@ -43,11 +43,33 @@ def run_offpipeline():
     # # Printing to get output on batch script
     # print(output_big_stitcher_json)
 
-def run_bigstitcher(): 
-        processed_asset_name = f'HCR_807074_2025-08-26_15-45-00_processed_2025-10-02_22-49-01'
+
+def run_HCR_bigstitcher(): 
+        processed_asset_name = f'HCR_000000-s43_2025-07-24_13-00-00_processed_2026-01-20_18-33-48'
         stitching_channel=405
         path_to_data = f"s3://aind-open-data/{processed_asset_name}/image_radial_correction"
-        voxel_resolution = (1, 0.3880046677791278, 0.3880046677791278)
+        voxel_resolution = (1, 0.2466335423895654, 0.2466335423895654)
+        results_folder = Path('/results')
+        acquisition_path = f"/data/{processed_asset_name}/acquisition.json"
+
+        bigstitcher.main(
+        path_to_data=path_to_data,
+        channel_wavelength=stitching_channel,
+        acquisition_path=acquisition_path,
+        voxel_resolution=voxel_resolution,
+        results_folder=results_folder,
+        proteomics_dataset_name=processed_asset_name,
+        # res_for_transforms=(0.76, 0.76, 3.4),
+        scale_for_transforms=4,
+        # If this is provided, res for
+        # transforms is ignored
+    )
+
+def run_bigstitcher(): 
+        processed_asset_name = f'HCR_823476-s5-ls2_2025-12-24_00-00-00_processed_2026-01-03_01-30-29'
+        stitching_channel=488
+        path_to_data = f"s3://aind-open-data/{processed_asset_name}/image_radial_correction"
+        voxel_resolution = (0.459, 0.0920179382864407, 0.0920179382864407)
         results_folder = Path('/results')
         acquisition_path = f"/data/{processed_asset_name}/acquisition.json"
 
@@ -58,8 +80,8 @@ def run_bigstitcher():
         voxel_resolution=voxel_resolution,
         results_folder=results_folder,
         proteomics_dataset_name=processed_asset_name,
-        res_for_transforms=(0.76, 0.76, 3.4),
-        scale_for_transforms=2,
+        # res_for_transforms=(0.76, 0.76, 3.4),
+        scale_for_transforms=4,
         # If this is provided, res for
         # transforms is ignored
     )
@@ -83,5 +105,6 @@ def combine_all_xmls():
             print(f'Error combining xmls')
 
 if __name__ == "__main__":
-    # run_bigstitcher()
-    combine_all_xmls()
\ No newline at end of file
+    run_bigstitcher()
+    # run_HCR_bigstitcher()
+    # combine_all_xmls()
\ No newline at end of file

From f5a9adf4f1edbb0c5e6e2499693af9728fa81ae9 Mon Sep 17 00:00:00 2001
From: cbrrrry <carson.berry@alleninstitute.org>
Date: Wed, 21 Jan 2026 23:09:47 +0000
Subject: [PATCH 6/9] Edited Dockerfile

---
 environment/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment/Dockerfile b/environment/Dockerfile
index e0c9376..a76bd62 100644
--- a/environment/Dockerfile
+++ b/environment/Dockerfile
@@ -23,7 +23,7 @@ RUN conda install -y -c conda-forge -c bioconda \
     conda clean -ya
 
 # --- Create Conda Env: proteomics_stitch ---
-RUN conda create -y -n proteomics_stitch python=3.9
+RUN conda create -y -n proteomics_stitch python=3.9 
 
 # --- Activate Environment and Install Python Packages ---
 SHELL ["conda", "run", "-n", "proteomics_stitch", "/bin/bash", "-c"]

From fc93dcbab45760fabb534863a8e44b3e76cbe05b Mon Sep 17 00:00:00 2001
From: seanfite-alleninstitute <sean.fite@alleninstitute.org>
Date: Thu, 12 Feb 2026 19:45:26 +0000
Subject: [PATCH 7/9] Updated get_stitching_dict_proteomics func by changing
 absolute opt threshold from 3.5 to 2.5

---
 code/aind_proteomics_stitch/bigstitcher.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code/aind_proteomics_stitch/bigstitcher.py b/code/aind_proteomics_stitch/bigstitcher.py
index 9327ef2..0302a62 100644
--- a/code/aind_proteomics_stitch/bigstitcher.py
+++ b/code/aind_proteomics_stitch/bigstitcher.py
@@ -104,7 +104,7 @@ def get_stitching_dict_proteomics(
     dataset_xml_path: str,
     downsample: Optional[int] = 2, 
     relative_optimization_threshold: float = 2.5, 
-    absolute_optimization_threshold: float = 3.5, 
+    absolute_optimization_threshold: float = 2.5, 
     max_error: int = 3
 ) -> dict:
     """

From 3aefcfeef98293f7046a59a09e79ca1fd96ddb37 Mon Sep 17 00:00:00 2001
From: seanfite-alleninstitute <sean.fite@alleninstitute.org>
Date: Thu, 12 Feb 2026 20:40:59 +0000
Subject: [PATCH 8/9] Updated get_stitching_dict_proteomics func params to set
 all thresholds and max error to 2

---
 code/aind_proteomics_stitch/bigstitcher.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/code/aind_proteomics_stitch/bigstitcher.py b/code/aind_proteomics_stitch/bigstitcher.py
index 0302a62..1eb0a8c 100644
--- a/code/aind_proteomics_stitch/bigstitcher.py
+++ b/code/aind_proteomics_stitch/bigstitcher.py
@@ -103,9 +103,9 @@ def get_stitching_dict_proteomics(
     specimen_id: str, 
     dataset_xml_path: str,
     downsample: Optional[int] = 2, 
-    relative_optimization_threshold: float = 2.5, 
-    absolute_optimization_threshold: float = 2.5, 
-    max_error: int = 3
+    relative_optimization_threshold: float = 2, 
+    absolute_optimization_threshold: float = 2, 
+    max_error: int = 2
 ) -> dict:
     """
     A function that writes a stitching dictioonary that will be used for

From ad9eb95e39d4c0e29196d071faa88d3016fff5f1 Mon Sep 17 00:00:00 2001
From: seanfite-alleninstitute <sean.fite@alleninstitute.org>
Date: Thu, 12 Feb 2026 21:25:19 +0000
Subject: [PATCH 9/9] Setting get_stitching_dict_proteomics func params back to
 original values. Pruning was consistent across changes.

---
 code/aind_proteomics_stitch/bigstitcher.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/code/aind_proteomics_stitch/bigstitcher.py b/code/aind_proteomics_stitch/bigstitcher.py
index 1eb0a8c..9327ef2 100644
--- a/code/aind_proteomics_stitch/bigstitcher.py
+++ b/code/aind_proteomics_stitch/bigstitcher.py
@@ -103,9 +103,9 @@ def get_stitching_dict_proteomics(
     specimen_id: str, 
     dataset_xml_path: str,
     downsample: Optional[int] = 2, 
-    relative_optimization_threshold: float = 2, 
-    absolute_optimization_threshold: float = 2, 
-    max_error: int = 2
+    relative_optimization_threshold: float = 2.5, 
+    absolute_optimization_threshold: float = 3.5, 
+    max_error: int = 3
 ) -> dict:
     """
     A function that writes a stitching dictioonary that will be used for