4 changes: 2 additions & 2 deletions pixi.lock

Some generated files are not rendered by default.

6 changes: 5 additions & 1 deletion src/imgtools/autopipeline.py
Collaborator:

"no output files, check the directory" sounds a bit confusing? what are they checking?

Collaborator (Author):

Can you send the full output?

@@ -262,7 +262,7 @@ def process_one_sample(
SampleNumber=idx,
)
result.output_files = list(saved_files)
-        if not result.output_files:
+        if not result.output_files and not sample_output.dry_run:
raise ValueError(
"No output files were saved. Check the output directory."
)
@@ -305,6 +305,7 @@ def __init__(
spacing: tuple[float, float, float] = (0.0, 0.0, 0.0),
window: float | None = None,
level: float | None = None,
dry_run: bool = False,
) -> None:
"""
Initialize the Autopipeline.
Expand Down Expand Up @@ -342,6 +343,8 @@ def __init__(
Window level for intensity normalization, by default None
level : float | None, optional
Window level for intensity normalization, by default None
dry_run : bool, optional
Whether to run the pipeline in dry run mode, by default False
"""
self.input = SampleInput.build(
directory=Path(input_directory),
@@ -359,6 +362,7 @@ def __init__(
filename_format=output_filename_format,
existing_file_mode=existing_file_mode,
extra_context={},
dry_run=dry_run,
)

transforms: list[BaseTransform] = [
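The guard change above is easiest to see in isolation. A minimal, runnable sketch with simplified names (not the project's actual classes) of the intent behind the dry-run exemption: a dry run is expected to produce no files, so an empty output_files list should not raise.

from dataclasses import dataclass, field

@dataclass
class Result:
    output_files: list[str] = field(default_factory=list)

def check_outputs(result: Result, dry_run: bool) -> None:
    # An empty output list is only an error when files were actually
    # supposed to be written; a dry run produces no files by design.
    if not result.output_files and not dry_run:
        raise ValueError(
            "No output files were saved. Check the output directory."
        )

check_outputs(Result(), dry_run=True)   # passes: nothing written on purpose
check_outputs(Result(), dry_run=False)  # raises ValueError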
9 changes: 9 additions & 0 deletions src/imgtools/cli/autopipeline.py
@@ -165,6 +165,13 @@ def parse_spacing(ctx, param, value): # type: ignore
default=None,
help="Path to YAML file containing ROI matching patterns."
)
@click.option(
"--dry-run",
"-d",
is_flag=True,
default=False,
help="Run the pipeline in dry run mode, by default False"
)
@click.help_option(
"-h",
"--help",
@@ -186,6 +193,7 @@ def autopipeline(
roi_on_missing_regex: str,
roi_match_map: Tuple[str],
roi_match_yaml: Path,
dry_run: bool,
) -> None:
"""Core utility to process messy DICOM data into organized NIfTI files.

@@ -263,6 +271,7 @@ def autopipeline(
spacing=spacing,
window=window,
level=level,
dry_run=dry_run,
)

# Run the pipeline
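To sanity-check the new flag, click's test runner can exercise the command; a sketch assuming the command object is importable from the module path shown in this diff:

from click.testing import CliRunner
from imgtools.cli.autopipeline import autopipeline

runner = CliRunner()
result = runner.invoke(autopipeline, ["--help"])
assert result.exit_code == 0
assert "--dry-run" in result.output  # the new flag is registered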
9 changes: 9 additions & 0 deletions src/imgtools/dicom/dicom_metadata/extractor_base.py
@@ -108,6 +108,15 @@ class ModalityMetadataExtractor(ABC):
"SeriesInstanceUID",
"StudyInstanceUID",
"Modality",
# Sensitive Patient Demographics (PHI)
# These tags contain Protected Health Information (PHI) and are sensitive.
"PatientSex",
"PatientBirthDate",
"PatientAge",
"EthnicGroup",
"PatientWeight",
"PatientSize",
"AdditionalPatientHistory",
# Image Geometry & Size
"BodyPartExamined",
"DataCollectionDiameter",
8 changes: 7 additions & 1 deletion src/imgtools/io/sample_output.py
@@ -131,7 +131,11 @@ class SampleOutput(BaseModel):
{"dataset": "NSCLC-Radiomics", "processing_date": "2025-04-22"}
],
)

dry_run: bool = Field(
default=False,
description="Whether to run the pipeline in dry run mode, by default False. Will generate the output index listing images and specified masks with ROIs.",
title="Dry Run",
)
_writer: AbstractBaseWriter | None = PrivateAttr(default=None)

def model_post_init(self, __context) -> None: # type: ignore # noqa: ANN001
@@ -141,6 +145,8 @@ def model_post_init(self, __context) -> None: # type: ignore # noqa: ANN001
existing_file_mode=self.existing_file_mode,
filename_format=self.filename_format,
context=self.extra_context,
dry_run=self.dry_run,
create_dirs=not self.dry_run, # only create directories if not in dry run mode
)

@field_validator("directory")
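The create_dirs=not self.dry_run wiring means a preview run cannot even leave empty directory trees behind. A toy sketch of that pattern, where make_writer is a hypothetical stand-in for the real writer construction:

from pydantic import BaseModel, PrivateAttr

def make_writer(dry_run: bool, create_dirs: bool) -> dict:
    # Hypothetical stand-in: the real code builds a writer instance here.
    return {"dry_run": dry_run, "create_dirs": create_dirs}

class SketchOutput(BaseModel):
    dry_run: bool = False
    _writer: dict | None = PrivateAttr(default=None)

    def model_post_init(self, __context) -> None:  # noqa: ANN001
        # A dry run must not touch the filesystem, so directory creation
        # is disabled along with the writes themselves.
        self._writer = make_writer(
            dry_run=self.dry_run,
            create_dirs=not self.dry_run,
        )

assert SketchOutput(dry_run=True)._writer == {"dry_run": True, "create_dirs": False}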
33 changes: 29 additions & 4 deletions src/imgtools/io/writers/abstract_base_writer.py
@@ -126,6 +126,10 @@ class AbstractBaseWriter(ABC, Generic[ContentType]):
absolute_paths_in_index : bool, default=False
If True, saves absolute paths in the index file.
If False, saves paths relative to the root directory.
dry_run : bool, default=False
If True, skip the actual file I/O operations but still write to the index
and return the resolved path. Useful for previewing what would be saved
or updating the index without writing files.
pattern_resolver : PatternResolver
Instance used to handle filename formatting with placeholders.

@@ -152,6 +156,7 @@ class AbstractBaseWriter(ABC, Generic[ContentType]):
pattern_resolver: PatternResolver = field(init=False)
overwrite_index: bool = field(default=False)
absolute_paths_in_index: bool = field(default=False)
dry_run: bool = field(default=False)

index_filename: Optional[str] = field(default=None)
_checked_directories: set[str] = field(default_factory=set, init=False)
@@ -212,6 +217,21 @@ def save(self, data: ContentType, **kwargs: Any) -> Path:
updating them with the kwargs passed from the save method.

This will help simplify repeated saves with similar context variables.

When `self.dry_run` is True, implementations should skip file I/O operations
but still write to the index and return the resolved path.

Parameters
----------
data : ContentType
The data to be saved.
**kwargs : Any
Additional context variables for filename generation and path resolution.

Returns
-------
Path
The resolved path where the file would be (or was) saved.
"""
pass

@@ -332,6 +352,10 @@ def resolve_path(self, **kwargs: object) -> Path:
# msg = f"Directory {out_path.parent} does not exist."
# raise DirectoryNotFoundError(msg)
return out_path

        if self.dry_run:
            # Short-circuit so the existing_file_mode handling below never
            # runs in dry-run mode.
            return out_path

match self.existing_file_mode:
case ExistingFileMode.SKIP:
return out_path
@@ -612,7 +636,7 @@ def save(self, data: str, **kwargs: object) -> Path:

Parameters
----------
-        content : str
+        data : str
The content to write to the file.
**kwargs : Any
Additional context for filename generation.
Expand All @@ -625,9 +649,10 @@ def save(self, data: str, **kwargs: object) -> Path:
# Resolve the output file path
output_path = self.resolve_path(**kwargs)

-        # Write content to the file
-        with output_path.open(mode="w", encoding="utf-8") as f:
-            f.write(data)
+        # Write content to the file only if not in dry_run mode
+        if not self.dry_run:
+            with output_path.open(mode="w", encoding="utf-8") as f:
+                f.write(data)

self.add_to_index(output_path, replace_existing=output_path.exists())

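The contract the save docstring spells out (skip the I/O, still index, still return the resolved path) is easy to demonstrate in a stripped-down subclass. A sketch with simplified structure, not the project's actual writer hierarchy:

from pathlib import Path

class SketchTextWriter:
    """Toy writer honoring the documented dry-run contract."""

    def __init__(self, root: Path, dry_run: bool = False) -> None:
        self.root = root
        self.dry_run = dry_run
        self.index: list[Path] = []  # stand-in for the real index file

    def save(self, data: str, filename: str) -> Path:
        out_path = self.root / filename
        if not self.dry_run:
            # Real run: create directories and write the file.
            out_path.parent.mkdir(parents=True, exist_ok=True)
            out_path.write_text(data, encoding="utf-8")
        # Dry run or not: record the entry and return the resolved path.
        self.index.append(out_path)
        return out_path

writer = SketchTextWriter(Path("out"), dry_run=True)
path = writer.save("hello", "a.txt")  # indexed, but no file on disk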
39 changes: 20 additions & 19 deletions src/imgtools/io/writers/nifti_writer.py
@@ -195,25 +195,26 @@ def save(self, data: sitk.Image | np.ndarray, **kwargs: object) -> Path:
else:
out_path = self.resolve_path(**kwargs)

-        if (
-            out_path.exists()  # check if it exists
-            # This will only be true if SKIP,
-            # OVERWRITE would have deleted the file
-            and self.existing_file_mode == ExistingFileMode.SKIP
-        ):
-            logger.debug("File exists, skipping.", out_path=out_path)
-            return out_path
-
-        try:
-            sitk.WriteImage(
-                image,
-                out_path.as_posix(),
-                useCompression=True,
-                compressionLevel=self.compression_level,
-            )
-        except Exception as e:
-            msg = f"Error writing image to file {out_path}: {e}"
-            raise NiftiWriterIOError(msg) from e
+        if not self.dry_run:
+            if (
+                out_path.exists()  # check if it exists
+                # This will only be true if SKIP,
+                # OVERWRITE would have deleted the file
+                and self.existing_file_mode == ExistingFileMode.SKIP
+            ):
+                logger.debug("File exists, skipping.", out_path=out_path)
+                return out_path
+
+            try:
+                sitk.WriteImage(
+                    image,
+                    out_path.as_posix(),
+                    useCompression=True,
+                    compressionLevel=self.compression_level,
+                )
+            except Exception as e:
+                msg = f"Error writing image to file {out_path}: {e}"
+                raise NiftiWriterIOError(msg) from e

self.add_to_index(
out_path,
49 changes: 27 additions & 22 deletions src/imgtools/io/writers/numpy_writer.py
@@ -82,6 +82,8 @@ def save(
----------
data : np.ndarray | sitk.Image | dict[str, np.ndarray | sitk.Image]
The data to save. Can be a single image or a dictionary of images.
**kwargs : object
Additional context for filename generation.
Returns
-------
@@ -95,29 +97,32 @@
"""
out_path = self.resolve_path(**kwargs)

-        if isinstance(data, (np.ndarray, sitk.Image)):
-            # Single image or array
-            array, metadata = self._to_numpy(data)
-            np.savez_compressed(out_path, image_array=array, **metadata)
-        elif isinstance(data, dict):
-            # Multiple images or arrays
-            arrays = {}
-            metadata = {}
-            for key, value in data.items():
-                array, meta = self._to_numpy(value)
-                arrays[key] = array
-                for meta_key, meta_value in meta.items():
-                    metadata[f"{key}_{meta_key}"] = meta_value
-            if self.compressed:
-                np.savez_compressed(
-                    out_path, allow_pickle=False, **arrays, **metadata
-                )
-            else:
-                np.savez(out_path, allow_pickle=False, **arrays, **metadata)
-        else:
-            raise NumpyWriterValidationError(
-                "Data must be a NumPy array, SimpleITK image, or a dictionary of these types."
-            )
+        if not self.dry_run:
+            if isinstance(data, (np.ndarray, sitk.Image)):
+                # Single image or array
+                array, metadata = self._to_numpy(data)
+                np.savez_compressed(out_path, image_array=array, **metadata)
+            elif isinstance(data, dict):
+                # Multiple images or arrays
+                arrays = {}
+                metadata = {}
+                for key, value in data.items():
+                    array, meta = self._to_numpy(value)
+                    arrays[key] = array
+                    for meta_key, meta_value in meta.items():
+                        metadata[f"{key}_{meta_key}"] = meta_value
+                if self.compressed:
+                    np.savez_compressed(
+                        out_path, allow_pickle=False, **arrays, **metadata
+                    )
+                else:
+                    np.savez(
+                        out_path, allow_pickle=False, **arrays, **metadata
+                    )
+            else:
+                raise NumpyWriterValidationError(
+                    "Data must be a NumPy array, SimpleITK image, or a dictionary of these types."
+                )
Comment on lines +100 to +125

Contributor:

⚠️ Potential issue | 🟠 Major

Preserve data validation behavior under dry_run and optionally centralize error message

The dry-run guard cleanly skips actual filesystem writes, but it currently also skips all type validation:

  • The isinstance checks and _to_numpy calls only run when not self.dry_run.
  • If dry_run=True and data is an unsupported type, no NumpyWriterValidationError is raised; instead, the method quietly indexes a path and returns it. That makes it easy for a dry-run to “succeed” while a real run would fail at write time.

To keep dry-run useful for catching pipeline issues while still avoiding I/O, consider validating data independently of the write, e.g.:

-        out_path = self.resolve_path(**kwargs)
-
-        if not self.dry_run:
-            if isinstance(data, (np.ndarray, sitk.Image)):
+        out_path = self.resolve_path(**kwargs)
+
+        # Validate input type regardless of dry-run to surface misuses early.
+        if isinstance(data, (np.ndarray, sitk.Image)):
+            to_save = ("single", data)
+        elif isinstance(data, dict):
+            to_save = ("dict", data)
+        else:
+            raise NumpyWriterValidationError(
+                "Data must be a NumPy array, SimpleITK image, or a dictionary of these types."
+            )
+
+        if not self.dry_run:
+            kind, payload = to_save
+            if kind == "single":
+                data_single = payload
                 # Single image or array
-                array, metadata = self._to_numpy(data)
+                array, metadata = self._to_numpy(data_single)
                 np.savez_compressed(out_path, image_array=array, **metadata)
-            elif isinstance(data, dict):
+            else:  # dict
                 # Multiple images or arrays
-                arrays = {}
-                metadata = {}
-                for key, value in data.items():
+                arrays: dict[str, np.ndarray] = {}
+                metadata: dict[str, object] = {}
+                for key, value in payload.items():
                 ...
-            else:
-                raise NumpyWriterValidationError(
-                    "Data must be a NumPy array, SimpleITK image, or a dictionary of these types."
-                )

This keeps validation semantics identical between normal and dry-run modes while still avoiding writes.

Optionally, to satisfy TRY003 and improve reuse, you could move the long error message into a class-level constant on NumpyWriterValidationError or NumPyWriter and reference it from both _to_numpy and save, but that’s more of a style/readability tweak than a functional requirement.

Committable suggestion skipped: line range outside the PR's diff.

🧰 Tools
🪛 Ruff (0.14.5)

123-125: Avoid specifying long messages outside the exception class

(TRY003)

🤖 Prompt for AI Agents
In src/imgtools/io/writers/numpy_writer.py around lines 100 to 125, the current
dry-run branch skips all type validation so unsupported input types never raise
NumpyWriterValidationError during dry runs; change the flow to always validate
input (perform isinstance checks and call self._to_numpy or a dedicated
validation helper) before checking self.dry_run, then only skip the actual
np.savez/np.savez_compressed when dry_run is True; optionally extract the long
error message into a class-level constant on NumpyWriter or
NumpyWriterValidationError and reuse it in both _to_numpy and save to avoid
duplication.
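
Distilled into code, the suggestion is to validate before the dry-run gate so both modes fail identically; a sketch of NumpyWriter.save where _write_arrays is a hypothetical helper wrapping the existing np.savez / np.savez_compressed logic:

import numpy as np
import SimpleITK as sitk

def save(self, data, **kwargs):  # sketch, not a drop-in replacement
    out_path = self.resolve_path(**kwargs)
    # Validate regardless of dry_run so misuse surfaces in previews too.
    if not isinstance(data, (np.ndarray, sitk.Image, dict)):
        raise NumpyWriterValidationError(
            "Data must be a NumPy array, SimpleITK image, "
            "or a dictionary of these types."
        )
    if not self.dry_run:
        self._write_arrays(out_path, data)  # hypothetical helper
    self.add_to_index(out_path, replace_existing=out_path.exists())
    return out_path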


self.add_to_index(
out_path,