From dcf0faa2cf36f8cd584a5a992b1efceab5e200d3 Mon Sep 17 00:00:00 2001 From: "Kevin M. Dean" Date: Thu, 19 Mar 2026 20:42:25 -0500 Subject: [PATCH 01/10] Add automatic execution planning for Dask workflows --- src/clearex/gui/app.py | 661 +++++++++++++++++- src/clearex/io/cli.py | 26 + src/clearex/io/provenance.py | 34 + src/clearex/main.py | 356 +++++++--- src/clearex/workflow.py | 1235 +++++++++++++++++++++++++++++++++- tests/test_main.py | 6 +- 6 files changed, 2194 insertions(+), 124 deletions(-) diff --git a/src/clearex/gui/app.py b/src/clearex/gui/app.py index 3cbe0c7..68c9abf 100644 --- a/src/clearex/gui/app.py +++ b/src/clearex/gui/app.py @@ -86,6 +86,7 @@ DEFAULT_ZARR_PYRAMID_PTCZYX, DEFAULT_SLURM_CLUSTER_JOB_EXTRA_DIRECTIVES, DaskBackendConfig, + ExecutionPolicy, LocalClusterRecommendation, LocalClusterConfig, PTCZYX_AXES, @@ -96,17 +97,23 @@ ZarrSaveConfig, analysis_chainable_output_component, analysis_operation_for_output_component, + calibration_profile_from_dict, + calibration_profile_to_dict, collect_analysis_input_references, dask_backend_from_dict, dask_backend_to_dict, default_analysis_operation_parameters, - format_dask_backend_summary, + execution_policy_from_dict, + execution_policy_to_dict, + format_execution_plan_summary, + format_execution_policy_summary, format_local_cluster_recommendation_summary, format_pyramid_levels, format_zarr_chunks_ptczyx, format_zarr_pyramid_ptczyx, normalize_analysis_operation_parameters, parse_pyramid_levels, + plan_execution, recommend_local_cluster_config, resolve_analysis_input_component, validate_analysis_input_references, @@ -190,6 +197,10 @@ class GuiUnavailableError(RuntimeError): _GUI_APP_ICON = "icon.png" _CLEAREX_SETTINGS_DIR_NAME = ".clearex" _CLEAREX_DASK_BACKEND_SETTINGS_FILE = "dask_backend_settings.json" +_CLEAREX_EXECUTION_POLICY_SETTINGS_FILE = "execution_policy_settings.json" +_CLEAREX_EXECUTION_CALIBRATION_PROFILES_FILE = ( + "execution_calibration_profiles.json" +) _CLEAREX_ZARR_SAVE_SETTINGS_FILE = "zarr_save_settings.json" _CLEAREX_EXPERIMENT_LIST_FORMAT = "clearex-experiment-list/v1" _CLEAREX_EXPERIMENT_LIST_FILE_SUFFIX = ".clearex-experiment-list.json" @@ -840,6 +851,30 @@ def _resolve_dask_backend_settings_path( return directory / _CLEAREX_DASK_BACKEND_SETTINGS_FILE +def _resolve_execution_policy_settings_path( + settings_directory: Optional[Path] = None, +) -> Path: + """Resolve the user settings JSON path for persisted execution policy.""" + directory = ( + settings_directory + if settings_directory is not None + else _resolve_clearex_settings_directory() + ) + return directory / _CLEAREX_EXECUTION_POLICY_SETTINGS_FILE + + +def _resolve_execution_calibration_profiles_path( + settings_directory: Optional[Path] = None, +) -> Path: + """Resolve the user settings JSON path for persisted calibration profiles.""" + directory = ( + settings_directory + if settings_directory is not None + else _resolve_clearex_settings_directory() + ) + return directory / _CLEAREX_EXECUTION_CALIBRATION_PROFILES_FILE + + def _resolve_zarr_save_settings_path( settings_directory: Optional[Path] = None, ) -> Path: @@ -959,6 +994,80 @@ def _load_last_used_dask_backend_config( return dask_backend_from_dict(payload) +def _load_last_used_execution_policy( + settings_path: Optional[Path] = None, +) -> Optional[ExecutionPolicy]: + """Load the last-used execution policy from JSON.""" + path = ( + settings_path + if settings_path is not None + else _resolve_execution_policy_settings_path() + ) + resolved = path.expanduser() + if not resolved.exists(): + return None + + try: + raw_text = resolved.read_text(encoding="utf-8") + except Exception as exc: + logging.getLogger(__name__).warning( + "Failed to read execution policy settings %s: %s", + resolved, + exc, + ) + return None + + if not raw_text.strip(): + return None + + try: + payload = json.loads(raw_text) + except json.JSONDecodeError as exc: + logging.getLogger(__name__).warning( + "Failed to decode execution policy settings %s: %s", + resolved, + exc, + ) + return None + + if isinstance(payload, dict) and not payload: + return None + return execution_policy_from_dict(payload) + + +def _load_execution_calibration_profiles( + settings_path: Optional[Path] = None, +) -> Dict[str, Any]: + """Load persisted execution calibration profiles from JSON.""" + path = ( + settings_path + if settings_path is not None + else _resolve_execution_calibration_profiles_path() + ) + resolved = path.expanduser() + if not resolved.exists(): + return {} + try: + raw_text = resolved.read_text(encoding="utf-8") + except Exception: + return {} + if not raw_text.strip(): + return {} + try: + payload = json.loads(raw_text) + except json.JSONDecodeError: + return {} + if not isinstance(payload, dict): + return {} + profiles: Dict[str, Any] = {} + for key, value in payload.items(): + profile = calibration_profile_from_dict(value) + if profile is None: + continue + profiles[str(key)] = profile + return profiles + + def _load_last_used_zarr_save_config( settings_path: Optional[Path] = None, ) -> Optional[ZarrSaveConfig]: @@ -1071,6 +1180,63 @@ def _save_last_used_dask_backend_config( return True +def _save_last_used_execution_policy( + config: ExecutionPolicy, + settings_path: Optional[Path] = None, +) -> bool: + """Persist the most recently used execution policy.""" + path = ( + settings_path + if settings_path is not None + else _resolve_execution_policy_settings_path() + ) + resolved = path.expanduser() + _ensure_clearex_settings_directory(resolved.parent) + + try: + payload = execution_policy_to_dict(config) + serialized = json.dumps(payload, indent=2, sort_keys=True) + resolved.write_text(f"{serialized}\n", encoding="utf-8") + except Exception as exc: + logging.getLogger(__name__).warning( + "Failed to save execution policy settings %s: %s", + resolved, + exc, + ) + return False + return True + + +def _save_execution_calibration_profiles( + profiles: Mapping[str, Any], + settings_path: Optional[Path] = None, +) -> bool: + """Persist execution calibration profiles.""" + path = ( + settings_path + if settings_path is not None + else _resolve_execution_calibration_profiles_path() + ) + resolved = path.expanduser() + _ensure_clearex_settings_directory(resolved.parent) + + try: + payload = { + str(key): calibration_profile_to_dict(profile) + for key, profile in profiles.items() + } + serialized = json.dumps(payload, indent=2, sort_keys=True) + resolved.write_text(f"{serialized}\n", encoding="utf-8") + except Exception as exc: + logging.getLogger(__name__).warning( + "Failed to save execution calibration profiles %s: %s", + resolved, + exc, + ) + return False + return True + + def _save_last_used_zarr_save_config( config: ZarrSaveConfig, settings_path: Optional[Path] = None, @@ -1135,6 +1301,15 @@ def _should_apply_persisted_dask_backend(initial: Optional[WorkflowConfig]) -> b return initial.dask_backend == DaskBackendConfig() +def _should_apply_persisted_execution_policy( + initial: Optional[WorkflowConfig], +) -> bool: + """Return whether persisted execution policy should override GUI defaults.""" + if initial is None: + return True + return initial.execution_policy == ExecutionPolicy() + + def _should_apply_persisted_zarr_save(initial: Optional[WorkflowConfig]) -> bool: """Return whether persisted Zarr save settings should override defaults. @@ -3649,6 +3824,262 @@ def _on_apply(self) -> None: self.accept() + class ExecutionPolicyDialog(QDialog): + """Dialog for configuring automatic execution planning.""" + + def __init__( + self, + *, + initial_policy: ExecutionPolicy, + initial_backend: DaskBackendConfig, + workload: str, + summary_workflow_factory: Callable[ + [ExecutionPolicy, DaskBackendConfig], WorkflowConfig + ], + recommendation_shape_tpczyx: Optional[ + Tuple[int, int, int, int, int, int] + ] = None, + recommendation_chunks_tpczyx: Optional[ + Tuple[int, int, int, int, int, int] + ] = None, + recommendation_dtype_itemsize: Optional[int] = None, + parent: Optional[QDialog] = None, + ) -> None: + """Initialize execution-policy dialog state.""" + super().__init__(parent) + self.setWindowTitle("Execution Planning") + self.result_policy: Optional[ExecutionPolicy] = None + self.result_backend: Optional[DaskBackendConfig] = None + self._advanced_backend = initial_backend + self._workload = str(workload).strip().lower() or "analysis" + self._summary_workflow_factory = summary_workflow_factory + self._recommendation_shape_tpczyx = recommendation_shape_tpczyx + self._recommendation_chunks_tpczyx = recommendation_chunks_tpczyx + self._recommendation_dtype_itemsize = recommendation_dtype_itemsize + self._refresh_calibration_once = False + self._build_ui() + self._hydrate(initial_policy) + self.setStyleSheet(_popup_dialog_stylesheet()) + _apply_initial_dialog_geometry( + self, + minimum_size=_DASK_BACKEND_DIALOG_MINIMUM_SIZE, + preferred_size=_DASK_BACKEND_DIALOG_PREFERRED_SIZE, + content_size_hint=(self.sizeHint().width(), self.sizeHint().height()), + ) + + def _build_ui(self) -> None: + """Construct dialog controls and wire signals.""" + root = QVBoxLayout(self) + apply_popup_root_spacing(root) + + overview = QLabel( + "Choose whether ClearEx plans worker resources automatically " + "or uses the advanced backend override." + ) + overview.setWordWrap(True) + root.addWidget(overview) + + form = QFormLayout() + apply_form_spacing(form) + self._mode_combo = QComboBox() + self._mode_combo.addItem("Auto", "auto") + self._mode_combo.addItem("Advanced", "advanced") + form.addRow("Mode", self._mode_combo) + + self._max_workers_input = QLineEdit() + self._max_workers_input.setPlaceholderText("blank = auto") + form.addRow("Max workers", self._max_workers_input) + + self._memory_per_worker_input = QLineEdit() + self._memory_per_worker_input.setPlaceholderText("auto") + form.addRow("Memory per worker", self._memory_per_worker_input) + root.addLayout(form) + + button_row = QHBoxLayout() + apply_row_spacing(button_row) + self._calibrate_button = _configure_fixed_height_button( + QPushButton("Calibrate") + ) + self._advanced_button = _configure_fixed_height_button( + QPushButton("Advanced Backend") + ) + button_row.addWidget(self._calibrate_button) + button_row.addWidget(self._advanced_button) + button_row.addStretch(1) + root.addLayout(button_row) + + self._summary_label = QLabel("") + self._summary_label.setWordWrap(True) + self._summary_label.setObjectName("metadataFieldValue") + root.addWidget(self._summary_label) + + footer = QHBoxLayout() + apply_footer_row_spacing(footer) + self._defaults_button = _configure_fixed_height_button( + QPushButton("Reset Defaults") + ) + self._cancel_button = _configure_fixed_height_button( + QPushButton("Cancel") + ) + self._apply_button = _configure_fixed_height_button( + QPushButton("Apply") + ) + self._apply_button.setObjectName("runButton") + footer.addWidget(self._defaults_button) + footer.addStretch(1) + footer.addWidget(self._cancel_button) + footer.addWidget(self._apply_button) + root.addLayout(footer) + + self._mode_combo.currentIndexChanged.connect(self._on_mode_changed) + self._max_workers_input.textChanged.connect(self._refresh_summary) + self._memory_per_worker_input.textChanged.connect(self._refresh_summary) + self._calibrate_button.clicked.connect(self._on_calibrate) + self._advanced_button.clicked.connect(self._on_edit_advanced_backend) + self._defaults_button.clicked.connect(self._on_reset_defaults) + self._cancel_button.clicked.connect(self.reject) + self._apply_button.clicked.connect(self._on_apply) + + def _parse_optional_positive_int( + self, + text: str, + *, + field_name: str, + ) -> Optional[int]: + """Parse optional positive integers from line-edit text.""" + stripped = text.strip() + if not stripped: + return None + try: + value = int(stripped) + except ValueError as exc: + raise ValueError(f"{field_name} must be an integer.") from exc + if value <= 0: + raise ValueError(f"{field_name} must be greater than zero.") + return value + + def _current_policy( + self, + *, + force_refresh: bool, + ) -> ExecutionPolicy: + """Build an execution policy from current widget state.""" + return ExecutionPolicy( + mode=str(self._mode_combo.currentData()), + max_workers=self._parse_optional_positive_int( + self._max_workers_input.text(), + field_name="Max workers", + ), + memory_per_worker_limit=( + self._memory_per_worker_input.text().strip() or "auto" + ), + calibration_policy=( + "refresh" + if force_refresh + else "use_if_available" + ), + ) + + def _refresh_summary(self) -> None: + """Refresh the execution-plan summary for current controls.""" + try: + policy = self._current_policy( + force_refresh=self._refresh_calibration_once + ) + workflow = self._summary_workflow_factory( + policy, + self._advanced_backend, + ) + profiles = ( + {} + if self._refresh_calibration_once + else _load_execution_calibration_profiles() + ) + plan = plan_execution( + workflow, + workload=self._workload, + shape_tpczyx=self._recommendation_shape_tpczyx, + chunks_tpczyx=self._recommendation_chunks_tpczyx, + dtype_itemsize=self._recommendation_dtype_itemsize, + calibration_profiles=profiles, + ) + except Exception as exc: + self._summary_label.setText( + f"Could not derive execution plan: {type(exc).__name__}: {exc}" + ) + return + + text = ( + f"Policy: {format_execution_policy_summary(policy)}\n" + f"Plan: {format_execution_plan_summary(plan)}" + ) + self._summary_label.setText(text) + self._summary_label.setToolTip(text) + + def _hydrate(self, initial_policy: ExecutionPolicy) -> None: + """Populate controls from an initial execution policy.""" + index = self._mode_combo.findData(initial_policy.mode) + if index < 0: + index = 0 + self._mode_combo.setCurrentIndex(index) + self._max_workers_input.setText( + "" + if initial_policy.max_workers is None + else str(initial_policy.max_workers) + ) + self._memory_per_worker_input.setText( + str(initial_policy.memory_per_worker_limit) + ) + self._refresh_calibration_once = False + self._on_mode_changed(index) + self._refresh_summary() + + def _on_mode_changed(self, _: int) -> None: + """Update enabled state after policy mode changes.""" + auto_mode = str(self._mode_combo.currentData()) == "auto" + self._max_workers_input.setEnabled(auto_mode) + self._memory_per_worker_input.setEnabled(auto_mode) + self._calibrate_button.setEnabled(auto_mode) + self._refresh_summary() + + def _on_calibrate(self) -> None: + """Mark the next execution to refresh the cached profile.""" + self._refresh_calibration_once = True + self._refresh_summary() + + def _on_edit_advanced_backend(self) -> None: + """Open the advanced backend dialog and store its result.""" + dialog = DaskBackendConfigDialog( + initial=self._advanced_backend, + recommendation_shape_tpczyx=self._recommendation_shape_tpczyx, + recommendation_chunks_tpczyx=self._recommendation_chunks_tpczyx, + recommendation_dtype_itemsize=self._recommendation_dtype_itemsize, + parent=self, + ) + if dialog.exec() != QDialog.DialogCode.Accepted: + return + if dialog.result_config is None: + return + self._advanced_backend = dialog.result_config + self._refresh_summary() + + def _on_reset_defaults(self) -> None: + """Reset controls to default execution policy values.""" + self._advanced_backend = DaskBackendConfig() + self._hydrate(ExecutionPolicy()) + + def _on_apply(self) -> None: + """Validate current state and accept the dialog.""" + try: + self.result_policy = self._current_policy( + force_refresh=self._refresh_calibration_once + ) + except ValueError as exc: + QMessageBox.warning(self, "Invalid Execution Planning", str(exc)) + return + self.result_backend = self._advanced_backend + self.accept() + class DataStoreMaterializationWorker(QThread): """Background worker that materializes canonical store data. @@ -3682,6 +4113,7 @@ def __init__( *, experiment: NavigateExperiment, source_data_path: Path, + execution_policy: ExecutionPolicy, dask_backend: DaskBackendConfig, zarr_save: ZarrSaveConfig, ) -> None: @@ -3706,6 +4138,7 @@ def __init__( super().__init__() self._experiment = experiment self._source_data_path = source_data_path + self._execution_policy = execution_policy self._dask_backend = dask_backend self._zarr_save = zarr_save @@ -3745,8 +4178,25 @@ def run(self) -> None: """ try: with ExitStack() as exit_stack: + profiles = _load_execution_calibration_profiles() + planning_workflow = WorkflowConfig( + execution_policy=self._execution_policy, + dask_backend=self._dask_backend, + zarr_save=self._zarr_save, + ) + plan = plan_execution( + planning_workflow, + workload="io", + chunks_tpczyx=self._zarr_save.chunks_tpczyx(), + calibration_profiles=profiles, + ) + if plan.calibration_profile is not None: + profiles[plan.calibration_profile.profile_key] = ( + plan.calibration_profile + ) + _save_execution_calibration_profiles(profiles) client = _configure_dask_backend_client( - self._dask_backend, + plan.backend_config, exit_stack=exit_stack, ) result = materialize_experiment_data_store( @@ -3799,6 +4249,7 @@ def __init__( self, *, requests: Sequence[ExperimentStorePreparationRequest], + execution_policy: ExecutionPolicy, dask_backend: DaskBackendConfig, zarr_save: ZarrSaveConfig, force_rebuild: bool = False, @@ -3825,6 +4276,7 @@ def __init__( """ super().__init__() self._requests = list(requests) + self._execution_policy = execution_policy self._dask_backend = dask_backend self._zarr_save = zarr_save self._force_rebuild = bool(force_rebuild) @@ -3921,8 +4373,25 @@ def run(self) -> None: try: with ExitStack() as exit_stack: + profiles = _load_execution_calibration_profiles() + planning_workflow = WorkflowConfig( + execution_policy=self._execution_policy, + dask_backend=self._dask_backend, + zarr_save=self._zarr_save, + ) + plan = plan_execution( + planning_workflow, + workload="io", + chunks_tpczyx=self._zarr_save.chunks_tpczyx(), + calibration_profiles=profiles, + ) + if plan.calibration_profile is not None: + profiles[plan.calibration_profile.profile_key] = ( + plan.calibration_profile + ) + _save_execution_calibration_profiles(profiles) client = _configure_dask_backend_client( - self._dask_backend, + plan.backend_config, exit_stack=exit_stack, ) for index, request in enumerate(self._requests): @@ -4391,6 +4860,7 @@ def __init__(self, initial: WorkflowConfig) -> None: self._opener = ImageOpener() self.result_config: Optional[WorkflowConfig] = None self._metadata_labels: Dict[str, QLabel] = {} + self._execution_policy: ExecutionPolicy = initial.execution_policy self._dask_backend_config: DaskBackendConfig = initial.dask_backend self._zarr_save_config: ZarrSaveConfig = initial.zarr_save self._chunks = initial.chunks @@ -4590,7 +5060,7 @@ def _build_ui(self) -> None: zarr_layout.addLayout(zarr_button_row) root.addWidget(zarr_group) - dask_backend_group = QGroupBox("Dask Backend") + dask_backend_group = QGroupBox("Execution Planning") dask_backend_layout = QVBoxLayout(dask_backend_group) apply_stack_spacing(dask_backend_layout) dask_backend_layout.setContentsMargins(10, 8, 10, 10) @@ -4604,7 +5074,7 @@ def _build_ui(self) -> None: dask_backend_button_row = QHBoxLayout() apply_row_spacing(dask_backend_button_row) dask_backend_button_row.addStretch(1) - self._dask_backend_button = QPushButton("Edit Dask Backend") + self._dask_backend_button = QPushButton("Edit Execution Planning") dask_backend_button_row.addWidget(self._dask_backend_button) dask_backend_layout.addLayout(dask_backend_button_row) root.addWidget(dask_backend_group) @@ -4714,7 +5184,28 @@ def _refresh_dask_backend_summary(self) -> None: None Summary labels are updated in-place. """ - summary = format_dask_backend_summary(self._dask_backend_config) + try: + workflow = WorkflowConfig( + execution_policy=self._execution_policy, + dask_backend=self._dask_backend_config, + zarr_save=self._zarr_save_config, + chunks=self._chunks, + ) + profiles = _load_execution_calibration_profiles() + plan = plan_execution( + workflow, + workload="io", + shape_tpczyx=self._current_local_cluster_shape_tpczyx(), + chunks_tpczyx=self._zarr_save_config.chunks_tpczyx(), + dtype_itemsize=self._current_dtype_itemsize(), + calibration_profiles=profiles, + ) + summary = ( + f"Policy: {format_execution_policy_summary(self._execution_policy)}\n" + f"Plan: {format_execution_plan_summary(plan)}" + ) + except Exception as exc: + summary = f"Could not derive execution plan: {type(exc).__name__}: {exc}" self._dask_backend_summary.setText(summary) self._dask_backend_summary.setToolTip(summary) @@ -4746,19 +5237,32 @@ def _on_edit_dask_backend(self) -> None: None Selected backend values are stored in-place. """ - dialog = DaskBackendConfigDialog( - initial=self._dask_backend_config, + dialog = ExecutionPolicyDialog( + initial_policy=self._execution_policy, + initial_backend=self._dask_backend_config, + workload="io", + summary_workflow_factory=lambda policy, backend: WorkflowConfig( + execution_policy=policy, + dask_backend=backend, + zarr_save=self._zarr_save_config, + chunks=self._chunks, + ), recommendation_shape_tpczyx=self._current_local_cluster_shape_tpczyx(), recommendation_chunks_tpczyx=self._zarr_save_config.chunks_tpczyx(), recommendation_dtype_itemsize=self._current_dtype_itemsize(), parent=self, ) result = dialog.exec() - if result != QDialog.DialogCode.Accepted or dialog.result_config is None: + if ( + result != QDialog.DialogCode.Accepted + or dialog.result_policy is None + or dialog.result_backend is None + ): return - self._dask_backend_config = dialog.result_config + self._execution_policy = dialog.result_policy + self._dask_backend_config = dialog.result_backend self._refresh_dask_backend_summary() - self._set_status("Updated Dask backend settings.") + self._set_status("Updated execution planning settings.") def _on_edit_zarr_settings(self) -> None: """Open Zarr settings dialog and apply selected configuration. @@ -6152,6 +6656,7 @@ def _accept_with_store_path( ), analysis_apply_to_all=False, prefer_dask=True, + execution_policy=self._execution_policy, dask_backend=self._dask_backend_config, chunks=self._chunks, flatfield=False, @@ -6163,6 +6668,12 @@ def _accept_with_store_path( mip_export=False, zarr_save=self._zarr_save_config, ) + _save_last_used_execution_policy( + replace( + self._execution_policy, + calibration_policy="use_if_available", + ) + ) _save_last_used_dask_backend_config(self._dask_backend_config) _save_last_used_zarr_save_config(self._zarr_save_config) self.accept() @@ -6276,6 +6787,7 @@ def _on_next(self) -> None: worker = BatchDataStoreMaterializationWorker( requests=pending_requests, + execution_policy=self._execution_policy, dask_backend=self._dask_backend_config, zarr_save=self._zarr_save_config, force_rebuild=rebuild_requested, @@ -6805,6 +7317,7 @@ def __init__(self, initial: WorkflowConfig) -> None: _analysis_targets_for_workflow(initial) ) self._active_analysis_target: Optional[AnalysisTarget] = None + self._execution_policy: ExecutionPolicy = initial.execution_policy self._dask_backend_config: DaskBackendConfig = initial.dask_backend self.result_config: Optional[WorkflowConfig] = None self._analysis_scope_combo: Optional[QComboBox] = None @@ -7661,7 +8174,7 @@ def _build_ui(self) -> None: max(28, int(self._status_label.fontMetrics().height()) + 10) ) status_stack.addWidget(self._status_label) - self._dask_backend_summary_label = QLabel("Dask backend: n/a") + self._dask_backend_summary_label = QLabel("Execution planning: n/a") self._dask_backend_summary_label.setObjectName("statusLabel") self._dask_backend_summary_label.setWordWrap(True) self._dask_backend_summary_label.setTextInteractionFlags( @@ -7675,7 +8188,7 @@ def _build_ui(self) -> None: ) status_stack.addWidget(self._dask_backend_summary_label) footer.addLayout(status_stack, 1) - self._dask_backend_button = QPushButton("Edit Dask Backend") + self._dask_backend_button = QPushButton("Edit Execution Planning") self._dask_dashboard_button = QPushButton("Open Dask Dashboard") self._cancel_button = QPushButton("Cancel") self._run_button = QPushButton("Run") @@ -11064,7 +11577,7 @@ def _set_parameter_help(self, text: str) -> None: self._parameter_help_label.setText(str(text)) def _refresh_dask_backend_summary(self) -> None: - """Refresh footer summary text for active Dask backend settings. + """Refresh footer summary text for active execution planning. Parameters ---------- @@ -11082,8 +11595,46 @@ def _refresh_dask_backend_summary(self) -> None: """ if self._dask_backend_summary_label is None: return - summary = format_dask_backend_summary(self._dask_backend_config) - text = f"Dask backend: {summary}" + workflow = WorkflowConfig( + file=self._base_config.file, + analysis_targets=self._analysis_targets, + analysis_selected_experiment_path=( + self._base_config.analysis_selected_experiment_path + ), + analysis_apply_to_all=bool( + self._analysis_apply_to_all_checkbox.isChecked() + if self._analysis_apply_to_all_checkbox is not None + else False + ), + prefer_dask=self._base_config.prefer_dask, + execution_policy=self._execution_policy, + dask_backend=self._dask_backend_config, + chunks=self._base_config.chunks, + flatfield=self._operation_checkboxes["flatfield"].isChecked(), + deconvolution=self._operation_checkboxes["deconvolution"].isChecked(), + shear_transform=self._operation_checkboxes["shear_transform"].isChecked(), + particle_detection=self._operation_checkboxes["particle_detection"].isChecked(), + usegment3d=self._operation_checkboxes["usegment3d"].isChecked(), + registration=self._operation_checkboxes["registration"].isChecked(), + visualization=self._operation_checkboxes["visualization"].isChecked(), + mip_export=self._operation_checkboxes["mip_export"].isChecked(), + zarr_save=self._base_config.zarr_save, + analysis_parameters=normalize_analysis_operation_parameters( + self._base_config.analysis_parameters + ), + ) + plan = plan_execution( + workflow, + workload="analysis", + shape_tpczyx=self._analysis_store_shape_tpczyx(), + chunks_tpczyx=self._base_config.zarr_save.chunks_tpczyx(), + dtype_itemsize=self._analysis_store_dtype_itemsize(), + calibration_profiles=_load_execution_calibration_profiles(), + ) + text = ( + f"Policy: {format_execution_policy_summary(self._execution_policy)}\n" + f"Plan: {format_execution_plan_summary(plan)}" + ) self._dask_backend_summary_label.setText(text) self._dask_backend_summary_label.setToolTip(text) self._refresh_dask_dashboard_button_state() @@ -11162,7 +11713,7 @@ def _analysis_store_dtype_itemsize(self) -> Optional[int]: return None def _on_edit_dask_backend(self) -> None: - """Open backend settings dialog and apply selected configuration. + """Open execution-planning dialog and apply selected configuration. Parameters ---------- @@ -11178,20 +11729,61 @@ def _on_edit_dask_backend(self) -> None: None Validation and persistence errors are handled internally. """ - dialog = DaskBackendConfigDialog( - initial=self._dask_backend_config, + dialog = ExecutionPolicyDialog( + initial_policy=self._execution_policy, + initial_backend=self._dask_backend_config, + workload="analysis", + summary_workflow_factory=lambda policy, backend: WorkflowConfig( + file=self._base_config.file, + analysis_targets=self._analysis_targets, + analysis_selected_experiment_path=( + self._base_config.analysis_selected_experiment_path + ), + analysis_apply_to_all=bool( + self._analysis_apply_to_all_checkbox.isChecked() + if self._analysis_apply_to_all_checkbox is not None + else False + ), + prefer_dask=self._base_config.prefer_dask, + execution_policy=policy, + dask_backend=backend, + chunks=self._base_config.chunks, + flatfield=self._operation_checkboxes["flatfield"].isChecked(), + deconvolution=self._operation_checkboxes["deconvolution"].isChecked(), + shear_transform=self._operation_checkboxes["shear_transform"].isChecked(), + particle_detection=self._operation_checkboxes["particle_detection"].isChecked(), + usegment3d=self._operation_checkboxes["usegment3d"].isChecked(), + registration=self._operation_checkboxes["registration"].isChecked(), + visualization=self._operation_checkboxes["visualization"].isChecked(), + mip_export=self._operation_checkboxes["mip_export"].isChecked(), + zarr_save=self._base_config.zarr_save, + analysis_parameters=normalize_analysis_operation_parameters( + self._base_config.analysis_parameters + ), + ), recommendation_shape_tpczyx=self._analysis_store_shape_tpczyx(), recommendation_chunks_tpczyx=self._base_config.zarr_save.chunks_tpczyx(), recommendation_dtype_itemsize=self._analysis_store_dtype_itemsize(), parent=self, ) result = dialog.exec() - if result != QDialog.DialogCode.Accepted or dialog.result_config is None: + if ( + result != QDialog.DialogCode.Accepted + or dialog.result_policy is None + or dialog.result_backend is None + ): return - self._dask_backend_config = dialog.result_config + self._execution_policy = dialog.result_policy + self._dask_backend_config = dialog.result_backend + _save_last_used_execution_policy( + replace( + self._execution_policy, + calibration_policy="use_if_available", + ) + ) _save_last_used_dask_backend_config(self._dask_backend_config) self._refresh_dask_backend_summary() - self._set_status("Updated Dask backend settings.") + self._set_status("Updated execution planning settings.") @staticmethod def _normalize_dashboard_url( @@ -11328,6 +11920,8 @@ def _resolve_dask_dashboard_url(self) -> Optional[str]: None Parsing failures are handled internally. """ + if self._execution_policy.mode != "advanced": + return None mode = str(self._dask_backend_config.mode).strip().lower() if mode == DASK_BACKEND_LOCAL_CLUSTER: return self._normalize_dashboard_url("127.0.0.1:8787") @@ -13897,6 +14491,7 @@ def _on_run(self) -> None: else False ), "prefer_dask": self._base_config.prefer_dask, + "execution_policy": self._execution_policy, "dask_backend": self._dask_backend_config, "chunks": self._base_config.chunks, "flatfield": selected_flags["flatfield"], @@ -13914,6 +14509,12 @@ def _on_run(self) -> None: workflow_kwargs["usegment3d"] = selected_flags["usegment3d"] self.result_config = WorkflowConfig(**workflow_kwargs) self._persist_analysis_gui_state_for_target(selected_target) + _save_last_used_execution_policy( + replace( + self._execution_policy, + calibration_policy="use_if_available", + ) + ) _save_last_used_dask_backend_config(self._dask_backend_config) sequence = self._selected_operations_in_sequence() sequence_text = " -> ".join( @@ -14010,12 +14611,26 @@ def launch_gui( settings_directory = _ensure_clearex_settings_directory() settings_path = _resolve_dask_backend_settings_path(settings_directory) + execution_policy_settings_path = _resolve_execution_policy_settings_path( + settings_directory + ) zarr_settings_path = _resolve_zarr_save_settings_path(settings_directory) effective_initial = initial or WorkflowConfig() + persisted_execution_policy = _load_last_used_execution_policy( + settings_path=execution_policy_settings_path + ) persisted_backend = _load_last_used_dask_backend_config(settings_path=settings_path) persisted_zarr_save = _load_last_used_zarr_save_config( settings_path=zarr_settings_path ) + if ( + persisted_execution_policy is not None + and _should_apply_persisted_execution_policy(initial) + ): + effective_initial = replace( + effective_initial, + execution_policy=persisted_execution_policy, + ) if persisted_backend is not None and _should_apply_persisted_dask_backend(initial): effective_initial = replace(effective_initial, dask_backend=persisted_backend) if persisted_zarr_save is not None and _should_apply_persisted_zarr_save(initial): @@ -14111,7 +14726,9 @@ def _reset_analysis_selection_for_next_run(workflow: WorkflowConfig) -> Workflow ), "analysis_apply_to_all": workflow.analysis_apply_to_all, "prefer_dask": workflow.prefer_dask, + "execution_policy": workflow.execution_policy, "dask_backend": workflow.dask_backend, + "execution_plan": None, "chunks": workflow.chunks, "flatfield": False, "deconvolution": False, diff --git a/src/clearex/io/cli.py b/src/clearex/io/cli.py index c87f366..f93497c 100644 --- a/src/clearex/io/cli.py +++ b/src/clearex/io/cli.py @@ -232,6 +232,32 @@ def create_parser() -> argparse.ArgumentParser: default=None, help="Chunk spec for Dask, e.g. '256,256,64' or single int", ) + parser.add_argument( + "--execution-mode", + type=str, + choices=("auto", "advanced"), + default=None, + help="Execution planning mode for Dask resources", + ) + parser.add_argument( + "--max-workers", + type=int, + default=None, + help="Maximum worker count for automatic execution planning", + ) + parser.add_argument( + "--memory-per-worker", + type=str, + default=None, + help="Preferred per-worker memory limit for automatic execution planning", + ) + parser.add_argument( + "--calibrate", + required=False, + default=False, + action="store_true", + help="Refresh the cached execution profile before planning", + ) parser.add_argument( "--gui", diff --git a/src/clearex/io/provenance.py b/src/clearex/io/provenance.py index 374156b..b022676 100644 --- a/src/clearex/io/provenance.py +++ b/src/clearex/io/provenance.py @@ -51,8 +51,12 @@ from clearex.workflow import ( WorkflowConfig, dask_backend_to_dict, + execution_plan_to_dict, + execution_policy_to_dict, format_dask_backend_summary, format_chunks, + format_execution_plan_summary, + format_execution_policy_summary, format_zarr_chunks_ptczyx, format_zarr_pyramid_ptczyx, ) @@ -359,6 +363,22 @@ def _default_steps(workflow: WorkflowConfig) -> list[Dict[str, Any]]: "name": "load_data", "parameters": { "prefer_dask": workflow.prefer_dask, + "execution_policy_summary": format_execution_policy_summary( + workflow.execution_policy + ), + "execution_policy": execution_policy_to_dict( + workflow.execution_policy + ), + "execution_plan_summary": ( + format_execution_plan_summary(workflow.execution_plan) + if workflow.execution_plan is not None + else None + ), + "execution_plan": ( + execution_plan_to_dict(workflow.execution_plan) + if workflow.execution_plan is not None + else None + ), "chunks": format_chunks(workflow.chunks) or None, "dask_backend_summary": format_dask_backend_summary( workflow.dask_backend @@ -982,6 +1002,20 @@ def persist_run_provenance( workflow_payload = { "file": workflow.file, "prefer_dask": workflow.prefer_dask, + "execution_policy_summary": format_execution_policy_summary( + workflow.execution_policy + ), + "execution_policy": execution_policy_to_dict(workflow.execution_policy), + "execution_plan_summary": ( + format_execution_plan_summary(workflow.execution_plan) + if workflow.execution_plan is not None + else None + ), + "execution_plan": ( + execution_plan_to_dict(workflow.execution_plan) + if workflow.execution_plan is not None + else None + ), "dask_backend_summary": format_dask_backend_summary(workflow.dask_backend), "dask_backend": dask_backend_to_dict(workflow.dask_backend), "chunks": format_chunks(workflow.chunks) or None, diff --git a/src/clearex/main.py b/src/clearex/main.py index 6a088c8..f8423f2 100644 --- a/src/clearex/main.py +++ b/src/clearex/main.py @@ -28,8 +28,9 @@ # Standard Library Imports from contextlib import ExitStack from datetime import datetime, timezone +import inspect from pathlib import Path -from typing import Any, Callable, Dict, Optional, Sequence +from typing import Any, Callable, Dict, Mapping, Optional, Sequence import argparse import json import logging @@ -120,17 +121,26 @@ def run_usegment3d_analysis(*, zarr_path, parameters, client, progress_callback) DASK_BACKEND_SLURM_CLUSTER, DASK_BACKEND_SLURM_RUNNER, AnalysisInputReference, + CalibrationProfile, DaskBackendConfig, - LocalClusterConfig, + ExecutionPolicy, WorkflowConfig, WorkflowExecutionCancelled, analysis_chainable_output_component, + calibration_profile_from_dict, + calibration_profile_to_dict, collect_analysis_input_references, dask_backend_from_dict, dask_backend_to_dict, + execution_plan_to_dict, + execution_policy_from_dict, + execution_policy_to_dict, format_dask_backend_summary, format_chunks, + format_execution_plan_summary, + format_execution_policy_summary, normalize_analysis_operation_parameters, + plan_execution, recommend_local_cluster_config, resolve_analysis_input_component, resolve_analysis_execution_sequence, @@ -142,6 +152,10 @@ def run_usegment3d_analysis(*, zarr_path, parameters, client, progress_callback) _CLEAREX_SETTINGS_DIR_NAME = ".clearex" _CLEAREX_DASK_BACKEND_SETTINGS_FILE = "dask_backend_settings.json" +_CLEAREX_EXECUTION_POLICY_SETTINGS_FILE = "execution_policy_settings.json" +_CLEAREX_EXECUTION_CALIBRATION_PROFILES_FILE = ( + "execution_calibration_profiles.json" +) _ANALYSIS_OPERATIONS_REQUIRING_DASK_CLIENT = frozenset( { @@ -476,10 +490,43 @@ def _build_workflow_config(args: argparse.Namespace) -> WorkflowConfig: ) persisted_dask_backend = _load_persisted_dask_backend_config() + persisted_execution_policy = _load_persisted_execution_policy() + effective_execution_policy = ( + persisted_execution_policy + if persisted_execution_policy is not None + else ExecutionPolicy() + ) + execution_mode_arg = getattr(args, "execution_mode", None) + max_workers_arg = getattr(args, "max_workers", None) + memory_per_worker_arg = getattr(args, "memory_per_worker", None) + refresh_calibration = bool(getattr(args, "calibrate", False)) + effective_execution_policy = ExecutionPolicy( + mode=( + str(execution_mode_arg).strip().lower() + if execution_mode_arg is not None and str(execution_mode_arg).strip() + else effective_execution_policy.mode + ), + max_workers=( + int(max_workers_arg) + if max_workers_arg is not None + else effective_execution_policy.max_workers + ), + memory_per_worker_limit=( + str(memory_per_worker_arg).strip() + if memory_per_worker_arg is not None and str(memory_per_worker_arg).strip() + else effective_execution_policy.memory_per_worker_limit + ), + calibration_policy=( + "refresh" + if refresh_calibration + else effective_execution_policy.calibration_policy + ), + ) return WorkflowConfig( file=args.file, prefer_dask=args.dask, + execution_policy=effective_execution_policy, dask_backend=( persisted_dask_backend if persisted_dask_backend is not None @@ -552,6 +599,77 @@ def _load_persisted_dask_backend_config() -> Optional[DaskBackendConfig]: return dask_backend_from_dict(payload) +def _resolve_persisted_execution_policy_settings_path() -> Path: + """Resolve the user settings JSON path for persisted execution policy.""" + return ( + Path.home() + / _CLEAREX_SETTINGS_DIR_NAME + / _CLEAREX_EXECUTION_POLICY_SETTINGS_FILE + ).expanduser() + + +def _resolve_persisted_execution_calibration_profiles_path() -> Path: + """Resolve the user settings JSON path for persisted calibration profiles.""" + return ( + Path.home() + / _CLEAREX_SETTINGS_DIR_NAME + / _CLEAREX_EXECUTION_CALIBRATION_PROFILES_FILE + ).expanduser() + + +def _load_persisted_execution_policy() -> Optional[ExecutionPolicy]: + """Load persisted execution policy for CLI/headless execution.""" + settings_path = _resolve_persisted_execution_policy_settings_path() + if not settings_path.exists(): + return None + try: + payload = json.loads(settings_path.read_text(encoding="utf-8")) + except Exception: + return None + if not isinstance(payload, dict) or not payload: + return None + return execution_policy_from_dict(payload) + + +def _load_persisted_execution_calibration_profiles() -> Dict[str, CalibrationProfile]: + """Load persisted execution calibration profiles.""" + settings_path = _resolve_persisted_execution_calibration_profiles_path() + if not settings_path.exists(): + return {} + try: + payload = json.loads(settings_path.read_text(encoding="utf-8")) + except Exception: + return {} + if not isinstance(payload, dict): + return {} + profiles: Dict[str, CalibrationProfile] = {} + for key, value in payload.items(): + profile = calibration_profile_from_dict(value) + if profile is None: + continue + profiles[str(key)] = profile + return profiles + + +def _save_persisted_execution_calibration_profiles( + profiles: Mapping[str, CalibrationProfile], +) -> None: + """Persist execution calibration profiles best-effort.""" + settings_path = _resolve_persisted_execution_calibration_profiles_path() + try: + settings_path.parent.mkdir(parents=True, exist_ok=True) + payload = { + str(key): calibration_profile_to_dict(profile) + for key, profile in profiles.items() + } + settings_path.write_text( + json.dumps(payload, indent=2, sort_keys=True) + "\n", + encoding="utf-8", + ) + except Exception: + return + + def _extract_axis_map(info: ImageInfo) -> Dict[str, int]: """Map axis labels to corresponding dimension sizes. @@ -760,6 +878,8 @@ def _configure_dask_backend( exit_stack: ExitStack, *, workload: str = "io", + shape_tpczyx: Optional[tuple[int, int, int, int, int, int]] = None, + dtype_itemsize: Optional[int] = None, ) -> Optional[Any]: """Initialize and register the configured Dask backend. @@ -787,107 +907,97 @@ def _configure_dask_backend( Backend initialization errors are converted into warnings and the workflow continues without a distributed client. This keeps local/headless paths operational even when optional Dask distributed backends are unavailable. - When LocalCluster ``n_workers`` is unset, runtime applies aggressive - host/data-aware defaults from - :func:`clearex.workflow.recommend_local_cluster_config`, including worker - count and, when left at defaults, thread and memory settings. """ if not workflow.prefer_dask: logger.info("Dask lazy loading disabled; skipping backend startup.") return None - backend = workflow.dask_backend workload_name = workload.strip().lower() + calibration_profiles = _load_persisted_execution_calibration_profiles() + execution_plan = plan_execution( + workflow, + workload=workload_name, + shape_tpczyx=shape_tpczyx, + dtype_itemsize=dtype_itemsize, + calibration_profiles=calibration_profiles, + ) + workflow.execution_plan = execution_plan + backend = execution_plan.backend_config + if execution_plan.calibration_profile is not None: + calibration_profiles[execution_plan.calibration_profile.profile_key] = ( + execution_plan.calibration_profile + ) + _save_persisted_execution_calibration_profiles(calibration_profiles) + + logger.info( + "Execution policy: %s", + format_execution_policy_summary(workflow.execution_policy), + ) logger.info( - "Dask backend selection: " - f"{format_dask_backend_summary(backend)} (workload={workload_name})" + "Execution plan: %s", + format_execution_plan_summary(execution_plan), ) try: if backend.mode == DASK_BACKEND_LOCAL_CLUSTER: local_cfg = backend.local_cluster - requested_processes = workload_name == "analysis" - default_local_cfg = LocalClusterConfig() - effective_n_workers = local_cfg.n_workers - effective_threads_per_worker = local_cfg.threads_per_worker - effective_memory_limit = local_cfg.memory_limit - if effective_n_workers is None: + detected_gpu_count = int(execution_plan.environment.gpu_count) + legacy_local_worker_cap = ( + int(workflow.dask_backend.local_cluster.n_workers) + if workflow.dask_backend.mode == DASK_BACKEND_LOCAL_CLUSTER + and workflow.dask_backend.local_cluster.n_workers is not None + else None + ) + if execution_plan.worker_kind == "gpu_process": recommendation = recommend_local_cluster_config( + shape_tpczyx=shape_tpczyx, chunks_tpczyx=workflow.zarr_save.chunks_tpczyx(), + dtype_itemsize=dtype_itemsize, ) - effective_n_workers = recommendation.config.n_workers - if local_cfg.threads_per_worker == default_local_cfg.threads_per_worker: - effective_threads_per_worker = ( - recommendation.config.threads_per_worker - ) - if ( - str(local_cfg.memory_limit).strip().lower() - == str(default_local_cfg.memory_limit).strip().lower() - ): - effective_memory_limit = recommendation.config.memory_limit - logger.info( - "Auto-selected aggressive LocalCluster settings from " - "host/data recommendation: " - f"workers={effective_n_workers}, " - f"threads_per_worker={effective_threads_per_worker}, " - f"memory_limit={effective_memory_limit}, " - f"gpus={recommendation.detected_gpu_count}." + detected_gpu_count = max( + detected_gpu_count, + int(recommendation.detected_gpu_count), ) - - if workload_name == "analysis": - gpu_worker_cap: Optional[int] = None - use_gpu_local_cluster = False - if bool(getattr(workflow, "usegment3d", False)): - try: - normalized_params = normalize_analysis_operation_parameters( - workflow.analysis_parameters - ) - except Exception: - normalized_params = {} - usegment3d_params = dict(normalized_params.get("usegment3d", {})) - gpu_requested = bool( - usegment3d_params.get("gpu", False) - or usegment3d_params.get("require_gpu", False) - ) - if gpu_requested: - gpu_recommendation = recommend_local_cluster_config( - chunks_tpczyx=workflow.zarr_save.chunks_tpczyx(), - ) - detected_gpu_count = int(gpu_recommendation.detected_gpu_count) - if detected_gpu_count > 0: - gpu_worker_cap = max(1, detected_gpu_count) - use_gpu_local_cluster = True - - if gpu_worker_cap is not None: - requested_workers = ( - int(effective_n_workers) - if effective_n_workers is not None - else int(gpu_worker_cap) - ) - if requested_workers > int(gpu_worker_cap): - logger.info( - "GPU-aware LocalCluster cap applied for analysis: " - f"requested_workers={requested_workers}, " - f"capped_workers={int(gpu_worker_cap)}." - ) - effective_n_workers = int(gpu_worker_cap) - else: - use_gpu_local_cluster = False - - effective_worker_count = ( - int(effective_n_workers) if effective_n_workers is not None else 1 + use_gpu_local_cluster = ( + execution_plan.worker_kind == "gpu_process" + and detected_gpu_count > 0 ) - use_processes = bool(requested_processes or effective_worker_count > 1) - if not requested_processes and use_processes: + effective_worker_count = int(local_cfg.n_workers or execution_plan.workers) + if ( + workload_name != "analysis" + and legacy_local_worker_cap is not None + and workflow.execution_policy.max_workers is None + ): + effective_worker_count = int(legacy_local_worker_cap) + elif use_gpu_local_cluster: + effective_worker_cap = int( + workflow.execution_policy.max_workers + if workflow.execution_policy.max_workers is not None + else ( + legacy_local_worker_cap + if legacy_local_worker_cap is not None + else effective_worker_count + ) + ) + effective_worker_count = min( + max(1, effective_worker_cap), + max(1, detected_gpu_count), + ) + use_processes = True + if execution_plan.worker_kind == "thread": + use_processes = False + elif workload_name != "analysis" and effective_worker_count <= 1: + use_processes = False + elif workload_name != "analysis" and effective_worker_count > 1: logger.info( "Using process-based LocalCluster for multi-worker I/O " "execution (memory isolation enabled)." ) client = create_dask_client( - n_workers=effective_n_workers, - threads_per_worker=effective_threads_per_worker, + n_workers=effective_worker_count, + threads_per_worker=int(local_cfg.threads_per_worker), processes=use_processes, - memory_limit=effective_memory_limit, + memory_limit=local_cfg.memory_limit, local_directory=local_cfg.local_directory, gpu_enabled=use_gpu_local_cluster, ) @@ -930,16 +1040,14 @@ def _configure_dask_backend( cluster_cfg = backend.slurm_cluster if ( - workload.strip().lower() == "analysis" + workload_name == "analysis" and int(cluster_cfg.processes) == 1 and int(cluster_cfg.cores) > 1 ): logger.warning( "SLURMCluster is configured with processes=1 and cores=%s. " - "CPU-bound Python analyses (for example shear transform) may " - "underutilize allocated CPUs with this layout. " - "For maximum process-level parallelism, increase processes " - "toward cores in the Dask backend configuration.", + "CPU-bound Python analyses may underutilize allocated CPUs " + "with this layout.", cluster_cfg.cores, ) extra_directives = [ @@ -996,6 +1104,43 @@ def _configure_dask_backend( return None +def _callable_accepts_keyword_argument( + callback: Callable[..., Any], + *, + keyword: str, +) -> bool: + """Return whether a callable accepts one keyword argument. + + Parameters + ---------- + callback : callable + Callable to inspect. + keyword : str + Keyword argument name. + + Returns + ------- + bool + ``True`` when the callable explicitly accepts the keyword or a + ``**kwargs`` catch-all. + + Notes + ----- + Inspection failures default to ``True`` so opaque callables remain + callable through this compatibility check. + """ + try: + signature = inspect.signature(callback) + except (TypeError, ValueError): + return True + if keyword in signature.parameters: + return True + return any( + parameter.kind is inspect.Parameter.VAR_KEYWORD + for parameter in signature.parameters.values() + ) + + def _run_workflow( workflow: WorkflowConfig, logger: logging.Logger, @@ -1173,6 +1318,14 @@ def _emit_analysis_progress(percent: int, message: str) -> None: "parameters": { "source_path": input_path, "prefer_dask": workflow.prefer_dask, + "execution_policy": execution_policy_to_dict( + workflow.execution_policy + ), + "execution_plan": ( + execution_plan_to_dict(workflow.execution_plan) + if workflow.execution_plan is not None + else None + ), "chunks": format_chunks(workflow.chunks) or None, "dask_backend": dask_backend_to_dict(workflow.dask_backend), }, @@ -1258,17 +1411,28 @@ def _emit_analysis_progress(percent: int, message: str) -> None: ) _emit_analysis_progress(100, str(first_issue.message)) - analysis_client = ( - _configure_dask_backend( - workflow=workflow, - logger=logger, - exit_stack=analysis_stack, - workload="analysis", - ) - if failure_exc is None + analysis_client = None + if ( + failure_exc is None and _analysis_execution_requires_dask_client(execution_sequence) - else None - ) + ): + configure_kwargs: Dict[str, Any] = { + "workflow": workflow, + "logger": logger, + "exit_stack": analysis_stack, + "workload": "analysis", + } + dtype_itemsize = ( + int(getattr(image_info.dtype, "itemsize", 0)) + if image_info is not None + else None + ) + if dtype_itemsize is not None and _callable_accepts_keyword_argument( + _configure_dask_backend, + keyword="dtype_itemsize", + ): + configure_kwargs["dtype_itemsize"] = dtype_itemsize + analysis_client = _configure_dask_backend(**configure_kwargs) produced_components: Dict[str, str] = {"data": "data"} total_operations = max(1, len(execution_sequence)) @@ -2379,7 +2543,9 @@ def _mip_export_progress(percent: int, message: str) -> None: provenance_workflow = WorkflowConfig( file=input_path, prefer_dask=workflow.prefer_dask, + execution_policy=workflow.execution_policy, dask_backend=workflow.dask_backend, + execution_plan=workflow.execution_plan, chunks=workflow.chunks, flatfield=workflow.flatfield, deconvolution=workflow.deconvolution, diff --git a/src/clearex/workflow.py b/src/clearex/workflow.py index eed8382..085c9cf 100644 --- a/src/clearex/workflow.py +++ b/src/clearex/workflow.py @@ -26,10 +26,14 @@ from copy import deepcopy from dataclasses import dataclass, field +import hashlib +from importlib.metadata import PackageNotFoundError, version +import json import math import os +import re import subprocess -from typing import Any, Collection, Dict, Literal, Mapping, Optional, Sequence, Tuple, Union +from typing import Any, Collection, Dict, Literal, Mapping, Optional, Sequence, Tuple, Union, cast ChunkSpec = Optional[Union[int, Tuple[int, ...]]] @@ -2761,6 +2765,26 @@ def zarr_save_from_dict(payload: Any) -> ZarrSaveConfig: DASK_BACKEND_SLURM_CLUSTER: "SLURMCluster", } +EXECUTION_POLICY_AUTO = "auto" +EXECUTION_POLICY_ADVANCED = "advanced" +ExecutionPolicyMode = Literal["auto", "advanced"] + +EXECUTION_CALIBRATION_USE_IF_AVAILABLE = "use_if_available" +EXECUTION_CALIBRATION_REFRESH = "refresh" +ExecutionCalibrationPolicy = Literal["use_if_available", "refresh"] + +EXECUTION_GPU_MODE_NEVER = "never" +EXECUTION_GPU_MODE_OPTIONAL = "optional" +EXECUTION_GPU_MODE_REQUIRED = "required" +ExecutionGpuMode = Literal["never", "optional", "required"] + +EXECUTION_WORKER_KIND_THREAD = "thread" +EXECUTION_WORKER_KIND_PROCESS = "process" +EXECUTION_WORKER_KIND_GPU_PROCESS = "gpu_process" +ExecutionWorkerKind = Literal["thread", "process", "gpu_process"] + +EXECUTION_PLAN_MODEL_VERSION = "1" + DEFAULT_SLURM_CLUSTER_JOB_EXTRA_DIRECTIVES: Tuple[str, ...] = ( "--nodes=1", "--ntasks=1", @@ -3218,6 +3242,316 @@ def __post_init__(self) -> None: object.__setattr__(self, "mode", mode) +@dataclass(frozen=True) +class ExecutionPolicy: + """Operator-facing execution-planning policy.""" + + mode: ExecutionPolicyMode = EXECUTION_POLICY_AUTO + max_workers: Optional[int] = None + memory_per_worker_limit: str = "auto" + calibration_policy: ExecutionCalibrationPolicy = ( + EXECUTION_CALIBRATION_USE_IF_AVAILABLE + ) + + def __post_init__(self) -> None: + """Validate execution-policy values.""" + mode = str(self.mode).strip().lower() + if mode not in {EXECUTION_POLICY_AUTO, EXECUTION_POLICY_ADVANCED}: + raise ValueError("Execution policy mode must be 'auto' or 'advanced'.") + object.__setattr__(self, "mode", mode) + object.__setattr__( + self, + "max_workers", + _normalize_optional_positive_int( + self.max_workers, + field_name="ExecutionPolicy max_workers", + ), + ) + memory_limit = ( + str(self.memory_per_worker_limit).strip() + if self.memory_per_worker_limit is not None + else "auto" + ) + object.__setattr__( + self, + "memory_per_worker_limit", + memory_limit or "auto", + ) + calibration_policy = str(self.calibration_policy).strip().lower() + if calibration_policy not in { + EXECUTION_CALIBRATION_USE_IF_AVAILABLE, + EXECUTION_CALIBRATION_REFRESH, + }: + raise ValueError( + "Execution policy calibration_policy must be " + "'use_if_available' or 'refresh'." + ) + object.__setattr__(self, "calibration_policy", calibration_policy) + + +@dataclass(frozen=True) +class AnalysisResourceDescriptor: + """Backend-agnostic resource model for one analysis operation.""" + + operation_name: str + chunk_basis: str + uses_overlap: bool + seed_memory_multiplier: float + seed_cpu_intensity: float + io_intensity: float + gpu_mode: ExecutionGpuMode = EXECUTION_GPU_MODE_NEVER + preferred_worker_kind: ExecutionWorkerKind = EXECUTION_WORKER_KIND_PROCESS + supports_chunk_calibration: bool = False + + def __post_init__(self) -> None: + """Validate descriptor values.""" + if float(self.seed_memory_multiplier) <= 0: + raise ValueError( + "AnalysisResourceDescriptor seed_memory_multiplier must be > 0." + ) + if float(self.seed_cpu_intensity) <= 0: + raise ValueError( + "AnalysisResourceDescriptor seed_cpu_intensity must be > 0." + ) + if float(self.io_intensity) < 0: + raise ValueError("AnalysisResourceDescriptor io_intensity cannot be negative.") + gpu_mode = str(self.gpu_mode).strip().lower() + if gpu_mode not in { + EXECUTION_GPU_MODE_NEVER, + EXECUTION_GPU_MODE_OPTIONAL, + EXECUTION_GPU_MODE_REQUIRED, + }: + raise ValueError("AnalysisResourceDescriptor gpu_mode is invalid.") + object.__setattr__(self, "gpu_mode", gpu_mode) + worker_kind = str(self.preferred_worker_kind).strip().lower() + if worker_kind not in { + EXECUTION_WORKER_KIND_THREAD, + EXECUTION_WORKER_KIND_PROCESS, + EXECUTION_WORKER_KIND_GPU_PROCESS, + }: + raise ValueError( + "AnalysisResourceDescriptor preferred_worker_kind is invalid." + ) + object.__setattr__(self, "preferred_worker_kind", worker_kind) + + +@dataclass(frozen=True) +class EnvironmentCapabilities: + """Detected execution-environment capabilities.""" + + cpu_count: int + memory_bytes: int + gpu_count: int + gpu_memory_bytes: Optional[int] + attached_scheduler_file: Optional[str] = None + scheduler_mode: str = "local" + + def __post_init__(self) -> None: + """Normalize environment-capability values.""" + object.__setattr__(self, "cpu_count", max(1, int(self.cpu_count))) + object.__setattr__(self, "memory_bytes", max(1 << 30, int(self.memory_bytes))) + object.__setattr__(self, "gpu_count", max(0, int(self.gpu_count))) + if self.gpu_memory_bytes is not None: + object.__setattr__( + self, + "gpu_memory_bytes", + max(1, int(self.gpu_memory_bytes)), + ) + object.__setattr__( + self, + "attached_scheduler_file", + _normalize_optional_text(self.attached_scheduler_file), + ) + scheduler_mode = str(self.scheduler_mode).strip().lower() or "local" + object.__setattr__(self, "scheduler_mode", scheduler_mode) + + +@dataclass(frozen=True) +class CalibrationProfile: + """Versioned execution-calibration profile.""" + + profile_key: str + operation_names: Tuple[str, ...] + parameter_signature: str + chunk_shape_tpczyx: Tuple[int, int, int, int, int, int] + dtype_itemsize: int + sample_chunk_count: int + estimated_peak_memory_bytes: int + estimated_seconds_per_chunk: float + cpu_utilization: float + source: str = "geometry_estimate" + confidence: float = 0.35 + environment_fingerprint: str = "" + software_version: str = "" + model_version: str = EXECUTION_PLAN_MODEL_VERSION + + def __post_init__(self) -> None: + """Validate profile values.""" + object.__setattr__(self, "profile_key", str(self.profile_key).strip()) + object.__setattr__( + self, + "operation_names", + tuple( + str(name).strip() for name in self.operation_names if str(name).strip() + ), + ) + object.__setattr__( + self, + "parameter_signature", + str(self.parameter_signature).strip(), + ) + object.__setattr__( + self, + "chunk_shape_tpczyx", + tuple(int(v) for v in self.chunk_shape_tpczyx), + ) + object.__setattr__(self, "dtype_itemsize", max(1, int(self.dtype_itemsize))) + object.__setattr__( + self, + "sample_chunk_count", + max(1, int(self.sample_chunk_count)), + ) + object.__setattr__( + self, + "estimated_peak_memory_bytes", + max(1, int(self.estimated_peak_memory_bytes)), + ) + object.__setattr__( + self, + "estimated_seconds_per_chunk", + max(0.01, float(self.estimated_seconds_per_chunk)), + ) + object.__setattr__( + self, + "cpu_utilization", + max(0.05, float(self.cpu_utilization)), + ) + object.__setattr__(self, "source", str(self.source).strip() or "geometry_estimate") + object.__setattr__( + self, + "confidence", + max(0.0, min(1.0, float(self.confidence))), + ) + object.__setattr__( + self, + "environment_fingerprint", + str(self.environment_fingerprint).strip(), + ) + object.__setattr__( + self, + "software_version", + str(self.software_version).strip(), + ) + object.__setattr__( + self, + "model_version", + str(self.model_version).strip() or EXECUTION_PLAN_MODEL_VERSION, + ) + + +@dataclass(frozen=True) +class WorkerEnvelope: + """Generic worker capacity envelope.""" + + cpus: int + memory_bytes: int + gpus: int = 0 + gpu_memory_bytes: Optional[int] = None + scratch_directory: Optional[str] = None + + def __post_init__(self) -> None: + """Normalize envelope values.""" + object.__setattr__(self, "cpus", max(1, int(self.cpus))) + object.__setattr__(self, "memory_bytes", max(1 << 30, int(self.memory_bytes))) + object.__setattr__(self, "gpus", max(0, int(self.gpus))) + if self.gpu_memory_bytes is not None: + object.__setattr__( + self, + "gpu_memory_bytes", + max(1, int(self.gpu_memory_bytes)), + ) + object.__setattr__( + self, + "scratch_directory", + _normalize_optional_text(self.scratch_directory), + ) + + +@dataclass(frozen=True) +class ExecutionPlan: + """Effective execution plan derived from workflow context.""" + + policy_mode: ExecutionPolicyMode + workload: str + selected_operations: Tuple[str, ...] + worker_kind: ExecutionWorkerKind + backend_config: DaskBackendConfig + workers: int + threads_per_worker: int + memory_per_worker_limit: str + estimated_chunk_bytes: int + estimated_working_set_bytes: int + estimated_chunk_count: Optional[int] + requires_gpu: bool + environment: EnvironmentCapabilities + calibration_profile: Optional[CalibrationProfile] = None + + def __post_init__(self) -> None: + """Normalize execution-plan values.""" + object.__setattr__( + self, + "policy_mode", + str(self.policy_mode).strip().lower() or EXECUTION_POLICY_AUTO, + ) + object.__setattr__( + self, + "workload", + str(self.workload).strip().lower() or "analysis", + ) + object.__setattr__( + self, + "selected_operations", + tuple( + str(name).strip() for name in self.selected_operations if str(name).strip() + ), + ) + worker_kind = str(self.worker_kind).strip().lower() + if worker_kind not in { + EXECUTION_WORKER_KIND_THREAD, + EXECUTION_WORKER_KIND_PROCESS, + EXECUTION_WORKER_KIND_GPU_PROCESS, + }: + raise ValueError("ExecutionPlan worker_kind is invalid.") + object.__setattr__(self, "worker_kind", worker_kind) + object.__setattr__(self, "workers", max(1, int(self.workers))) + object.__setattr__( + self, + "threads_per_worker", + max(1, int(self.threads_per_worker)), + ) + object.__setattr__( + self, + "memory_per_worker_limit", + str(self.memory_per_worker_limit).strip() or "auto", + ) + object.__setattr__( + self, + "estimated_chunk_bytes", + max(1, int(self.estimated_chunk_bytes)), + ) + object.__setattr__( + self, + "estimated_working_set_bytes", + max(1, int(self.estimated_working_set_bytes)), + ) + if self.estimated_chunk_count is not None: + object.__setattr__( + self, + "estimated_chunk_count", + max(1, int(self.estimated_chunk_count)), + ) + + @dataclass(frozen=True) class LocalClusterRecommendation: """Recommended LocalCluster settings derived from host and data context. @@ -3711,6 +4045,436 @@ def format_local_cluster_recommendation_summary( return " | ".join(parts) +_MEMORY_TEXT_PATTERN = re.compile( + r"^\s*(?P\d+(?:\.\d+)?)\s*(?P[kmgt]?i?b)?\s*$", + re.IGNORECASE, +) + + +def _parse_memory_limit_bytes(value: Optional[str]) -> Optional[int]: + """Parse a human-readable memory limit into bytes.""" + if value is None: + return None + text = str(value).strip() + if not text or text.lower() == "auto": + return None + match = _MEMORY_TEXT_PATTERN.match(text) + if match is None: + return None + scalar = float(match.group("value")) + unit = str(match.group("unit") or "b").lower() + multipliers = { + "b": 1, + "kb": 1000, + "mb": 1000**2, + "gb": 1000**3, + "tb": 1000**4, + "kib": 1 << 10, + "mib": 1 << 20, + "gib": 1 << 30, + "tib": 1 << 40, + } + multiplier = multipliers.get(unit) + if multiplier is None: + return None + return max(1, int(scalar * multiplier)) + + +def _clearex_software_version() -> str: + """Return a best-effort ClearEx software version string.""" + try: + return str(version("clearex")).strip() + except PackageNotFoundError: + return "unknown" + except Exception: + return "unknown" + + +def _environment_fingerprint(capabilities: EnvironmentCapabilities) -> str: + """Build a stable environment fingerprint for profile keys.""" + parts = [ + f"cpu={capabilities.cpu_count}", + f"memory={capabilities.memory_bytes}", + f"gpu={capabilities.gpu_count}", + f"gpu_memory={capabilities.gpu_memory_bytes or 0}", + f"scheduler={capabilities.scheduler_mode}", + ] + if capabilities.attached_scheduler_file: + parts.append(f"scheduler_file={capabilities.attached_scheduler_file}") + return "|".join(parts) + + +def detect_environment_capabilities( + *, + scheduler_file: Optional[str] = None, +) -> EnvironmentCapabilities: + """Detect generic execution-environment capabilities.""" + detected_gpu_count, detected_gpu_memory = _detect_local_gpu_info() + attached_scheduler_file = _normalize_optional_text( + scheduler_file or os.environ.get("DASK_SCHEDULER_FILE") + ) + scheduler_mode = "attached_scheduler" if attached_scheduler_file else "local" + return EnvironmentCapabilities( + cpu_count=_detect_local_cpu_count(), + memory_bytes=_detect_local_memory_bytes(), + gpu_count=detected_gpu_count, + gpu_memory_bytes=detected_gpu_memory, + attached_scheduler_file=attached_scheduler_file, + scheduler_mode=scheduler_mode, + ) + + +def default_analysis_resource_descriptors() -> Dict[str, AnalysisResourceDescriptor]: + """Return seeded analysis resource descriptors keyed by operation name.""" + descriptor_defaults: Dict[str, Dict[str, Any]] = { + "flatfield": { + "seed_cpu_intensity": 0.9, + "io_intensity": 0.4, + "gpu_mode": EXECUTION_GPU_MODE_NEVER, + "preferred_worker_kind": EXECUTION_WORKER_KIND_PROCESS, + }, + "deconvolution": { + "seed_cpu_intensity": 1.2, + "io_intensity": 0.35, + "gpu_mode": EXECUTION_GPU_MODE_OPTIONAL, + "preferred_worker_kind": EXECUTION_WORKER_KIND_PROCESS, + }, + "shear_transform": { + "seed_cpu_intensity": 1.1, + "io_intensity": 0.25, + "gpu_mode": EXECUTION_GPU_MODE_NEVER, + "preferred_worker_kind": EXECUTION_WORKER_KIND_PROCESS, + }, + "particle_detection": { + "seed_cpu_intensity": 0.85, + "io_intensity": 0.2, + "gpu_mode": EXECUTION_GPU_MODE_NEVER, + "preferred_worker_kind": EXECUTION_WORKER_KIND_PROCESS, + }, + "usegment3d": { + "seed_cpu_intensity": 1.0, + "io_intensity": 0.3, + "gpu_mode": EXECUTION_GPU_MODE_OPTIONAL, + "preferred_worker_kind": EXECUTION_WORKER_KIND_GPU_PROCESS, + }, + "registration": { + "seed_cpu_intensity": 0.9, + "io_intensity": 0.35, + "gpu_mode": EXECUTION_GPU_MODE_NEVER, + "preferred_worker_kind": EXECUTION_WORKER_KIND_PROCESS, + }, + "visualization": { + "seed_cpu_intensity": 0.4, + "io_intensity": 0.6, + "gpu_mode": EXECUTION_GPU_MODE_OPTIONAL, + "preferred_worker_kind": EXECUTION_WORKER_KIND_THREAD, + }, + "mip_export": { + "seed_cpu_intensity": 0.6, + "io_intensity": 0.8, + "gpu_mode": EXECUTION_GPU_MODE_NEVER, + "preferred_worker_kind": EXECUTION_WORKER_KIND_PROCESS, + }, + } + descriptors: Dict[str, AnalysisResourceDescriptor] = {} + normalized_defaults = normalize_analysis_operation_parameters(None) + for operation_name in ANALYSIS_OPERATION_ORDER: + params = dict(normalized_defaults.get(operation_name, {})) + descriptor_overrides = descriptor_defaults.get(operation_name, {}) + descriptors[operation_name] = AnalysisResourceDescriptor( + operation_name=operation_name, + chunk_basis=str(params.get("chunk_basis", "3d")).strip() or "3d", + uses_overlap=bool(params.get("use_map_overlap", False)), + seed_memory_multiplier=float(params.get("memory_overhead_factor", 1.0)), + seed_cpu_intensity=float( + descriptor_overrides.get("seed_cpu_intensity", 1.0) + ), + io_intensity=float(descriptor_overrides.get("io_intensity", 0.25)), + gpu_mode=str( + descriptor_overrides.get("gpu_mode", EXECUTION_GPU_MODE_NEVER) + ), + preferred_worker_kind=str( + descriptor_overrides.get( + "preferred_worker_kind", + EXECUTION_WORKER_KIND_PROCESS, + ) + ), + supports_chunk_calibration=False, + ) + return descriptors + + +def _selected_analysis_parameter_signature( + operation_names: Sequence[str], + analysis_parameters: Optional[Mapping[str, Mapping[str, Any]]], +) -> str: + """Return a stable parameter signature for selected operations.""" + payload: Dict[str, Any] = {} + normalized = normalize_analysis_operation_parameters(analysis_parameters) + for operation_name in operation_names: + payload[str(operation_name)] = normalized.get(str(operation_name), {}) + return json.dumps(payload, sort_keys=True, default=str, separators=(",", ":")) + + +def build_execution_calibration_profile( + *, + operation_names: Sequence[str], + analysis_parameters: Optional[Mapping[str, Mapping[str, Any]]], + chunks_tpczyx: Tuple[int, int, int, int, int, int], + dtype_itemsize: int, + capabilities: EnvironmentCapabilities, + descriptors: Optional[Mapping[str, AnalysisResourceDescriptor]] = None, + estimated_chunk_count: Optional[int] = None, +) -> CalibrationProfile: + """Build a versioned execution profile from dataset geometry and defaults.""" + descriptor_map = ( + dict(descriptors) if descriptors is not None else default_analysis_resource_descriptors() + ) + normalized = normalize_analysis_operation_parameters(analysis_parameters) + selected_operations = tuple( + str(name).strip() for name in operation_names if str(name).strip() + ) + if not selected_operations: + selected_operations = ("analysis",) + itemsize = max(1, int(dtype_itemsize)) + effective_chunks = tuple(int(v) for v in chunks_tpczyx) + estimated_chunk_bytes = max(1, math.prod(effective_chunks) * itemsize) + + memory_multiplier = 1.0 + cpu_intensity = 0.5 + overlap_factor = 1.0 + for operation_name in selected_operations: + params = dict(normalized.get(operation_name, {})) + descriptor = descriptor_map.get(operation_name) + if descriptor is None: + continue + memory_multiplier = max( + memory_multiplier, + float(descriptor.seed_memory_multiplier), + ) + cpu_intensity = max(cpu_intensity, float(descriptor.seed_cpu_intensity)) + if bool(params.get("use_map_overlap", descriptor.uses_overlap)): + overlap_zyx = params.get("overlap_zyx", [0, 0, 0]) + if isinstance(overlap_zyx, Collection) and len(overlap_zyx) == 3: + zyx_chunks = effective_chunks[3:] + candidate_factor = 1.0 + for chunk_value, overlap_value in zip( + zyx_chunks, + overlap_zyx, + strict=False, + ): + chunk_size = max(1, int(chunk_value)) + overlap_size = max(0, int(overlap_value)) + candidate_factor *= min( + 4.0, + (chunk_size + (2 * overlap_size)) / chunk_size, + ) + overlap_factor = max(overlap_factor, candidate_factor) + + estimated_peak_memory_bytes = max( + estimated_chunk_bytes, + int(math.ceil(estimated_chunk_bytes * memory_multiplier * overlap_factor)), + ) + estimated_seconds_per_chunk = max( + 0.05, + round( + (estimated_peak_memory_bytes / float(256 << 20)) + * max(0.25, float(cpu_intensity)), + 3, + ), + ) + sample_chunk_count = max( + 1, + min(10, int(estimated_chunk_count or 10)), + ) + parameter_signature = _selected_analysis_parameter_signature( + selected_operations, + normalized, + ) + fingerprint = _environment_fingerprint(capabilities) + key_material = { + "model_version": EXECUTION_PLAN_MODEL_VERSION, + "operations": list(selected_operations), + "parameter_signature": parameter_signature, + "chunks_tpczyx": list(effective_chunks), + "dtype_itemsize": itemsize, + "environment": fingerprint, + "software_version": _clearex_software_version(), + } + profile_key = hashlib.sha256( + json.dumps(key_material, sort_keys=True, separators=(",", ":")).encode( + "utf-8" + ) + ).hexdigest() + return CalibrationProfile( + profile_key=profile_key, + operation_names=selected_operations, + parameter_signature=parameter_signature, + chunk_shape_tpczyx=effective_chunks, + dtype_itemsize=itemsize, + sample_chunk_count=sample_chunk_count, + estimated_peak_memory_bytes=estimated_peak_memory_bytes, + estimated_seconds_per_chunk=estimated_seconds_per_chunk, + cpu_utilization=min(1.0, max(0.1, cpu_intensity / 1.25)), + source="geometry_estimate", + confidence=0.35, + environment_fingerprint=fingerprint, + software_version=_clearex_software_version(), + model_version=EXECUTION_PLAN_MODEL_VERSION, + ) + + +def execution_policy_to_dict(config: ExecutionPolicy) -> Dict[str, Any]: + """Serialize execution policy into a JSON-friendly mapping.""" + return { + "mode": config.mode, + "max_workers": config.max_workers, + "memory_per_worker_limit": config.memory_per_worker_limit, + "calibration_policy": config.calibration_policy, + } + + +def execution_policy_from_dict(payload: Any) -> ExecutionPolicy: + """Deserialize an execution-policy mapping.""" + defaults = ExecutionPolicy() + if not isinstance(payload, Mapping): + return defaults + try: + return ExecutionPolicy( + mode=str(payload.get("mode", defaults.mode)).strip().lower() + or defaults.mode, + max_workers=payload.get("max_workers", defaults.max_workers), + memory_per_worker_limit=payload.get( + "memory_per_worker_limit", + defaults.memory_per_worker_limit, + ), + calibration_policy=payload.get( + "calibration_policy", + defaults.calibration_policy, + ), + ) + except Exception: + return defaults + + +def calibration_profile_to_dict(profile: CalibrationProfile) -> Dict[str, Any]: + """Serialize a calibration profile.""" + return { + "profile_key": profile.profile_key, + "operation_names": list(profile.operation_names), + "parameter_signature": profile.parameter_signature, + "chunk_shape_tpczyx": list(profile.chunk_shape_tpczyx), + "dtype_itemsize": profile.dtype_itemsize, + "sample_chunk_count": profile.sample_chunk_count, + "estimated_peak_memory_bytes": profile.estimated_peak_memory_bytes, + "estimated_seconds_per_chunk": profile.estimated_seconds_per_chunk, + "cpu_utilization": profile.cpu_utilization, + "source": profile.source, + "confidence": profile.confidence, + "environment_fingerprint": profile.environment_fingerprint, + "software_version": profile.software_version, + "model_version": profile.model_version, + } + + +def calibration_profile_from_dict(payload: Any) -> Optional[CalibrationProfile]: + """Deserialize a calibration profile mapping.""" + if not isinstance(payload, Mapping): + return None + try: + return CalibrationProfile( + profile_key=str(payload.get("profile_key", "")).strip(), + operation_names=tuple(payload.get("operation_names", tuple())), + parameter_signature=str(payload.get("parameter_signature", "")).strip(), + chunk_shape_tpczyx=tuple(payload.get("chunk_shape_tpczyx", (1, 1, 1, 256, 256, 256))), + dtype_itemsize=payload.get("dtype_itemsize", 2), + sample_chunk_count=payload.get("sample_chunk_count", 1), + estimated_peak_memory_bytes=payload.get("estimated_peak_memory_bytes", 1), + estimated_seconds_per_chunk=payload.get("estimated_seconds_per_chunk", 0.1), + cpu_utilization=payload.get("cpu_utilization", 0.5), + source=str(payload.get("source", "geometry_estimate")).strip(), + confidence=payload.get("confidence", 0.35), + environment_fingerprint=str( + payload.get("environment_fingerprint", "") + ).strip(), + software_version=str(payload.get("software_version", "")).strip(), + model_version=str(payload.get("model_version", EXECUTION_PLAN_MODEL_VERSION)).strip() + or EXECUTION_PLAN_MODEL_VERSION, + ) + except Exception: + return None + + +def environment_capabilities_to_dict( + capabilities: EnvironmentCapabilities, +) -> Dict[str, Any]: + """Serialize environment capabilities.""" + return { + "cpu_count": capabilities.cpu_count, + "memory_bytes": capabilities.memory_bytes, + "gpu_count": capabilities.gpu_count, + "gpu_memory_bytes": capabilities.gpu_memory_bytes, + "attached_scheduler_file": capabilities.attached_scheduler_file, + "scheduler_mode": capabilities.scheduler_mode, + } + + +def execution_plan_to_dict(plan: ExecutionPlan) -> Dict[str, Any]: + """Serialize an execution plan.""" + return { + "policy_mode": plan.policy_mode, + "workload": plan.workload, + "selected_operations": list(plan.selected_operations), + "worker_kind": plan.worker_kind, + "workers": plan.workers, + "threads_per_worker": plan.threads_per_worker, + "memory_per_worker_limit": plan.memory_per_worker_limit, + "estimated_chunk_bytes": plan.estimated_chunk_bytes, + "estimated_working_set_bytes": plan.estimated_working_set_bytes, + "estimated_chunk_count": plan.estimated_chunk_count, + "requires_gpu": plan.requires_gpu, + "backend_config": dask_backend_to_dict(plan.backend_config), + "environment": environment_capabilities_to_dict(plan.environment), + "calibration_profile": ( + calibration_profile_to_dict(plan.calibration_profile) + if plan.calibration_profile is not None + else None + ), + } + + +def format_execution_policy_summary(config: ExecutionPolicy) -> str: + """Format a compact execution-policy summary.""" + max_workers_text = ( + str(config.max_workers) if config.max_workers is not None else "auto" + ) + return ( + f"{config.mode} " + f"(max_workers={max_workers_text}, " + f"memory_per_worker={config.memory_per_worker_limit}, " + f"calibration={config.calibration_policy})" + ) + + +def format_execution_plan_summary(plan: ExecutionPlan) -> str: + """Format a compact execution-plan summary.""" + backend_summary = format_dask_backend_summary(plan.backend_config) + parts = [ + backend_summary, + f"workers={plan.workers}", + f"threads={plan.threads_per_worker}", + f"memory={plan.memory_per_worker_limit}", + f"~{_format_binary_size(plan.estimated_working_set_bytes)} working set/chunk", + ] + if plan.requires_gpu: + parts.append("gpu=yes") + if plan.calibration_profile is not None: + parts.append( + f"profile={plan.calibration_profile.source}:{plan.calibration_profile.confidence:.2f}" + ) + return " | ".join(parts) + + def dask_backend_to_dict(config: DaskBackendConfig) -> Dict[str, Any]: """Serialize Dask backend config into JSON-friendly mappings. @@ -3903,10 +4667,13 @@ def dask_backend_from_dict(payload: Any) -> DaskBackendConfig: slurm_cluster = defaults.slurm_cluster mode_value = str(payload.get("mode", defaults.mode)).strip().lower() - mode = ( - mode_value - if mode_value in DASK_BACKEND_MODE_LABELS - else DASK_BACKEND_LOCAL_CLUSTER + mode = cast( + DaskBackendMode, + ( + mode_value + if mode_value in DASK_BACKEND_MODE_LABELS + else DASK_BACKEND_LOCAL_CLUSTER + ), ) try: @@ -4045,8 +4812,12 @@ class WorkflowConfig: ``analysis_targets`` instead of only the selected one. prefer_dask : bool Whether to open data using lazy Dask-backed arrays when supported. + execution_policy : ExecutionPolicy + Operator-facing execution-planning policy used for automatic sizing. dask_backend : DaskBackendConfig - Backend orchestration mode and runtime settings for Dask execution. + Advanced backend orchestration override and persisted scheduler hints. + execution_plan : ExecutionPlan, optional + Effective execution plan derived at runtime. chunks : int or tuple of int, optional Chunking configuration used for Dask reads. flatfield : bool @@ -4076,7 +4847,9 @@ class WorkflowConfig: analysis_selected_experiment_path: Optional[str] = None analysis_apply_to_all: bool = False prefer_dask: bool = True + execution_policy: ExecutionPolicy = field(default_factory=ExecutionPolicy) dask_backend: DaskBackendConfig = field(default_factory=DaskBackendConfig) + execution_plan: Optional[ExecutionPlan] = None chunks: ChunkSpec = None flatfield: bool = False deconvolution: bool = False @@ -4108,6 +4881,8 @@ def __post_init__(self) -> None: ValueError If analysis parameter mappings are invalid. """ + if not isinstance(self.execution_policy, ExecutionPolicy): + self.execution_policy = execution_policy_from_dict(self.execution_policy) self.analysis_targets = normalize_analysis_targets(self.analysis_targets) selected_experiment_path = ( str(self.analysis_selected_experiment_path).strip() @@ -4196,6 +4971,454 @@ def selected_analysis_target(self) -> Optional[AnalysisTarget]: return None +def _selected_operations_for_execution_plan( + workflow: WorkflowConfig, + *, + workload: str, + analysis_parameters: Optional[Dict[str, Dict[str, Any]]] = None, +) -> tuple[str, ...]: + """Return ordered operations relevant to an execution plan.""" + workload_name = str(workload).strip().lower() or "analysis" + if workload_name != "analysis": + return ("io",) + return tuple( + resolve_analysis_execution_sequence( + flatfield=workflow.flatfield, + deconvolution=workflow.deconvolution, + shear_transform=workflow.shear_transform, + particle_detection=workflow.particle_detection, + usegment3d=workflow.usegment3d, + registration=workflow.registration, + visualization=workflow.visualization, + mip_export=workflow.mip_export, + analysis_parameters=analysis_parameters or workflow.analysis_parameters, + ) + ) + + +def _effective_analysis_descriptor( + operation_name: str, + parameters: Optional[Mapping[str, Any]], + *, + seeded_descriptors: Optional[Mapping[str, AnalysisResourceDescriptor]] = None, +) -> AnalysisResourceDescriptor: + """Return one effective descriptor for a selected analysis operation.""" + descriptor_map = ( + dict(seeded_descriptors) + if seeded_descriptors is not None + else default_analysis_resource_descriptors() + ) + base = descriptor_map.get( + str(operation_name), + AnalysisResourceDescriptor( + operation_name=str(operation_name), + chunk_basis="3d", + uses_overlap=False, + seed_memory_multiplier=1.0, + seed_cpu_intensity=1.0, + io_intensity=0.25, + ), + ) + params = dict(parameters or {}) + gpu_mode = base.gpu_mode + preferred_worker_kind = base.preferred_worker_kind + if str(operation_name) == "usegment3d": + if bool(params.get("require_gpu", False)): + gpu_mode = EXECUTION_GPU_MODE_REQUIRED + preferred_worker_kind = EXECUTION_WORKER_KIND_GPU_PROCESS + elif bool(params.get("gpu", False)): + gpu_mode = EXECUTION_GPU_MODE_OPTIONAL + preferred_worker_kind = EXECUTION_WORKER_KIND_GPU_PROCESS + else: + gpu_mode = EXECUTION_GPU_MODE_NEVER + preferred_worker_kind = EXECUTION_WORKER_KIND_PROCESS + elif str(operation_name) == "visualization" and not bool( + params.get("require_gpu_rendering", True) + ): + gpu_mode = EXECUTION_GPU_MODE_NEVER + elif str(operation_name) == "deconvolution" and bool(params.get("gpu_job", False)): + gpu_mode = EXECUTION_GPU_MODE_OPTIONAL + preferred_worker_kind = EXECUTION_WORKER_KIND_GPU_PROCESS + return AnalysisResourceDescriptor( + operation_name=str(operation_name), + chunk_basis=str(params.get("chunk_basis", base.chunk_basis)).strip() + or base.chunk_basis, + uses_overlap=bool(params.get("use_map_overlap", base.uses_overlap)), + seed_memory_multiplier=float( + params.get("memory_overhead_factor", base.seed_memory_multiplier) + ), + seed_cpu_intensity=float(base.seed_cpu_intensity), + io_intensity=float(base.io_intensity), + gpu_mode=gpu_mode, + preferred_worker_kind=preferred_worker_kind, + supports_chunk_calibration=base.supports_chunk_calibration, + ) + + +def _aggregate_execution_descriptor( + *, + workload: str, + operation_names: Sequence[str], + analysis_parameters: Optional[Dict[str, Dict[str, Any]]], +) -> AnalysisResourceDescriptor: + """Aggregate selected operations into one planning descriptor.""" + workload_name = str(workload).strip().lower() or "analysis" + if workload_name != "analysis": + return AnalysisResourceDescriptor( + operation_name="io", + chunk_basis="3d", + uses_overlap=False, + seed_memory_multiplier=1.5, + seed_cpu_intensity=0.65, + io_intensity=1.0, + gpu_mode=EXECUTION_GPU_MODE_NEVER, + preferred_worker_kind=EXECUTION_WORKER_KIND_PROCESS, + supports_chunk_calibration=False, + ) + + normalized = normalize_analysis_operation_parameters(analysis_parameters) + effective_descriptors = [ + _effective_analysis_descriptor( + operation_name, + normalized.get(str(operation_name), {}), + ) + for operation_name in operation_names + ] + if not effective_descriptors: + return AnalysisResourceDescriptor( + operation_name="analysis", + chunk_basis="3d", + uses_overlap=False, + seed_memory_multiplier=1.0, + seed_cpu_intensity=1.0, + io_intensity=0.25, + gpu_mode=EXECUTION_GPU_MODE_NEVER, + preferred_worker_kind=EXECUTION_WORKER_KIND_PROCESS, + supports_chunk_calibration=False, + ) + + gpu_mode = EXECUTION_GPU_MODE_NEVER + preferred_worker_kind = EXECUTION_WORKER_KIND_THREAD + for descriptor in effective_descriptors: + if descriptor.gpu_mode == EXECUTION_GPU_MODE_REQUIRED: + gpu_mode = EXECUTION_GPU_MODE_REQUIRED + elif ( + descriptor.gpu_mode == EXECUTION_GPU_MODE_OPTIONAL + and gpu_mode != EXECUTION_GPU_MODE_REQUIRED + ): + gpu_mode = EXECUTION_GPU_MODE_OPTIONAL + if descriptor.preferred_worker_kind == EXECUTION_WORKER_KIND_GPU_PROCESS: + preferred_worker_kind = EXECUTION_WORKER_KIND_GPU_PROCESS + elif ( + descriptor.preferred_worker_kind == EXECUTION_WORKER_KIND_PROCESS + and preferred_worker_kind != EXECUTION_WORKER_KIND_GPU_PROCESS + ): + preferred_worker_kind = EXECUTION_WORKER_KIND_PROCESS + + return AnalysisResourceDescriptor( + operation_name="analysis_sequence", + chunk_basis=( + "3d" + if any(desc.chunk_basis == "3d" for desc in effective_descriptors) + else "2d" + ), + uses_overlap=any(desc.uses_overlap for desc in effective_descriptors), + seed_memory_multiplier=max( + float(desc.seed_memory_multiplier) for desc in effective_descriptors + ), + seed_cpu_intensity=max( + float(desc.seed_cpu_intensity) for desc in effective_descriptors + ), + io_intensity=max(float(desc.io_intensity) for desc in effective_descriptors), + gpu_mode=gpu_mode, + preferred_worker_kind=preferred_worker_kind, + supports_chunk_calibration=any( + desc.supports_chunk_calibration for desc in effective_descriptors + ), + ) + + +def _estimate_chunk_count( + shape_tpczyx: Optional[Tuple[int, int, int, int, int, int]], + chunks_tpczyx: Tuple[int, int, int, int, int, int], +) -> Optional[int]: + """Estimate the number of chunks in a canonical dataset.""" + if shape_tpczyx is None: + return None + return max( + 1, + math.prod( + [ + max(1, math.ceil(int(dim) / max(1, int(chunk)))) + for dim, chunk in zip(shape_tpczyx, chunks_tpczyx, strict=False) + ] + ), + ) + + +def plan_execution( + workflow: WorkflowConfig, + *, + workload: str = "analysis", + shape_tpczyx: Optional[Tuple[int, int, int, int, int, int]] = None, + chunks_tpczyx: Optional[Tuple[int, int, int, int, int, int]] = None, + dtype_itemsize: Optional[int] = None, + calibration_profiles: Optional[Mapping[str, CalibrationProfile]] = None, +) -> ExecutionPlan: + """Derive an execution plan for the requested workflow context.""" + workload_name = str(workload).strip().lower() or "analysis" + effective_chunks = cast( + Tuple[int, int, int, int, int, int], + tuple( + int(v) + for v in ( + chunks_tpczyx + if chunks_tpczyx is not None + else workflow.zarr_save.chunks_tpczyx() + ) + ), + ) + itemsize = max(1, int(dtype_itemsize or 2)) + estimated_chunk_bytes = max(1, math.prod(effective_chunks) * itemsize) + normalized_parameters = normalize_analysis_operation_parameters( + workflow.analysis_parameters + ) + selected_operations = _selected_operations_for_execution_plan( + workflow, + workload=workload_name, + analysis_parameters=normalized_parameters, + ) + descriptor = _aggregate_execution_descriptor( + workload=workload_name, + operation_names=selected_operations, + analysis_parameters=normalized_parameters, + ) + capabilities = detect_environment_capabilities( + scheduler_file=workflow.dask_backend.slurm_runner.scheduler_file + ) + estimated_chunk_count = _estimate_chunk_count(shape_tpczyx, effective_chunks) + calibration_profile = build_execution_calibration_profile( + operation_names=selected_operations, + analysis_parameters=normalized_parameters, + chunks_tpczyx=effective_chunks, + dtype_itemsize=itemsize, + capabilities=capabilities, + estimated_chunk_count=estimated_chunk_count, + ) + if ( + workflow.execution_policy.calibration_policy + == EXECUTION_CALIBRATION_USE_IF_AVAILABLE + and calibration_profiles is not None + ): + cached_profile = calibration_profiles.get(calibration_profile.profile_key) + if cached_profile is not None: + calibration_profile = cached_profile + + if workflow.execution_policy.mode == EXECUTION_POLICY_ADVANCED: + backend_config = workflow.dask_backend + workers = 1 + threads_per_worker = 1 + memory_per_worker_limit = "auto" + if backend_config.mode == DASK_BACKEND_LOCAL_CLUSTER: + if backend_config.local_cluster.n_workers is None: + recommendation = recommend_local_cluster_config( + shape_tpczyx=shape_tpczyx, + chunks_tpczyx=effective_chunks, + dtype_itemsize=itemsize, + cpu_count=capabilities.cpu_count, + memory_bytes=capabilities.memory_bytes, + gpu_count=capabilities.gpu_count, + gpu_memory_bytes=capabilities.gpu_memory_bytes, + ) + workers = int(recommendation.config.n_workers or 1) + threads_per_worker = int(recommendation.config.threads_per_worker) + memory_per_worker_limit = str(recommendation.config.memory_limit) + backend_config = DaskBackendConfig( + mode=DASK_BACKEND_LOCAL_CLUSTER, + local_cluster=LocalClusterConfig( + n_workers=workers, + threads_per_worker=threads_per_worker, + memory_limit=memory_per_worker_limit, + local_directory=backend_config.local_cluster.local_directory, + ), + slurm_runner=backend_config.slurm_runner, + slurm_cluster=backend_config.slurm_cluster, + ) + else: + workers = int(backend_config.local_cluster.n_workers or 1) + threads_per_worker = int(backend_config.local_cluster.threads_per_worker) + memory_per_worker_limit = str(backend_config.local_cluster.memory_limit) + elif backend_config.mode == DASK_BACKEND_SLURM_CLUSTER: + workers = int(backend_config.slurm_cluster.workers) + threads_per_worker = int( + max( + 1, + int(backend_config.slurm_cluster.cores) + // max(1, int(backend_config.slurm_cluster.processes)), + ) + ) + memory_per_worker_limit = str(backend_config.slurm_cluster.memory) + elif backend_config.mode == DASK_BACKEND_SLURM_RUNNER: + workers = max( + 1, + int(backend_config.slurm_runner.wait_for_workers or 1), + ) + threads_per_worker = 1 + memory_per_worker_limit = workflow.execution_policy.memory_per_worker_limit + return ExecutionPlan( + policy_mode=workflow.execution_policy.mode, + workload=workload_name, + selected_operations=selected_operations, + worker_kind=descriptor.preferred_worker_kind, + backend_config=backend_config, + workers=workers, + threads_per_worker=threads_per_worker, + memory_per_worker_limit=memory_per_worker_limit, + estimated_chunk_bytes=estimated_chunk_bytes, + estimated_working_set_bytes=calibration_profile.estimated_peak_memory_bytes, + estimated_chunk_count=estimated_chunk_count, + requires_gpu=descriptor.gpu_mode == EXECUTION_GPU_MODE_REQUIRED, + environment=capabilities, + calibration_profile=calibration_profile, + ) + + legacy_local_worker_cap = ( + int(workflow.dask_backend.local_cluster.n_workers) + if workflow.dask_backend.mode == DASK_BACKEND_LOCAL_CLUSTER + and workflow.dask_backend.local_cluster.n_workers is not None + else None + ) + requested_max_workers = ( + int(workflow.execution_policy.max_workers) + if workflow.execution_policy.max_workers is not None + else ( + int(legacy_local_worker_cap) + if legacy_local_worker_cap is not None + else 64 + ) + ) + reserve_bytes = min( + max(2 << 30, capabilities.memory_bytes // 10), + max(1 << 30, capabilities.memory_bytes // 6), + ) + usable_bytes = max(1 << 30, capabilities.memory_bytes - reserve_bytes) + minimum_safe_memory_bytes = max( + 1 << 30, + int(math.ceil(calibration_profile.estimated_peak_memory_bytes * 1.3)), + ) + requested_memory_bytes = _parse_memory_limit_bytes( + workflow.execution_policy.memory_per_worker_limit + ) + if requested_memory_bytes is None and ( + workflow.dask_backend.mode == DASK_BACKEND_LOCAL_CLUSTER + ): + requested_memory_bytes = _parse_memory_limit_bytes( + workflow.dask_backend.local_cluster.memory_limit + ) + + if descriptor.preferred_worker_kind == EXECUTION_WORKER_KIND_THREAD: + workers = 1 + threads_per_worker = max( + 1, + min(capabilities.cpu_count, requested_max_workers, 8), + ) + worker_memory_bytes = max( + minimum_safe_memory_bytes, + requested_memory_bytes or usable_bytes, + ) + else: + threads_per_worker = 1 + if capabilities.cpu_count >= 32 and estimated_chunk_bytes < (16 << 20): + threads_per_worker = 2 + workers_by_cpu = max(1, capabilities.cpu_count // threads_per_worker) + worker_memory_bytes = max( + minimum_safe_memory_bytes, + requested_memory_bytes or minimum_safe_memory_bytes, + ) + workers_by_memory = max(1, usable_bytes // max(1, worker_memory_bytes)) + workers_by_chunk_count = ( + max(1, int(estimated_chunk_count)) + if estimated_chunk_count is not None + else requested_max_workers + ) + if descriptor.gpu_mode == EXECUTION_GPU_MODE_NEVER: + workers_by_gpu = requested_max_workers + else: + workers_by_gpu = max(1, capabilities.gpu_count or 1) + workers = max( + 1, + min( + requested_max_workers, + workers_by_cpu, + workers_by_memory, + workers_by_chunk_count, + workers_by_gpu, + ), + ) + if threads_per_worker > 1 and workers * threads_per_worker > capabilities.cpu_count: + threads_per_worker = max(1, capabilities.cpu_count // max(1, workers)) + worker_memory_bytes = max( + worker_memory_bytes, + usable_bytes // max(1, workers), + ) + + requires_gpu = descriptor.gpu_mode == EXECUTION_GPU_MODE_REQUIRED + use_gpu_local_cluster = ( + descriptor.preferred_worker_kind == EXECUTION_WORKER_KIND_GPU_PROCESS + and capabilities.gpu_count > 0 + ) + memory_per_worker_limit = _format_worker_memory_limit(worker_memory_bytes) + if capabilities.attached_scheduler_file: + backend_config = DaskBackendConfig( + mode=DASK_BACKEND_SLURM_RUNNER, + slurm_runner=SlurmRunnerConfig( + scheduler_file=capabilities.attached_scheduler_file, + wait_for_workers=workers, + ), + ) + else: + backend_config = DaskBackendConfig( + mode=DASK_BACKEND_LOCAL_CLUSTER, + local_cluster=LocalClusterConfig( + n_workers=workers, + threads_per_worker=threads_per_worker, + memory_limit=memory_per_worker_limit, + local_directory=workflow.dask_backend.local_cluster.local_directory, + ), + slurm_runner=workflow.dask_backend.slurm_runner, + slurm_cluster=workflow.dask_backend.slurm_cluster, + ) + if use_gpu_local_cluster and capabilities.gpu_count > 0: + workers = min(workers, max(1, capabilities.gpu_count)) + backend_config = DaskBackendConfig( + mode=DASK_BACKEND_LOCAL_CLUSTER, + local_cluster=LocalClusterConfig( + n_workers=workers, + threads_per_worker=threads_per_worker, + memory_limit=memory_per_worker_limit, + local_directory=workflow.dask_backend.local_cluster.local_directory, + ), + slurm_runner=workflow.dask_backend.slurm_runner, + slurm_cluster=workflow.dask_backend.slurm_cluster, + ) + return ExecutionPlan( + policy_mode=workflow.execution_policy.mode, + workload=workload_name, + selected_operations=selected_operations, + worker_kind=descriptor.preferred_worker_kind, + backend_config=backend_config, + workers=workers, + threads_per_worker=threads_per_worker, + memory_per_worker_limit=memory_per_worker_limit, + estimated_chunk_bytes=estimated_chunk_bytes, + estimated_working_set_bytes=calibration_profile.estimated_peak_memory_bytes, + estimated_chunk_count=estimated_chunk_count, + requires_gpu=requires_gpu, + environment=capabilities, + calibration_profile=calibration_profile, + ) + + def parse_chunks(chunks: Optional[str]) -> ChunkSpec: """Parse chunk spec from CLI/GUI text. diff --git a/tests/test_main.py b/tests/test_main.py index 2a47529..5a1c507 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -17,7 +17,7 @@ import clearex.main as main_module from clearex.io.provenance import persist_run_provenance from clearex.io.read import ImageInfo -from clearex.workflow import WorkflowConfig, WorkflowExecutionCancelled +from clearex.workflow import ExecutionPolicy, WorkflowConfig, WorkflowExecutionCancelled from clearex.workflow import DaskBackendConfig, LocalClusterConfig @@ -108,6 +108,7 @@ def _fake_create_dask_client(**kwargs): workflow = WorkflowConfig( prefer_dask=True, + execution_policy=ExecutionPolicy(mode="advanced"), dask_backend=DaskBackendConfig( local_cluster=LocalClusterConfig( n_workers=4, @@ -144,6 +145,7 @@ def _fake_create_dask_client(**kwargs): workflow = WorkflowConfig( prefer_dask=True, + execution_policy=ExecutionPolicy(mode="advanced"), dask_backend=DaskBackendConfig( local_cluster=LocalClusterConfig( n_workers=1, @@ -186,6 +188,7 @@ def _fake_recommend_local_cluster_config(**kwargs): workflow = WorkflowConfig( prefer_dask=True, + execution_policy=ExecutionPolicy(mode="advanced"), usegment3d=True, analysis_parameters={ "usegment3d": { @@ -241,6 +244,7 @@ def _unexpected_recommendation(**kwargs): workflow = WorkflowConfig( prefer_dask=True, + execution_policy=ExecutionPolicy(mode="advanced"), usegment3d=True, analysis_parameters={ "usegment3d": { From 7156336c4f1f0a74fb282112cdfb9fc800c9ee79 Mon Sep 17 00:00:00 2001 From: "Kevin M. Dean" Date: Fri, 20 Mar 2026 06:45:10 -0500 Subject: [PATCH 02/10] Add store-level spatial calibration for Navigate multiposition data --- AGENTS.md | 16 + README.md | 33 +- docs/AGENTS.md | 4 + docs/source/getting-started.rst | 29 +- docs/source/runtime/architecture-overview.rst | 6 +- docs/source/runtime/cli-and-execution.rst | 56 +- .../runtime/ingestion-and-canonical-store.rst | 24 +- docs/source/runtime/provenance.rst | 14 + src/clearex/codex.md | 27 + src/clearex/gui/CODEX.md | 10 +- src/clearex/gui/README.md | 11 +- src/clearex/gui/app.py | 552 +++++++++++++++++- src/clearex/io/CODEX.md | 22 + src/clearex/io/README.md | 22 + src/clearex/io/cli.py | 9 + src/clearex/io/experiment.py | 60 ++ src/clearex/io/provenance.py | 11 + src/clearex/main.py | 71 ++- src/clearex/visualization/CODEX.md | 20 +- src/clearex/visualization/README.md | 20 +- src/clearex/visualization/pipeline.py | 136 ++++- src/clearex/workflow.py | 438 ++++++++++++++ tests/gui/test_gui_execution.py | 203 +++++++ tests/io/test_cli.py | 6 + tests/io/test_experiment.py | 68 +++ tests/io/test_provenance.py | 32 +- tests/test_main.py | 249 +++++++- tests/test_workflow.py | 52 ++ tests/visualization/test_pipeline.py | 95 +++ 29 files changed, 2240 insertions(+), 56 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index e65635b..bbf913b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -25,6 +25,7 @@ This file summarizes the current engineering strategy for agent-driven changes i - `--gui` / `--no-gui` - `--headless` (overrides GUI launch) - `--file`, `--dask`, `--chunks` + - `--stage-axis-map` - `--deconvolution`, `--particle-detection`, `--registration`, `--visualization` - GUI launch failures (missing display, missing Qt bindings, runtime issues) should gracefully fall back to headless mode with clear logs. - `experiment.yml` is a first-class input path: @@ -43,6 +44,8 @@ This file summarizes the current engineering strategy for agent-driven changes i - `WorkflowConfig` carries both storage config (`ZarrSaveConfig`) and backend config (`DaskBackendConfig`) so GUI and runtime stay aligned. - `parse_chunks` is the single source of truth for validating chunk specs. - `format_chunks` is the single source of truth for rendering chunk specs back into UI text. +- `parse_spatial_calibration` / `format_spatial_calibration` are the single + source of truth for store-level Navigate stage-to-world axis mapping. - Dask backend and Zarr-save helpers (validation, summary, serialization) should stay centralized in `workflow.py`. - Keep parsing/formatting logic centralized here to avoid drift between CLI and GUI behavior. @@ -62,6 +65,11 @@ This file summarizes the current engineering strategy for agent-driven changes i - Zarr save configuration popup: - chunk sizes in `(p, t, c, z, y, x)`, - pyramid factors per axis, + - spatial calibration control: + - per-experiment world `z/y/x` to stage `X/Y/Z/F` mapping, + - prefill from existing store metadata when available, + - keep one draft per experiment path while setup is open, + - persist the resolved mapping to every reused/new store on `Next`, - `Next` action that validates/creates canonical data store before analysis step. - second-window analysis selection flow: - deconvolution, @@ -99,8 +107,15 @@ This file summarizes the current engineering strategy for agent-driven changes i - non-Zarr/N5 sources: `data_store.zarr` beside `experiment.yml`, - Zarr/N5 sources: reuse source store path (no duplicate store path). - Canonical array layout: `(t, p, c, z, y, x)`. +- Store-level physical placement metadata must live in root attr + `spatial_calibration = {schema, stage_axis_map_zyx, theta_mode}`. +- Missing `spatial_calibration` metadata must read as identity + (`z=+z,y=+y,x=+x`); changing calibration must never rewrite canonical `data`. - For new conversion, chunking and pyramid factors come from GUI/backend `WorkflowConfig`. - After canonical data array ingest/establishment, treat base image data as read-only for downstream analysis stages. +- Visualization already consumes store-level spatial calibration for + multiposition placement; future registration must read the same metadata + instead of introducing a second mapping path. ## Parallelism Strategy @@ -136,6 +151,7 @@ This file summarizes the current engineering strategy for agent-driven changes i - input data locator and input fingerprint/hash - effective Dask backend mode + parameters - effective Zarr ingest/save chunk + pyramid settings +- effective store-level spatial calibration - software identity: - git commit SHA - git branch diff --git a/README.md b/README.md index 3f5349e..a2be2a8 100755 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ ClearEx is an open source Python package for scalable analytics of cleared and e - Input support for TIFF/OME-TIFF, Zarr/N5, HDF5 (`.h5/.hdf5/.hdf`), and NumPy (`.npy/.npz`). - Navigate experiment ingestion from `experiment.yml` / `experiment.yaml`. - Canonical analysis store layout with axis order `(t, p, c, z, y, x)`. +- Store-level spatial calibration for Navigate multiposition data, persisted per analysis store and applied to physical placement metadata without rewriting canonical image data. - Analysis operations available from the main entrypoint: - deconvolution (`results/deconvolution/latest/data`) - particle detection (`results/particle_detection/latest`) @@ -138,22 +139,33 @@ Current CLI usage: usage: clearex [-h] [--flatfield] [--deconvolution] [--particle-detection] [--usegment3d] [--channel-indices CHANNEL_INDICES] [--input-resolution-level INPUT_RESOLUTION_LEVEL] - [--shear-transform] [-r] [-v] [--mip-export] - [-f FILE] [--dask | --no-dask] [--chunks CHUNKS] - [--gui | --no-gui] [--headless] + [--shear-transform] [-r] [-v] [--mip-export] [-f FILE] + [--dask | --no-dask] [--chunks CHUNKS] + [--execution-mode {auto,advanced}] [--max-workers MAX_WORKERS] + [--memory-per-worker MEMORY_PER_WORKER] [--calibrate] + [--stage-axis-map STAGE_AXIS_MAP] [--gui | --no-gui] + [--headless] ``` ### Options +- `--flatfield`: Run flatfield-correction workflow. - `-f, --file`: Path to input image/store or Navigate `experiment.yml`. - `--deconvolution`: Run deconvolution workflow. - `--particle-detection`: Run particle detection workflow. - `--usegment3d`: Run uSegment3D segmentation workflow. - `--channel-indices`: uSegment3D channels to process (`0,1,2` or `all`). - `--input-resolution-level`: uSegment3D input pyramid level (`0`, `1`, ...). +- `--shear-transform`: Run shear-transform workflow. - `-r, --registration`: Run registration workflow hook. - `-v, --visualization`: Run visualization workflow. +- `--mip-export`: Export XY/XZ/YZ maximum-intensity projections. - `--dask / --no-dask`: Enable/disable Dask-backed reading. - `--chunks`: Chunk spec for Dask reads, for example `256` or `1,256,256`. +- `--execution-mode`: Automatic or advanced Dask execution planning mode. +- `--max-workers`: Worker cap for automatic execution planning. +- `--memory-per-worker`: Preferred per-worker memory limit for automatic execution planning. +- `--calibrate`: Refresh cached execution-planning calibration before running. +- `--stage-axis-map`: Store-level world `z/y/x` mapping for Navigate multiposition stage coordinates, for example `z=+x,y=none,x=+y`. - `--gui / --no-gui`: Enable/disable GUI launch (default is `--gui`). - `--headless`: Force non-interactive mode (overrides `--gui`). @@ -173,6 +185,15 @@ clearex --headless \ --deconvolution --usegment3d --particle-detection ``` +Run headless with an explicit stage-to-world axis mapping for Navigate multiposition placement: + +```bash +clearex --headless \ + --file /path/to/experiment.yml \ + --visualization \ + --stage-axis-map z=+x,y=none,x=+y +``` + Run headless uSegment3D on all channels: ```bash @@ -201,8 +222,12 @@ clearex --headless --no-dask --file /path/to/data_store.zarr --particle-detectio - If `--file` points to Navigate `experiment.yml`, ClearEx resolves acquisition data and materializes a canonical store first. - For non-Zarr/N5 acquisition data, materialization target is `data_store.zarr` beside `experiment.yml`. - For Zarr/N5 acquisition data, ClearEx reuses the source store path in place. +- Canonical stores persist root-attr `spatial_calibration = {schema, stage_axis_map_zyx, theta_mode}`. Missing metadata resolves to the identity mapping `z=+z,y=+y,x=+x`. +- In the setup window, `Spatial Calibration` is configured per listed experiment. Draft mappings are tracked per experiment while the dialog is open, existing stores prefill the control, and `Next` writes the resolved mapping to every reused or newly prepared store before analysis selection opens. +- In headless mode, `--stage-axis-map` writes the supplied mapping to materialized experiment stores and existing Zarr/N5 stores before analysis starts. If the flag is omitted, existing store calibration is preserved. - Deconvolution, particle detection, uSegment3D, and visualization operations run against canonical Zarr/N5 stores. - Visualization supports multi-volume overlays (for example raw `data` + `results/usegment3d/latest/data`) with per-layer image/labels display controls. +- Multiposition visualization placement now resolves world `z/y/x` translations from the store-level spatial calibration. Bindings support `X`, `Y`, `Z`, and Navigate focus axis `F` with sign inversion or `none`; `THETA` remains a rotation of the `z/y` plane about world `x`. - Visualization now probes napari OpenGL renderer info (`vendor`/`renderer`/`version`) and can fail fast when software rendering is detected or GPU rendering cannot be confirmed (`require_gpu_rendering=True`). - MIP export writes TIFF outputs as OME-TIFF (`.tif`) with projection-aware physical pixel calibration (`PhysicalSizeX/Y`) derived from source `voxel_size_um_zyx`. - uSegment3D runs per `(t, p, selected channel)` volume task and writes labels to `results/usegment3d/latest/data`. @@ -291,6 +316,8 @@ clearex --headless --no-dask --file /path/to/data_store.zarr --particle-detectio - Visualization parameters include `require_gpu_rendering` (enabled by default). Disable only when running intentionally without a GPU-backed OpenGL context. ## Output Layout (Canonical Store) +- Root metadata: + - `spatial_calibration` for store-level world `z/y/x` placement mapping - Base image data: `data` - Multiscale pyramid levels: `data_pyramid/level_*` - Latest analysis outputs: diff --git a/docs/AGENTS.md b/docs/AGENTS.md index 445fead..731e846 100644 --- a/docs/AGENTS.md +++ b/docs/AGENTS.md @@ -26,6 +26,10 @@ Build docs: `make html -j 15` ## Documentation practices Be concise, specific, and value dense Write so that a new developer to this codebase can understand your writing, don’t assume your audience are experts in the topic/area you are writing about. +- When runtime behavior changes, update the matching ``docs/source/runtime`` + pages and the affected top-level/module ``README.md`` / ``CODEX.md`` / + ``AGENTS.md`` notes in the same change so CLI flags, store metadata names, + and provenance fields stay aligned. ## Boundaries - ✅ **Always do:** Write new files to `docs/`, follow the style examples diff --git a/docs/source/getting-started.rst b/docs/source/getting-started.rst index c387d04..d525f82 100644 --- a/docs/source/getting-started.rst +++ b/docs/source/getting-started.rst @@ -4,8 +4,9 @@ Getting Started Installation ------------ -ClearEx supports Python ``3.12`` and ``3.13`` (project constraint: ``>=3.12,<3.14``). -Using ``uv`` with an explicit Python version avoids accidental ``3.14`` installs. +ClearEx currently targets Python ``3.12`` (project constraint: +``>=3.12,<3.13``). Using ``uv`` with an explicit Python version avoids +accidental unsupported environments. macOS ^^^^^ @@ -13,8 +14,8 @@ macOS .. code-block:: bash curl -LsSf https://astral.sh/uv/install.sh | sh - uv python install 3.13 - uv venv --python 3.13 + uv python install 3.12 + uv venv --python 3.12 source .venv/bin/activate uv pip install -e ".[docs]" @@ -24,8 +25,8 @@ Linux .. code-block:: bash curl -LsSf https://astral.sh/uv/install.sh | sh - uv python install 3.13 - uv venv --python 3.13 + uv python install 3.12 + uv venv --python 3.12 source .venv/bin/activate uv pip install -e ".[docs]" @@ -35,8 +36,8 @@ Windows (PowerShell) .. code-block:: powershell powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex" - uv python install 3.13 - uv venv --python 3.13 + uv python install 3.12 + uv venv --python 3.12 .venv\Scripts\Activate.ps1 uv pip install -e ".[docs]" @@ -61,6 +62,18 @@ Run in headless mode against an experiment file: uv run python -m clearex.main --headless --no-gui --file /path/to/experiment.yml --dask +Run in headless mode with an explicit Navigate stage-to-world mapping: + +.. code-block:: bash + + uv run clearex --headless \ + --file /path/to/experiment.yml \ + --visualization \ + --stage-axis-map z=+x,y=none,x=+y + +In the GUI setup flow, the same mapping can be authored through the +``Spatial Calibration`` panel before entering analysis selection. + Documentation Build ------------------- diff --git a/docs/source/runtime/architecture-overview.rst b/docs/source/runtime/architecture-overview.rst index a2c3741..a73dcc5 100644 --- a/docs/source/runtime/architecture-overview.rst +++ b/docs/source/runtime/architecture-overview.rst @@ -8,6 +8,7 @@ The runtime architecture is built around these constraints: - GUI-first operator workflow, with fully supported headless execution. - Canonical analysis data layout in ``(t, p, c, z, y, x)``. +- Metadata-only spatial calibration for Navigate multiposition placement. - Deterministic latest-output paths for large derived arrays. - Append-only, FAIR-oriented provenance records. - Shared configuration model between GUI and headless paths. @@ -21,7 +22,8 @@ ClearEx is intentionally split into layers that can evolve independently: GUI in ``clearex.gui.app`` and CLI parsing in ``clearex.io.cli``. 2. Configuration layer: typed runtime schema in ``clearex.workflow.WorkflowConfig`` (plus - ``DaskBackendConfig`` and ``ZarrSaveConfig``). + ``DaskBackendConfig``, ``ZarrSaveConfig``, and + ``SpatialCalibrationConfig``). 3. Orchestration layer: workflow entrypoint and execution coordinator in ``clearex.main``. 4. Data and metadata layer: @@ -54,6 +56,8 @@ These contracts are stable and expected by multiple modules: - Canonical base image shape is always six-dimensional in ``(t, p, c, z, y, x)`` order. - Multiscale levels are stored under ``data_pyramid/level_``. +- Root attr ``spatial_calibration`` is the canonical per-store placement + mapping for Navigate multiposition stage coordinates. - Large analysis outputs are latest-only under ``results//latest``. - Provenance run history is append-only under ``provenance/runs``. - Provenance includes hash chaining for tamper-evident verification. diff --git a/docs/source/runtime/cli-and-execution.rst b/docs/source/runtime/cli-and-execution.rst index 624ef31..41dd9fd 100644 --- a/docs/source/runtime/cli-and-execution.rst +++ b/docs/source/runtime/cli-and-execution.rst @@ -11,11 +11,21 @@ Current primary arguments are: - ``--flatfield`` - ``--deconvolution`` - ``--particle-detection`` +- ``--usegment3d`` +- ``--channel-indices`` +- ``--input-resolution-level`` +- ``--shear-transform`` - ``--registration`` - ``--visualization`` +- ``--mip-export`` - ``--file`` - ``--dask`` / ``--no-dask`` - ``--chunks`` +- ``--execution-mode`` +- ``--max-workers`` +- ``--memory-per-worker`` +- ``--calibrate`` +- ``--stage-axis-map`` - ``--gui`` / ``--no-gui`` - ``--headless`` @@ -45,9 +55,15 @@ The first GUI window is an experiment-list driven setup flow: - Double-clicking a list item reloads that experiment's metadata explicitly. - The current ordered list can be saved back to a reusable ``.clearex-experiment-list.json`` file. +- ``Spatial Calibration`` edits store-level world ``z/y/x`` placement mapping + for the currently selected experiment without rewriting canonical image data. +- Spatial-calibration drafts are tracked per experiment while setup is open. +- Existing stores prefill the spatial-calibration control when metadata is + already present. - Pressing ``Next`` batch-prepares canonical stores for every listed - experiment that is missing a complete store, then opens analysis selection - for the currently selected experiment. + experiment that is missing a complete store, persists the resolved spatial + calibration to every reused or newly prepared store, then opens analysis + selection for the currently selected experiment. - The setup dialog persists the last-used Zarr save configuration across sessions. - ``Rebuild Canonical Store`` forces the listed stores to be rebuilt with the @@ -97,6 +113,38 @@ Examples: --file /path/to/data_store.zarr \ --visualization +.. code-block:: bash + + # Headless Navigate run with explicit stage-to-world placement mapping + clearex --headless \ + --file /path/to/experiment.yml \ + --visualization \ + --stage-axis-map z=+x,y=none,x=+y + +Spatial Calibration +------------------- + +Spatial calibration is a store-level mapping from world ``z/y/x`` placement +axes to Navigate multiposition stage coordinates. + +- Canonical text form is ``z=...,y=...,x=...``. +- Allowed bindings are ``+x``, ``-x``, ``+y``, ``-y``, ``+z``, ``-z``, + ``+f``, ``-f``, and ``none``. +- Default identity mapping is ``z=+z,y=+y,x=+x``. +- ``none`` disables translation on that world axis. +- ``THETA`` remains interpreted as rotation of the ``z/y`` plane about world + ``x``. + +GUI and headless flows share the same normalized parser and storage policy: + +- GUI setup writes the resolved mappings to the listed experiment stores on + ``Next``. +- ``--stage-axis-map`` writes an explicit override to Navigate-materialized + stores and existing Zarr/N5 stores before analysis starts. +- If no explicit override is supplied, existing store calibration is preserved. +- The mapping changes placement metadata only; canonical ``data`` remains + unchanged. + Interchangeable Routine Composition ----------------------------------- @@ -120,8 +168,8 @@ Runtime source aliases currently include: - ``deconvolution`` -> ``results/deconvolution/latest/data`` - ``registration`` -> ``results/registration/latest/data`` -When a requested source component does not exist, operation-specific fallback -logic can revert to ``data`` to keep workflows operable. +When a requested source component does not exist, runtime raises an input +dependency error instead of silently falling back to ``data``. Progress and Run Lifecycle -------------------------- diff --git a/docs/source/runtime/ingestion-and-canonical-store.rst b/docs/source/runtime/ingestion-and-canonical-store.rst index 6e67298..e4ebbd0 100644 --- a/docs/source/runtime/ingestion-and-canonical-store.rst +++ b/docs/source/runtime/ingestion-and-canonical-store.rst @@ -11,7 +11,8 @@ entrypoint. ``clearex.io.experiment`` parses: - timepoints/z steps/channels/positions, - camera dimensions, - pixel size metadata, -- multiposition metadata (including ``multi_positions.yml`` when available). +- multiposition metadata (including ``multi_positions.yml`` and its + ``X/Y/Z/F/THETA`` stage rows when available). Source Data Resolution ---------------------- @@ -66,6 +67,25 @@ Pyramid levels: Store metadata captures source path/component/axes and effective write strategy for reproducibility. +Store-Level Spatial Calibration +------------------------------- + +Canonical analysis stores also persist optional placement metadata for Navigate +multiposition datasets in the root attr ``spatial_calibration``. + +- Schema payload is ``{schema, stage_axis_map_zyx, theta_mode}``. +- Missing metadata resolves to the identity mapping + ``z=+z,y=+y,x=+x``. +- Calibration is metadata-only and does not rewrite canonical ``data``. +- GUI setup writes the resolved mapping on ``Next`` for every prepared or + reused store in the experiment list. +- Headless ``--stage-axis-map`` writes an explicit override after + materialization for ``experiment.yml`` inputs and before analysis for + existing Zarr/N5 stores. +- Legacy stores without this attr are backfilled logically as identity, while + stores that already have a mapping keep it unless the operator explicitly + overrides it. + Materialization Lifecycle ------------------------- @@ -76,7 +96,7 @@ Materialization Lifecycle 3. chunk normalization, 4. canonical base data writes, 5. pyramid level materialization, -6. ingestion completion metadata update. +6. ingestion completion metadata update and store-metadata preservation. If a store is already complete for expected chunks/pyramid settings, materialization returns quickly without rewriting data. diff --git a/docs/source/runtime/provenance.rst b/docs/source/runtime/provenance.rst index 9b3fd99..bab8146 100644 --- a/docs/source/runtime/provenance.rst +++ b/docs/source/runtime/provenance.rst @@ -31,6 +31,7 @@ Run Record Content - input summary and input fingerprint hash, - normalized workflow settings, - effective Dask backend payload and chunk/pyramid settings, +- effective spatial-calibration payload/text/explicitness, - selected analyses and per-analysis parameters, - ordered step records and output references, - software metadata (package version, git commit/branch/dirty), @@ -61,6 +62,19 @@ Latest Output References This decouples large arrays from the append-only provenance history while keeping latest output pointers searchable. +Spatial Placement Reproducibility +--------------------------------- + +Store-level Navigate placement metadata is part of the reproducibility record: + +- workflow provenance stores the effective ``spatial_calibration`` payload, + canonical text form, and whether it was explicitly supplied by the operator; +- visualization latest metadata stores the effective spatial calibration used + for multiposition placement. + +This keeps historical runs interpretable even when microscope stage axes do not +match camera/world axes. + History Summaries and Dedup-Aware Execution ------------------------------------------- diff --git a/src/clearex/codex.md b/src/clearex/codex.md index 9537043..fd9cbc7 100644 --- a/src/clearex/codex.md +++ b/src/clearex/codex.md @@ -23,6 +23,9 @@ This directory contains the runtime orchestration surface for ClearEx. - Canonical source array component is `data`. - Analysis outputs use `results//latest/...` (latest-only replacement). - Provenance records are append-only and include workflow + runtime parameters. +- Root store attr `spatial_calibration` is the canonical store-level + stage-to-world axis mapping for Navigate multiposition placement; missing + attrs mean identity mapping. ## Dask Workload Policy @@ -84,6 +87,30 @@ This directory contains the runtime orchestration surface for ClearEx. - auto-built pyramids are cached under `results/visualization_cache/pyramids/...`. +## Recent Runtime Updates (2026-03-20) + +- Added store-level spatial calibration for Navigate multiposition datasets: + - `WorkflowConfig` now carries `SpatialCalibrationConfig`, + - canonical text form is `z=...,y=...,x=...`, + - allowed bindings are `+/-x`, `+/-y`, `+/-z`, `+/-f`, and `none`, + - the root store attr `spatial_calibration` persists schema, mapping, and + `theta_mode`, + - missing attrs resolve to identity instead of requiring backfilled config. +- Setup flow now exposes a lightweight `Spatial Calibration` control per + experiment: + - one draft is kept per experiment while setup is open, + - existing stores prefill the current mapping, + - `Next` writes the resolved mapping to every reused or newly prepared store. +- Headless workflows now accept `--stage-axis-map` for Navigate + `experiment.yml` inputs and existing Zarr/N5 stores. +- Visualization position affines now derive world `z/y/x` translations from + the stored calibration: + - Navigate `F` is available as a placement source, + - `none` zeroes a world axis translation, + - sign inversion is supported, + - `THETA` remains rotation of the `z/y` plane about world `x`. +- Provenance now records the effective spatial calibration used by the run. + ## Sequencing and Inputs - Operation order is driven by `analysis_parameters[]["execution_order"]`. diff --git a/src/clearex/gui/CODEX.md b/src/clearex/gui/CODEX.md index a71f31c..4415bb1 100644 --- a/src/clearex/gui/CODEX.md +++ b/src/clearex/gui/CODEX.md @@ -7,8 +7,11 @@ This folder owns the PyQt6 UX in `app.py`. - Setup window (`ClearExSetupDialog`): - Select file / `experiment.yml` - Configure Dask backend and Zarr save options + - Configure per-experiment `Spatial Calibration` (world `z/y/x` to Navigate + stage `X/Y/Z/F` or `none`) - Display image metadata - - Materialize canonical store when missing, with progress dialog + - Materialize canonical store when missing, persist resolved spatial + calibration to all requested stores, and show progress dialog - Analysis window (`AnalysisSelectionDialog`): - `Analysis Scope` manages the active experiment/store context for single or batch analysis @@ -24,6 +27,9 @@ This folder owns the PyQt6 UX in `app.py`. - fall back to the latest completed provenance-backed workflow state, - persist current widget state on experiment switch, close, and run, - keep `Restore Latest Run Parameters` working for the active dataset. +- The active analysis workflow must always carry the selected target store's + spatial calibration so visualization and future registration use the same + placement metadata. - When adding a new analysis workflow, new operation widget, or new parameter: - add the default and normalization path in `src/clearex/workflow.py`, - hydrate the widget from restored `analysis_parameters`, @@ -42,6 +48,8 @@ This folder owns the PyQt6 UX in `app.py`. - Unselected operations should not be configurable. - Per-operation `Input source` options depend on selected upstream operations and execution order. - `Visualization` is treated as a terminal/view step; it should not be offered as an upstream image source for later operations. +- Visualization placement must come from persisted store metadata + (`spatial_calibration`), not transient GUI-only state. - Visualization configuration currently exposes: - `position_index` for multiposition datasets - multiscale loading toggle diff --git a/src/clearex/gui/README.md b/src/clearex/gui/README.md index 73c3079..3b66e40 100644 --- a/src/clearex/gui/README.md +++ b/src/clearex/gui/README.md @@ -12,10 +12,15 @@ This folder owns the PyQt6 UX in `app.py`. - Add/remove experiment entries from the list and persist the list for reuse - Auto-load metadata when the current list selection changes - Configure Dask backend and Zarr save options + - Configure `Spatial Calibration` for the currently selected experiment: + - map world `z/y/x` to Navigate stage `X/Y/Z/F` or `none`, + - prefill from the target store when available, + - otherwise keep a per-experiment draft while setup remains open - Persist the last-used Zarr save config across sessions - Display image metadata - On `Next`, batch-materialize only missing/incomplete canonical stores for - every listed experiment, then continue with the currently selected + every listed experiment, persist the resolved spatial calibration for + every reused/new store, then continue with the currently selected experiment - `Rebuild Canonical Store` forces the listed stores to be rebuilt with the current GUI chunking and pyramid settings @@ -29,6 +34,8 @@ This folder owns the PyQt6 UX in `app.py`. active dataset - The selected experiment switches the active store/provenance context shown in the dialog + - The selected experiment also switches the effective store-backed spatial + calibration carried in the workflow used for analysis launch - Current analysis-widget values are persisted per dataset on target switch, close, and run so reopening a store restores the previous GUI state - Left: operation selection, execution order, and `Configure` buttons @@ -44,6 +51,8 @@ This folder owns the PyQt6 UX in `app.py`. - Unselected operations should not be configurable. - Per-operation `Input source` options depend on selected upstream operations and execution order. - `Visualization` is treated as a terminal/view step; it should not be offered as an upstream image source for later operations. +- Visualization placement should come from the active target store's persisted + `spatial_calibration`, not from one-off GUI-only state. - Visualization configuration currently exposes: - `position_index` for multiposition datasets - multiscale loading toggle diff --git a/src/clearex/gui/app.py b/src/clearex/gui/app.py index 68c9abf..019a62a 100644 --- a/src/clearex/gui/app.py +++ b/src/clearex/gui/app.py @@ -61,9 +61,11 @@ infer_zyx_shape, is_navigate_experiment_file, load_navigate_experiment, + load_store_spatial_calibration, materialize_experiment_data_store, resolve_data_store_path, resolve_experiment_data_path, + save_store_spatial_calibration, ) from clearex.io.provenance import ( is_zarr_store_path, @@ -90,6 +92,8 @@ LocalClusterRecommendation, LocalClusterConfig, PTCZYX_AXES, + SPATIAL_CALIBRATION_WORLD_AXES, + SpatialCalibrationConfig, SlurmClusterConfig, SlurmRunnerConfig, WorkflowConfig, @@ -107,15 +111,18 @@ execution_policy_to_dict, format_execution_plan_summary, format_execution_policy_summary, + format_spatial_calibration, format_local_cluster_recommendation_summary, format_pyramid_levels, format_zarr_chunks_ptczyx, format_zarr_pyramid_ptczyx, + normalize_spatial_calibration, normalize_analysis_operation_parameters, parse_pyramid_levels, plan_execution, recommend_local_cluster_config, resolve_analysis_input_component, + spatial_calibration_to_dict, validate_analysis_input_references, zarr_save_from_dict, zarr_save_to_dict, @@ -206,6 +213,8 @@ class GuiUnavailableError(RuntimeError): _CLEAREX_EXPERIMENT_LIST_FILE_SUFFIX = ".clearex-experiment-list.json" _SETUP_DIALOG_MINIMUM_SIZE = (1240, 920) _SETUP_DIALOG_PREFERRED_SIZE = (1520, 1120) +_SPATIAL_CALIBRATION_DIALOG_MINIMUM_SIZE = (620, 420) +_SPATIAL_CALIBRATION_DIALOG_PREFERRED_SIZE = (720, 520) _ZARR_SAVE_DIALOG_MINIMUM_SIZE = (860, 660) _ZARR_SAVE_DIALOG_PREFERRED_SIZE = (940, 760) _DASK_BACKEND_DIALOG_MINIMUM_SIZE = (940, 840) @@ -2138,6 +2147,64 @@ def _dask_mode_help_text(mode: str) -> str: ) +def _spatial_calibration_binding_choices() -> tuple[tuple[str, str], ...]: + """Return labeled spatial-calibration binding choices for GUI controls. + + Parameters + ---------- + None + + Returns + ------- + tuple[tuple[str, str], ...] + ``(label, binding)`` pairs in UI display order. + """ + return ( + ("+X stage", "+x"), + ("-X stage", "-x"), + ("+Y stage", "+y"), + ("-Y stage", "-y"), + ("+Z stage", "+z"), + ("-Z stage", "-z"), + ("+F focus", "+f"), + ("-F focus", "-f"), + ("Disabled", "none"), + ) + + +def _format_spatial_calibration_summary(config: SpatialCalibrationConfig) -> str: + """Format a compact setup-dialog summary for spatial calibration. + + Parameters + ---------- + config : SpatialCalibrationConfig + Calibration to summarize. + + Returns + ------- + str + Human-readable summary for setup and analysis dialogs. + """ + binding_labels = { + binding: label + for label, binding in _spatial_calibration_binding_choices() + } + lines = [ + f"Canonical: {format_spatial_calibration(config)}", + ] + for axis_name, binding in zip( + SPATIAL_CALIBRATION_WORLD_AXES, + config.stage_axis_map_zyx, + strict=False, + ): + lines.append( + f"World {axis_name.upper()}: " + f"{binding_labels.get(binding, binding)}" + ) + lines.append("Theta: Rotate Z/Y about world X") + return "\n".join(lines) + + def _popup_dialog_stylesheet() -> str: """Return shared stylesheet for configuration popup dialogs. @@ -3111,6 +3178,165 @@ def _on_apply(self) -> None: self.accept() + class SpatialCalibrationDialog(QDialog): + """Dialog for configuring store-level stage-to-world axis bindings.""" + + def __init__( + self, + initial: SpatialCalibrationConfig, + parent: Optional[QDialog] = None, + ) -> None: + """Initialize spatial-calibration controls. + + Parameters + ---------- + initial : SpatialCalibrationConfig + Initial stage-to-world mapping. + parent : QDialog, optional + Parent dialog widget. + + Returns + ------- + None + Dialog is initialized in-place. + """ + super().__init__(parent) + self.setWindowTitle("Spatial Calibration") + self.result_config: Optional[SpatialCalibrationConfig] = None + self._binding_inputs: Dict[str, QComboBox] = {} + + self._build_ui() + self._hydrate(initial) + self.setStyleSheet(_popup_dialog_stylesheet()) + _apply_initial_dialog_geometry( + self, + minimum_size=_SPATIAL_CALIBRATION_DIALOG_MINIMUM_SIZE, + preferred_size=_SPATIAL_CALIBRATION_DIALOG_PREFERRED_SIZE, + content_size_hint=(self.sizeHint().width(), self.sizeHint().height()), + ) + + def _build_ui(self) -> None: + """Construct dialog controls and wire signals. + + Parameters + ---------- + None + + Returns + ------- + None + Widgets are created and connected in-place. + """ + outer_root = QVBoxLayout(self) + outer_root.setContentsMargins(0, 0, 0, 0) + outer_root.setSpacing(0) + + content_scroll = QScrollArea(self) + content_scroll.setObjectName("popupDialogScroll") + content_scroll.setWidgetResizable(True) + content_scroll.setFrameShape(QFrame.Shape.NoFrame) + content_scroll.setHorizontalScrollBarPolicy( + Qt.ScrollBarPolicy.ScrollBarAlwaysOff + ) + outer_root.addWidget(content_scroll, 1) + + content_widget = QWidget() + content_widget.setObjectName("popupDialogContent") + content_scroll.setWidget(content_widget) + + root = QVBoxLayout(content_widget) + apply_popup_root_spacing(root) + + description = QLabel( + "Map world Z/Y/X placement axes to Navigate multiposition stage " + "coordinates. This affects spatial placement metadata only; " + "canonical data remains unchanged." + ) + description.setWordWrap(True) + root.addWidget(description) + + bindings_group = QGroupBox("World Axis Mapping") + bindings_layout = QFormLayout(bindings_group) + apply_form_spacing(bindings_layout) + + for axis_name in SPATIAL_CALIBRATION_WORLD_AXES: + combo = QComboBox() + for label, binding in _spatial_calibration_binding_choices(): + combo.addItem(label, binding) + bindings_layout.addRow(f"World {axis_name.upper()}", combo) + self._binding_inputs[axis_name] = combo + + root.addWidget(bindings_group) + + note = QLabel( + "THETA remains a rotation of the Z/Y plane about world X for v1." + ) + note.setWordWrap(True) + root.addWidget(note) + + footer = QHBoxLayout() + apply_footer_row_spacing(footer) + self._defaults_button = _configure_fixed_height_button( + QPushButton("Reset Identity") + ) + self._cancel_button = _configure_fixed_height_button( + QPushButton("Cancel") + ) + self._apply_button = _configure_fixed_height_button( + QPushButton("Apply") + ) + self._apply_button.setObjectName("runButton") + footer.addWidget(self._defaults_button) + footer.addStretch(1) + footer.addWidget(self._cancel_button) + footer.addWidget(self._apply_button) + root.addLayout(footer) + + self._defaults_button.clicked.connect(self._on_reset_defaults) + self._cancel_button.clicked.connect(self.reject) + self._apply_button.clicked.connect(self._on_apply) + content_widget.setMinimumHeight(root.sizeHint().height()) + + def _hydrate(self, initial: SpatialCalibrationConfig) -> None: + """Populate combo boxes from an initial calibration. + + Parameters + ---------- + initial : SpatialCalibrationConfig + Calibration to display. + + Returns + ------- + None + Widget values are updated in-place. + """ + for axis_name, binding in zip( + SPATIAL_CALIBRATION_WORLD_AXES, + initial.stage_axis_map_zyx, + strict=False, + ): + combo = self._binding_inputs[axis_name] + index = combo.findData(binding) + combo.setCurrentIndex(index if index >= 0 else 0) + + def _on_reset_defaults(self) -> None: + """Reset controls to the identity mapping.""" + self._hydrate(SpatialCalibrationConfig()) + + def _on_apply(self) -> None: + """Validate the selected bindings and accept the dialog.""" + try: + self.result_config = SpatialCalibrationConfig( + stage_axis_map_zyx=tuple( + str(self._binding_inputs[axis_name].currentData()) + for axis_name in SPATIAL_CALIBRATION_WORLD_AXES + ) + ) + except ValueError as exc: + QMessageBox.warning(self, "Invalid Spatial Calibration", str(exc)) + return + self.accept() + class DaskBackendConfigDialog(QDialog): """Dialog for configuring Dask backend execution mode and parameters. @@ -4868,9 +5094,14 @@ def __init__(self, initial: WorkflowConfig) -> None: self._loaded_experiment_path: Optional[Path] = None self._loaded_image_info: Optional[ImageInfo] = None self._loaded_source_data_path: Optional[Path] = None + self._loaded_target_store_path: Optional[Path] = None self._experiment_list_file_path: Optional[Path] = None self._experiment_list_dirty = False self._source_data_directory_overrides: Dict[Path, Path] = {} + self._spatial_calibration_drafts: Dict[Path, SpatialCalibrationConfig] = {} + self._current_spatial_calibration: SpatialCalibrationConfig = ( + initial.spatial_calibration + ) self._materialization_worker: Optional[QThread] = None self._rebuild_store_checkbox: Optional[QCheckBox] = None @@ -5060,6 +5291,27 @@ def _build_ui(self) -> None: zarr_layout.addLayout(zarr_button_row) root.addWidget(zarr_group) + spatial_group = QGroupBox("Spatial Calibration") + spatial_layout = QVBoxLayout(spatial_group) + apply_stack_spacing(spatial_layout) + spatial_layout.setContentsMargins(10, 8, 10, 10) + self._spatial_calibration_summary = QLabel("n/a") + self._spatial_calibration_summary.setObjectName("metadataFieldValue") + self._spatial_calibration_summary.setWordWrap(True) + self._spatial_calibration_summary.setTextInteractionFlags( + Qt.TextInteractionFlag.TextSelectableByMouse + ) + spatial_layout.addWidget(self._spatial_calibration_summary) + spatial_button_row = QHBoxLayout() + apply_row_spacing(spatial_button_row) + spatial_button_row.addStretch(1) + self._spatial_calibration_button = QPushButton( + "Edit Spatial Calibration" + ) + spatial_button_row.addWidget(self._spatial_calibration_button) + spatial_layout.addLayout(spatial_button_row) + root.addWidget(spatial_group) + dask_backend_group = QGroupBox("Execution Planning") dask_backend_layout = QVBoxLayout(dask_backend_group) apply_stack_spacing(dask_backend_layout) @@ -5125,6 +5377,9 @@ def _build_ui(self) -> None: ) self._dask_backend_button.clicked.connect(self._on_edit_dask_backend) self._zarr_config_button.clicked.connect(self._on_edit_zarr_settings) + self._spatial_calibration_button.clicked.connect( + self._on_edit_spatial_calibration + ) self._cancel_button.clicked.connect(self.reject) self._next_button.clicked.connect(self._on_next) self._rebuild_store_checkbox.toggled.connect(self._on_rebuild_store_toggled) @@ -5145,6 +5400,7 @@ def _hydrate(self, initial: WorkflowConfig) -> None: """ self._refresh_dask_backend_summary() self._refresh_zarr_save_summary() + self._refresh_spatial_calibration_summary() initial_file = str(initial.file or "").strip() if not initial_file: self._refresh_experiment_actions() @@ -5225,6 +5481,107 @@ def _refresh_zarr_save_summary(self) -> None: self._zarr_config_summary.setText(summary) self._zarr_config_summary.setToolTip(summary) + def _refresh_spatial_calibration_summary(self) -> None: + """Refresh setup summary text for spatial calibration. + + Parameters + ---------- + None + + Returns + ------- + None + Summary label and button state are updated in-place. + """ + summary = _format_spatial_calibration_summary( + self._current_spatial_calibration + ) + self._spatial_calibration_summary.setText(summary) + self._spatial_calibration_summary.setToolTip(summary) + has_selection = self._current_selected_experiment_path() is not None + self._spatial_calibration_button.setEnabled(bool(has_selection)) + + def _resolve_spatial_calibration_for_experiment( + self, + experiment_path: Path, + *, + target_store: Optional[Path] = None, + ) -> SpatialCalibrationConfig: + """Resolve draft/store/default calibration for one experiment. + + Parameters + ---------- + experiment_path : pathlib.Path + Selected experiment path. + target_store : pathlib.Path, optional + Prepared or target analysis-store path when already known. + + Returns + ------- + SpatialCalibrationConfig + Effective calibration for the experiment within the setup + session. + """ + resolved_experiment_path = Path(experiment_path).expanduser().resolve() + draft = self._spatial_calibration_drafts.get(resolved_experiment_path) + if draft is not None: + return draft + + store_path = ( + Path(target_store).expanduser().resolve() + if target_store is not None + else None + ) + if store_path is None: + try: + request = self._resolve_store_preparation_request( + resolved_experiment_path + ) + except Exception: + return SpatialCalibrationConfig() + store_path = request.target_store + + if is_zarr_store_path(store_path) and Path(store_path).exists(): + return load_store_spatial_calibration(store_path) + return SpatialCalibrationConfig() + + def _set_current_spatial_calibration( + self, + *, + experiment_path: Optional[Path], + calibration: Optional[SpatialCalibrationConfig] = None, + target_store: Optional[Path] = None, + ) -> None: + """Update the currently displayed calibration for setup. + + Parameters + ---------- + experiment_path : pathlib.Path, optional + Active experiment path. + calibration : SpatialCalibrationConfig, optional + Explicit calibration to display. + target_store : pathlib.Path, optional + Target store path used when loading from store attrs. + + Returns + ------- + None + Current setup calibration state is updated in-place. + """ + if experiment_path is None: + self._current_spatial_calibration = SpatialCalibrationConfig() + else: + resolved_experiment_path = Path(experiment_path).expanduser().resolve() + self._current_spatial_calibration = ( + calibration + if calibration is not None + else self._resolve_spatial_calibration_for_experiment( + resolved_experiment_path, + target_store=target_store, + ) + ) + self._refresh_spatial_calibration_summary() + def _on_edit_dask_backend(self) -> None: """Open backend dialog and apply selected configuration. @@ -5285,6 +5642,47 @@ def _on_edit_zarr_settings(self) -> None: _save_last_used_zarr_save_config(self._zarr_save_config) self._set_status("Updated Zarr save settings.") + def _on_edit_spatial_calibration(self) -> None: + """Open spatial-calibration dialog for the selected experiment. + + Parameters + ---------- + None + + Returns + ------- + None + Stores the selected calibration as an in-session draft. + """ + experiment_path = self._current_selected_experiment_path() + if experiment_path is None: + QMessageBox.information( + self, + "No Experiment Selected", + "Select an experiment before editing spatial calibration.", + ) + return + + dialog = SpatialCalibrationDialog( + initial=self._current_spatial_calibration, + parent=self, + ) + result = dialog.exec() + if result != QDialog.DialogCode.Accepted or dialog.result_config is None: + return + + resolved_experiment_path = Path(experiment_path).expanduser().resolve() + self._spatial_calibration_drafts[resolved_experiment_path] = ( + dialog.result_config + ) + self._set_current_spatial_calibration( + experiment_path=resolved_experiment_path, + calibration=dialog.result_config, + ) + self._set_status( + "Updated spatial calibration draft for the selected experiment." + ) + def _on_rebuild_store_toggled(self, checked: bool) -> None: """Update setup status when rebuild mode is toggled. @@ -5589,6 +5987,7 @@ def _reset_metadata_labels(self) -> None: """ for label in self._metadata_labels.values(): label.setText("n/a") + self._set_current_spatial_calibration(experiment_path=None) def _experiment_path_from_item( self, @@ -5859,6 +6258,7 @@ def _clear_loaded_experiment_context(self) -> None: self._loaded_experiment_path = None self._loaded_image_info = None self._loaded_source_data_path = None + self._loaded_target_store_path = None def eventFilter(self, watched: QObject, event: QEvent) -> bool: """Handle drag/drop events routed through the experiment list. @@ -6266,6 +6666,8 @@ def _on_current_experiment_changed( self._clear_loaded_experiment_context() self._reset_metadata_labels() self._set_status("Ready") + else: + self._set_current_spatial_calibration(experiment_path=None) return self._load_selected_experiment_metadata() @@ -6410,6 +6812,9 @@ def _load_metadata_for_experiment_path( self._clear_loaded_experiment_context() self._reset_metadata_labels() self._metadata_labels["path"].setText(str(resolved_experiment_path)) + self._set_current_spatial_calibration( + experiment_path=resolved_experiment_path + ) _show_themed_error_dialog( self, "Metadata Load Failed", @@ -6437,6 +6842,11 @@ def _load_metadata_for_experiment_path( self._loaded_source_data_path = source_data_path target_store = resolve_data_store_path(experiment, source_data_path) + self._loaded_target_store_path = Path(target_store).expanduser().resolve() + self._set_current_spatial_calibration( + experiment_path=loaded_path, + target_store=self._loaded_target_store_path, + ) self._set_status(f"Metadata loaded. Target store: {target_store}") def _prompt_for_source_data_directory( @@ -6623,12 +7033,42 @@ def _current_dtype_itemsize(self) -> Optional[int]: except Exception: return None + def _persist_spatial_calibration_for_requests( + self, + requests: Sequence[ExperimentStorePreparationRequest], + ) -> Dict[Path, SpatialCalibrationConfig]: + """Persist resolved spatial calibration for each prepared store. + + Parameters + ---------- + requests : sequence[ExperimentStorePreparationRequest] + Prepared or reused store requests. + + Returns + ------- + dict[pathlib.Path, SpatialCalibrationConfig] + Effective calibration written for each target store. + """ + persisted: Dict[Path, SpatialCalibrationConfig] = {} + for request in requests: + resolved_store = Path(request.target_store).expanduser().resolve() + calibration = self._resolve_spatial_calibration_for_experiment( + Path(request.experiment_path).expanduser().resolve(), + target_store=resolved_store, + ) + persisted[resolved_store] = save_store_spatial_calibration( + resolved_store, + calibration, + ) + return persisted + def _accept_with_store_path( self, store_path: Path, *, analysis_targets: Sequence[AnalysisTarget], selected_experiment_path: Path, + spatial_calibration: SpatialCalibrationConfig, ) -> None: """Finalize setup dialog with prepared store path configuration. @@ -6642,6 +7082,8 @@ def _accept_with_store_path( selected_experiment_path : pathlib.Path Navigate experiment descriptor currently selected in the setup list. + spatial_calibration : SpatialCalibrationConfig + Effective calibration written to the selected target store. Returns ------- @@ -6667,6 +7109,8 @@ def _accept_with_store_path( visualization=False, mip_export=False, zarr_save=self._zarr_save_config, + spatial_calibration=spatial_calibration, + spatial_calibration_explicit=False, ) _save_last_used_execution_policy( replace( @@ -6749,6 +7193,23 @@ def _on_next(self) -> None: analysis_targets = _analysis_targets_from_store_requests(requests) if not pending_requests: + try: + persisted_calibrations = ( + self._persist_spatial_calibration_for_requests(requests) + ) + except Exception as exc: + logging.getLogger(__name__).exception( + "Failed to persist spatial calibration for prepared stores." + ) + _show_themed_error_dialog( + self, + "Spatial Calibration Failed", + "Failed to persist spatial calibration to the prepared analysis stores.", + summary=f"{type(exc).__name__}: {exc}", + details=traceback.format_exc(), + ) + self._set_status("Failed to persist spatial calibration.") + return self._set_status( "All listed data stores are ready. Opening analysis selection." ) @@ -6756,6 +7217,9 @@ def _on_next(self) -> None: selected_request.target_store, analysis_targets=analysis_targets, selected_experiment_path=selected_request.experiment_path, + spatial_calibration=persisted_calibrations[ + Path(selected_request.target_store).expanduser().resolve() + ], ) return @@ -6846,10 +7310,30 @@ def _on_next(self) -> None: prepared_count, ready_count, ) + try: + persisted_calibrations = self._persist_spatial_calibration_for_requests( + requests + ) + except Exception as exc: + logging.getLogger(__name__).exception( + "Failed to persist spatial calibration for prepared stores." + ) + _show_themed_error_dialog( + self, + "Spatial Calibration Failed", + "Failed to persist spatial calibration to the prepared analysis stores.", + summary=f"{type(exc).__name__}: {exc}", + details=traceback.format_exc(), + ) + self._set_status("Failed to persist spatial calibration.") + return self._accept_with_store_path( selected_request.target_store, analysis_targets=analysis_targets, selected_experiment_path=selected_request.experiment_path, + spatial_calibration=persisted_calibrations[ + Path(selected_request.target_store).expanduser().resolve() + ], ) class AnalysisSelectionDialog(QDialog): @@ -7682,6 +8166,17 @@ def _build_target_default_workflow( or None ) ) + spatial_calibration = self._base_config.spatial_calibration + if target is not None and is_zarr_store_path(target.store_path): + try: + spatial_calibration = load_store_spatial_calibration( + target.store_path + ) + except Exception: + logging.getLogger(__name__).exception( + "Failed to load store spatial calibration for %s.", + target.store_path, + ) return replace( self._session_default_analysis_template, file=file_path, @@ -7689,6 +8184,8 @@ def _build_target_default_workflow( analysis_selected_experiment_path=selected_experiment_path, analysis_apply_to_all=self._current_analysis_apply_to_all(), dask_backend=self._dask_backend_config, + spatial_calibration=spatial_calibration, + spatial_calibration_explicit=False, ) def _analysis_state_workflow_from_payload( @@ -7726,6 +8223,10 @@ def _analysis_state_workflow_from_payload( default_workflow.analysis_parameters, ) ) + spatial_calibration_payload = payload.get( + "spatial_calibration", + default_workflow.spatial_calibration, + ) return replace( default_workflow, flatfield=selected_flags["flatfield"], @@ -7736,6 +8237,15 @@ def _analysis_state_workflow_from_payload( registration=selected_flags["registration"], visualization=selected_flags["visualization"], mip_export=selected_flags["mip_export"], + spatial_calibration=normalize_spatial_calibration( + spatial_calibration_payload + ), + spatial_calibration_explicit=bool( + payload.get( + "spatial_calibration_explicit", + default_workflow.spatial_calibration_explicit, + ) + ), analysis_parameters=analysis_parameters, ) @@ -7778,6 +8288,12 @@ def _analysis_gui_state_payload_from_current_widgets(self) -> Dict[str, Any]: payload["analysis_selected_experiment_path"] = str( target.experiment_path ) + payload["spatial_calibration"] = spatial_calibration_to_dict( + self._base_config.spatial_calibration + ) + payload["spatial_calibration_explicit"] = bool( + self._base_config.spatial_calibration_explicit + ) return payload def _persist_analysis_gui_state_for_target( @@ -7933,6 +8449,12 @@ def _restore_analysis_state_for_target( self._base_config.analysis_apply_to_all = bool( restored_workflow.analysis_apply_to_all ) + self._base_config.spatial_calibration = ( + restored_workflow.spatial_calibration + ) + self._base_config.spatial_calibration_explicit = bool( + restored_workflow.spatial_calibration_explicit + ) self._set_analysis_state_source_text( source_text, has_latest_run=latest_completed_state is not None, @@ -14502,6 +15024,10 @@ def _on_run(self) -> None: "visualization": selected_flags["visualization"], "mip_export": selected_flags["mip_export"], "zarr_save": self._base_config.zarr_save, + "spatial_calibration": self._base_config.spatial_calibration, + "spatial_calibration_explicit": bool( + self._base_config.spatial_calibration_explicit + ), "analysis_parameters": analysis_parameters, } dataclass_fields = getattr(WorkflowConfig, "__dataclass_fields__", {}) @@ -14738,6 +15264,8 @@ def _reset_analysis_selection_for_next_run(workflow: WorkflowConfig) -> Workflow "visualization": False, "mip_export": False, "zarr_save": workflow.zarr_save, + "spatial_calibration": workflow.spatial_calibration, + "spatial_calibration_explicit": workflow.spatial_calibration_explicit, "analysis_parameters": analysis_parameters, } dataclass_fields = getattr(WorkflowConfig, "__dataclass_fields__", {}) @@ -14785,6 +15313,18 @@ def _workflows_for_selected_analysis_scope( analysis. """ targets = _analysis_targets_for_workflow(workflow) + + def _target_spatial_calibration(target: AnalysisTarget) -> SpatialCalibrationConfig: + if is_zarr_store_path(target.store_path): + try: + return load_store_spatial_calibration(target.store_path) + except Exception: + logging.getLogger(__name__).exception( + "Failed to load store spatial calibration for %s.", + target.store_path, + ) + return workflow.spatial_calibration + if not targets: return (workflow,) if workflow.analysis_apply_to_all and len(targets) > 1: @@ -14794,6 +15334,8 @@ def _workflows_for_selected_analysis_scope( file=str(target.store_path), analysis_selected_experiment_path=str(target.experiment_path), analysis_apply_to_all=False, + spatial_calibration=_target_spatial_calibration(target), + spatial_calibration_explicit=False, ) for target in targets ) @@ -14806,13 +15348,21 @@ def _workflows_for_selected_analysis_scope( == str(selected_target.experiment_path) and not workflow.analysis_apply_to_all ): - return (workflow,) + return ( + replace( + workflow, + spatial_calibration=_target_spatial_calibration(selected_target), + spatial_calibration_explicit=False, + ), + ) return ( replace( workflow, file=str(selected_target.store_path), analysis_selected_experiment_path=str(selected_target.experiment_path), analysis_apply_to_all=False, + spatial_calibration=_target_spatial_calibration(selected_target), + spatial_calibration_explicit=False, ), ) diff --git a/src/clearex/io/CODEX.md b/src/clearex/io/CODEX.md index 16e69e6..e4967b6 100644 --- a/src/clearex/io/CODEX.md +++ b/src/clearex/io/CODEX.md @@ -14,6 +14,10 @@ This folder contains ingestion, CLI, logging, and provenance logic. - For non-Zarr/N5 sources: create `data_store.zarr` beside `experiment.yml`. - For Zarr/N5 sources: reuse existing store path (no duplicate copy path). - Canonical base array component is `data` with shape `(t, p, c, z, y, x)`. +- Root store attr `spatial_calibration` is the canonical store-level + stage-to-world mapping for Navigate multiposition placement metadata. +- Missing `spatial_calibration` attrs resolve to identity + (`z=+z,y=+y,x=+x`). ## Materialization Rules @@ -38,6 +42,22 @@ This folder contains ingestion, CLI, logging, and provenance logic. acquisition directory (not only the first TIFF file) for clearer provenance. - If `data_store.zarr` was generated by older single-file TIFF logic, rerun materialization from `experiment.yml` to rebuild correct `p`/`c` dimensions. +- Materialization and store initialization must preserve an existing + `spatial_calibration` mapping when present and backfill identity for legacy + stores without rewriting canonical `data`. +- Updating spatial calibration is metadata-only; never rewrite the canonical + source array to express placement changes. + +## Headless Spatial Calibration Override + +- CLI accepts `--stage-axis-map "z=+x,y=none,x=+y"` for Navigate + `experiment.yml` inputs and existing Zarr/N5 stores. +- For `experiment.yml` inputs, explicit overrides are written after store + materialization. +- For existing Zarr/N5 stores, explicit overrides update the root attr before + analysis starts. +- When `--stage-axis-map` is omitted, existing store calibration must be + preserved rather than overwritten with identity. ## Dask Client Defaults @@ -55,6 +75,8 @@ This folder contains ingestion, CLI, logging, and provenance logic. - Provenance is append-only. - Large analysis outputs use latest-only storage under `results//latest`. - Persist effective backend config and analysis parameters. +- Persist effective spatial calibration in workflow provenance so historical + runs preserve the placement rule they used. - Register `latest` output references for discoverability. ## Logging Rules diff --git a/src/clearex/io/README.md b/src/clearex/io/README.md index 8f525a5..3c6d74d 100644 --- a/src/clearex/io/README.md +++ b/src/clearex/io/README.md @@ -14,6 +14,10 @@ This folder contains ingestion, CLI, logging, and provenance logic. - For non-Zarr/N5 sources: create `data_store.zarr` beside `experiment.yml`. - For Zarr/N5 sources: reuse existing store path (no duplicate copy path). - Canonical base array component is `data` with shape `(t, p, c, z, y, x)`. +- Root store attr `spatial_calibration` is the canonical store-level + stage-to-world mapping for Navigate multiposition placement metadata. +- Missing `spatial_calibration` attrs resolve to identity + (`z=+z,y=+y,x=+x`). ## Materialization Rules @@ -38,6 +42,22 @@ This folder contains ingestion, CLI, logging, and provenance logic. acquisition directory (not only the first TIFF file) for clearer provenance. - If `data_store.zarr` was generated by older single-file TIFF logic, rerun materialization from `experiment.yml` to rebuild correct `p`/`c` dimensions. +- Materialization and store initialization must preserve an existing + `spatial_calibration` mapping when present and backfill identity for legacy + stores without rewriting canonical `data`. +- Updating spatial calibration is metadata-only; never rewrite the canonical + source array to express placement changes. + +## Headless Spatial Calibration Override + +- CLI accepts `--stage-axis-map "z=+x,y=none,x=+y"` for Navigate + `experiment.yml` inputs and existing Zarr/N5 stores. +- For `experiment.yml` inputs, explicit overrides are written after store + materialization. +- For existing Zarr/N5 stores, explicit overrides update the root attr before + analysis starts. +- When `--stage-axis-map` is omitted, existing store calibration must be + preserved rather than overwritten with identity. ## Dask Client Defaults @@ -53,6 +73,8 @@ This folder contains ingestion, CLI, logging, and provenance logic. - Provenance is append-only. - Large analysis outputs use latest-only storage under `results//latest`. - Persist effective backend config and analysis parameters. +- Persist effective spatial calibration in workflow provenance so historical + runs preserve the placement rule they used. - Register `latest` output references for discoverability. ## Logging Rules diff --git a/src/clearex/io/cli.py b/src/clearex/io/cli.py index f93497c..0f19a1c 100644 --- a/src/clearex/io/cli.py +++ b/src/clearex/io/cli.py @@ -258,6 +258,15 @@ def create_parser() -> argparse.ArgumentParser: action="store_true", help="Refresh the cached execution profile before planning", ) + parser.add_argument( + "--stage-axis-map", + type=str, + default=None, + help=( + "Store-level world z/y/x mapping for Navigate multiposition stage " + "coordinates, for example 'z=+x,y=none,x=+y'." + ), + ) parser.add_argument( "--gui", diff --git a/src/clearex/io/experiment.py b/src/clearex/io/experiment.py index 4a4709a..7a23145 100644 --- a/src/clearex/io/experiment.py +++ b/src/clearex/io/experiment.py @@ -55,6 +55,11 @@ # Local Imports from clearex.io.read import ImageInfo +from clearex.workflow import ( + SpatialCalibrationConfig, + spatial_calibration_from_dict, + spatial_calibration_to_dict, +) if TYPE_CHECKING: from dask.delayed import Delayed @@ -77,6 +82,7 @@ _INGESTION_PROGRESS_SCHEMA = "clearex.ingestion_progress.v1" _INGESTION_PROGRESS_ATTR = "ingestion_progress" +_SPATIAL_CALIBRATION_ATTR = "spatial_calibration" def _is_zarr_like_path(path: Path) -> bool: @@ -219,6 +225,55 @@ def _write_ingestion_progress_record( root.attrs[_INGESTION_PROGRESS_ATTR] = serialized +def load_store_spatial_calibration( + zarr_path: Union[str, Path], +) -> SpatialCalibrationConfig: + """Load store-level spatial calibration from root Zarr attrs. + + Parameters + ---------- + zarr_path : str or pathlib.Path + Analysis-store path. + + Returns + ------- + SpatialCalibrationConfig + Parsed store calibration. Missing attrs resolve to identity. + + Raises + ------ + ValueError + If stored spatial calibration metadata is malformed. + """ + root = zarr.open_group(str(Path(zarr_path).expanduser().resolve()), mode="r") + return spatial_calibration_from_dict(root.attrs.get(_SPATIAL_CALIBRATION_ATTR)) + + +def save_store_spatial_calibration( + zarr_path: Union[str, Path], + calibration: SpatialCalibrationConfig, +) -> SpatialCalibrationConfig: + """Persist store-level spatial calibration into root Zarr attrs. + + Parameters + ---------- + zarr_path : str or pathlib.Path + Analysis-store path. + calibration : SpatialCalibrationConfig + Calibration payload to persist. + + Returns + ------- + SpatialCalibrationConfig + Normalized calibration written to the store. + """ + normalized = spatial_calibration_from_dict(calibration) + serialized = json.loads(json.dumps(spatial_calibration_to_dict(normalized))) + root = zarr.open_group(str(Path(zarr_path).expanduser().resolve()), mode="a") + root.attrs[_SPATIAL_CALIBRATION_ATTR] = serialized + return normalized + + def _resolve_expected_pyramid_level_factors( *, root: Any, @@ -4998,6 +5053,9 @@ def initialize_analysis_store( root = zarr.open_group(str(output_path), mode="a") root.require_group("results") root.require_group("provenance") + spatial_calibration_payload = spatial_calibration_to_dict( + spatial_calibration_from_dict(root.attrs.get(_SPATIAL_CALIBRATION_ATTR)) + ) if "data" in root: if overwrite: del root["data"] @@ -5028,6 +5086,7 @@ def initialize_analysis_store( "navigate_experiment": experiment.to_metadata_dict(), "storage_policy_analysis_outputs": "latest_only", "storage_policy_provenance": "append_only", + _SPATIAL_CALIBRATION_ATTR: spatial_calibration_payload, "chunk_shape_tpczyx": existing_chunks, "configured_chunks_tpczyx": [ int(chunk) for chunk in requested_chunks @@ -5063,6 +5122,7 @@ def initialize_analysis_store( "navigate_experiment": experiment.to_metadata_dict(), "storage_policy_analysis_outputs": "latest_only", "storage_policy_provenance": "append_only", + _SPATIAL_CALIBRATION_ATTR: spatial_calibration_payload, "chunk_shape_tpczyx": [int(chunk) for chunk in normalized_chunks], "configured_chunks_tpczyx": [int(chunk) for chunk in requested_chunks], "resolution_pyramid_factors_tpczyx": pyramid_payload, diff --git a/src/clearex/io/provenance.py b/src/clearex/io/provenance.py index b022676..740cbe3 100644 --- a/src/clearex/io/provenance.py +++ b/src/clearex/io/provenance.py @@ -53,12 +53,14 @@ dask_backend_to_dict, execution_plan_to_dict, execution_policy_to_dict, + format_spatial_calibration, format_dask_backend_summary, format_chunks, format_execution_plan_summary, format_execution_policy_summary, format_zarr_chunks_ptczyx, format_zarr_pyramid_ptczyx, + spatial_calibration_to_dict, ) ArrayLike = Union[np.ndarray, da.Array] @@ -1034,6 +1036,15 @@ def persist_run_provenance( "visualization": workflow.visualization, "mip_export": workflow.mip_export, "selected_analyses": _selected_analyses(workflow), + "spatial_calibration": spatial_calibration_to_dict( + workflow.spatial_calibration + ), + "spatial_calibration_text": format_spatial_calibration( + workflow.spatial_calibration + ), + "spatial_calibration_explicit": bool( + workflow.spatial_calibration_explicit + ), "analysis_parameters": _to_jsonable(workflow.analysis_parameters), "analysis_output_policy": "latest_only", } diff --git a/src/clearex/main.py b/src/clearex/main.py index f8423f2..0ad70c8 100644 --- a/src/clearex/main.py +++ b/src/clearex/main.py @@ -47,9 +47,11 @@ create_dask_client, is_navigate_experiment_file, load_navigate_experiment, + load_store_spatial_calibration, materialize_experiment_data_store, resolve_data_store_path, resolve_experiment_data_path, + save_store_spatial_calibration, ) from clearex.io.cli import create_parser, display_logo from clearex.io.log import initiate_logger @@ -124,6 +126,7 @@ def run_usegment3d_analysis(*, zarr_path, parameters, client, progress_callback) CalibrationProfile, DaskBackendConfig, ExecutionPolicy, + SpatialCalibrationConfig, WorkflowConfig, WorkflowExecutionCancelled, analysis_chainable_output_component, @@ -135,11 +138,13 @@ def run_usegment3d_analysis(*, zarr_path, parameters, client, progress_callback) execution_plan_to_dict, execution_policy_from_dict, execution_policy_to_dict, + format_spatial_calibration, format_dask_backend_summary, format_chunks, format_execution_plan_summary, format_execution_policy_summary, normalize_analysis_operation_parameters, + parse_spatial_calibration, plan_execution, recommend_local_cluster_config, resolve_analysis_input_component, @@ -419,6 +424,34 @@ def _create_bootstrap_logger() -> logging.Logger: return logger +def _resolve_effective_store_spatial_calibration( + *, + store_path: str, + desired_calibration: SpatialCalibrationConfig, + persist: bool, +) -> SpatialCalibrationConfig: + """Load or persist the effective spatial calibration for one store. + + Parameters + ---------- + store_path : str + Canonical analysis-store path. + desired_calibration : SpatialCalibrationConfig + Candidate calibration selected for the current workflow. + persist : bool + Whether to write ``desired_calibration`` back to the store before + returning it. + + Returns + ------- + SpatialCalibrationConfig + Effective store calibration used for the current run. + """ + if persist: + return save_store_spatial_calibration(store_path, desired_calibration) + return load_store_spatial_calibration(store_path) + + def _build_workflow_config(args: argparse.Namespace) -> WorkflowConfig: """Translate parsed CLI arguments into a workflow configuration. @@ -522,6 +555,15 @@ def _build_workflow_config(args: argparse.Namespace) -> WorkflowConfig: else effective_execution_policy.calibration_policy ), ) + stage_axis_map_arg = getattr(args, "stage_axis_map", None) + spatial_calibration_explicit = bool( + stage_axis_map_arg is not None and str(stage_axis_map_arg).strip() + ) + spatial_calibration = ( + parse_spatial_calibration(stage_axis_map_arg) + if spatial_calibration_explicit + else SpatialCalibrationConfig() + ) return WorkflowConfig( file=args.file, @@ -541,6 +583,8 @@ def _build_workflow_config(args: argparse.Namespace) -> WorkflowConfig: registration=args.registration, visualization=args.visualization, mip_export=args.mip_export, + spatial_calibration=spatial_calibration, + spatial_calibration_explicit=spatial_calibration_explicit, analysis_parameters=usegment3d_analysis_parameters, ) @@ -1207,6 +1251,7 @@ def _emit_analysis_progress(percent: int, message: str) -> None: runtime_analysis_parameters = normalize_analysis_operation_parameters( workflow.analysis_parameters ) + runtime_spatial_calibration = workflow.spatial_calibration if workflow.file: is_experiment_input = is_navigate_experiment_file(workflow.file) @@ -1271,11 +1316,20 @@ def _emit_analysis_progress(percent: int, message: str) -> None: ) image_info = materialized.source_image_info provenance_store_path = str(materialized.store_path) + runtime_spatial_calibration = ( + _resolve_effective_store_spatial_calibration( + store_path=provenance_store_path, + desired_calibration=workflow.spatial_calibration, + persist=workflow.spatial_calibration_explicit, + ) + ) _log_loaded_image(image_info, logger) logger.info( "Materialized source data to Zarr store " f"{materialized.store_path} (component=data, " - f"chunks_tpczyx={materialized.chunks_tpczyx})." + f"chunks_tpczyx={materialized.chunks_tpczyx}, " + "spatial_calibration=" + f"{format_spatial_calibration(runtime_spatial_calibration)})." ) step_records.append( { @@ -1296,6 +1350,9 @@ def _emit_analysis_progress(percent: int, message: str) -> None: list(levels) for levels in workflow.zarr_save.pyramid_ptczyx ], + "spatial_calibration": format_spatial_calibration( + runtime_spatial_calibration + ), }, } ) @@ -1311,6 +1368,13 @@ def _emit_analysis_progress(percent: int, message: str) -> None: if input_path and is_zarr_store_path(input_path): provenance_store_path = input_path + runtime_spatial_calibration = ( + _resolve_effective_store_spatial_calibration( + store_path=input_path, + desired_calibration=workflow.spatial_calibration, + persist=workflow.spatial_calibration_explicit, + ) + ) step_records.append( { @@ -1328,6 +1392,9 @@ def _emit_analysis_progress(percent: int, message: str) -> None: ), "chunks": format_chunks(workflow.chunks) or None, "dask_backend": dask_backend_to_dict(workflow.dask_backend), + "spatial_calibration": format_spatial_calibration( + runtime_spatial_calibration + ), }, } ) @@ -2556,6 +2623,8 @@ def _mip_export_progress(percent: int, message: str) -> None: visualization=workflow.visualization, mip_export=workflow.mip_export, zarr_save=workflow.zarr_save, + spatial_calibration=runtime_spatial_calibration, + spatial_calibration_explicit=workflow.spatial_calibration_explicit, analysis_parameters=runtime_analysis_parameters, ) try: diff --git a/src/clearex/visualization/CODEX.md b/src/clearex/visualization/CODEX.md index f3be051..48cd9ea 100644 --- a/src/clearex/visualization/CODEX.md +++ b/src/clearex/visualization/CODEX.md @@ -64,15 +64,27 @@ This folder owns napari-facing visualization workflows. - single-position mode (`show_all_positions=False`, use `position_index`), and - multiposition mode (`show_all_positions=True`, render all positions). - Stage coordinates are resolved from `multi_positions.yml` adjacent to `source_experiment` when available (fallback: `MultiPositions` in experiment metadata). -- Parsed stage rows use fields `X`, `Y`, `Z`, and `THETA` (`F` is ignored for visualization transforms). +- Parsed stage rows use fields `X`, `Y`, `Z`, `F`, and `THETA`. +- Root store attr `spatial_calibration` defines how world `z/y/x` + translations are derived from Navigate stage coordinates. Missing attrs + resolve to identity mapping `z=+z,y=+y,x=+x`. - Per-position napari affine uses homogeneous `6x6` matrix in `(t, c, z, y, x)` coordinates: + - world-axis bindings support `+/-x`, `+/-y`, `+/-z`, `+/-f`, and `none`, + - `none` forces zero translation on that world axis, + - sign inversion is applied before translation, - `THETA` rotates the `z/y` plane (sample rotation around x axis). - stage coordinates are in microns and affine translations are applied directly in world-space microns. -- Persisted visualization metadata includes: +- Napari image-layer metadata includes: + - `position_affines_tczyx`, + - `stage_positions_xyztheta`, + - `stage_positions_xyzthetaf`, + - `spatial_calibration`, + - `spatial_calibration_text`. +- Latest visualization metadata includes: - `selected_positions`, - `show_all_positions`, - - `position_affines_tczyx`, - - `stage_positions_xyztheta`. + - `spatial_calibration`, + - `spatial_calibration_text`. ## GUI/Threading Contract diff --git a/src/clearex/visualization/README.md b/src/clearex/visualization/README.md index 201f9c0..ec7d0d3 100644 --- a/src/clearex/visualization/README.md +++ b/src/clearex/visualization/README.md @@ -55,15 +55,27 @@ This folder owns napari-facing visualization workflows. - single-position mode (`show_all_positions=False`, use `position_index`), and - multiposition mode (`show_all_positions=True`, render all positions). - Stage coordinates are resolved from `multi_positions.yml` adjacent to `source_experiment` when available (fallback: `MultiPositions` in experiment metadata). -- Parsed stage rows use fields `X`, `Y`, `Z`, and `THETA` (`F` is ignored for visualization transforms). +- Parsed stage rows use fields `X`, `Y`, `Z`, `F`, and `THETA`. +- Root store attr `spatial_calibration` defines how world `z/y/x` + translations are derived from Navigate stage coordinates. Missing attrs + resolve to identity mapping `z=+z,y=+y,x=+x`. - Per-position napari affine uses homogeneous `6x6` matrix in `(t, c, z, y, x)` coordinates: + - world-axis bindings support `+/-x`, `+/-y`, `+/-z`, `+/-f`, and `none`, + - `none` forces zero translation on that world axis, + - sign inversion is applied before translation, - `THETA` rotates the `z/y` plane (sample rotation around x axis). - stage coordinates are in microns and affine translations are applied directly in world-space microns. -- Persisted visualization metadata includes: +- Napari image-layer metadata includes: + - `position_affines_tczyx`, + - `stage_positions_xyztheta`, + - `stage_positions_xyzthetaf`, + - `spatial_calibration`, + - `spatial_calibration_text`. +- Latest visualization metadata includes: - `selected_positions`, - `show_all_positions`, - - `position_affines_tczyx`, - - `stage_positions_xyztheta`. + - `spatial_calibration`, + - `spatial_calibration_text`. ## GUI/Threading Contract diff --git a/src/clearex/visualization/pipeline.py b/src/clearex/visualization/pipeline.py index cb559a3..1b3923b 100644 --- a/src/clearex/visualization/pipeline.py +++ b/src/clearex/visualization/pipeline.py @@ -51,6 +51,12 @@ # Local Imports from clearex.io.experiment import load_navigate_experiment from clearex.io.provenance import register_latest_output_reference +from clearex.workflow import ( + SpatialCalibrationConfig, + format_spatial_calibration, + spatial_calibration_from_dict, + spatial_calibration_to_dict, +) ProgressCallback = Callable[[int, str], None] @@ -2390,7 +2396,7 @@ def _parse_multiposition_stage_rows(payload: Any) -> list[dict[str, float]]: Returns ------- list[dict[str, float]] - Parsed rows with ``x``, ``y``, ``z``, and ``theta`` values. + Parsed rows with ``x``, ``y``, ``z``, ``theta``, and ``f`` values. """ if not isinstance(payload, list): return [] @@ -2420,11 +2426,30 @@ def _value(field: str, fallback_index: int) -> float: "y": _value("Y", 1), "z": _value("Z", 2), "theta": _value("THETA", 3), + "f": _value("F", 4), } ) return parsed_rows +def _load_spatial_calibration( + root_attrs: Mapping[str, Any], +) -> SpatialCalibrationConfig: + """Load store-level spatial calibration from root attrs. + + Parameters + ---------- + root_attrs : mapping[str, Any] + Root Zarr attributes. + + Returns + ------- + SpatialCalibrationConfig + Parsed store calibration. Missing attrs resolve to identity. + """ + return spatial_calibration_from_dict(root_attrs.get("spatial_calibration")) + + def _load_multiposition_stage_rows( root_attrs: Mapping[str, Any], ) -> list[dict[str, float]]: @@ -2477,9 +2502,9 @@ def _load_multiposition_stage_rows( def _build_position_affine_tczyx( *, - delta_x: float, - delta_y: float, - delta_z: float, + delta_world_x: float, + delta_world_y: float, + delta_world_z: float, delta_theta_deg: float, scale_tczyx: Sequence[float], ) -> np.ndarray: @@ -2487,12 +2512,12 @@ def _build_position_affine_tczyx( Parameters ---------- - delta_x : float - Stage X translation delta relative to reference position. - delta_y : float - Stage Y translation delta relative to reference position. - delta_z : float - Stage Z translation delta relative to reference position. + delta_world_x : float + World X translation delta relative to reference position. + delta_world_y : float + World Y translation delta relative to reference position. + delta_world_z : float + World Z translation delta relative to reference position. delta_theta_deg : float Stage rotation delta (degrees) around sample X axis. scale_tczyx : sequence of float @@ -2518,18 +2543,47 @@ def _build_position_affine_tczyx( # Stage coordinates are reported in microns. Napari affine translation is # interpreted in world units, so pass micron offsets directly. - affine[2, 5] = float(delta_z) - affine[3, 5] = float(delta_y) - affine[4, 5] = float(delta_x) + affine[2, 5] = float(delta_world_z) + affine[3, 5] = float(delta_world_y) + affine[4, 5] = float(delta_world_x) return affine +def _resolve_world_axis_delta( + *, + row: Mapping[str, float], + reference: Mapping[str, float], + binding: str, +) -> float: + """Resolve one world-axis translation delta from stage coordinates. + + Parameters + ---------- + row : mapping[str, float] + Current multiposition row. + reference : mapping[str, float] + Reference multiposition row. + binding : str + Canonical spatial-calibration binding for the world axis. + + Returns + ------- + float + Translation delta in stage/world units. + """ + if binding == "none": + return 0.0 + sign = -1.0 if binding.startswith("-") else 1.0 + source_axis = binding[1:] + return sign * float(row[source_axis] - reference[source_axis]) + + def _resolve_position_affines_tczyx( *, root_attrs: Mapping[str, Any], selected_positions: Sequence[int], scale_tczyx: Sequence[float], -) -> tuple[dict[int, np.ndarray], list[dict[str, float]]]: + ) -> tuple[dict[int, np.ndarray], list[dict[str, float]], SpatialCalibrationConfig]: """Resolve per-position affines for napari rendering. Parameters @@ -2543,30 +2597,45 @@ def _resolve_position_affines_tczyx( Returns ------- - tuple[dict[int, numpy.ndarray], list[dict[str, float]]] - Mapping of position index to affine matrix and parsed stage rows. + tuple[dict[int, numpy.ndarray], list[dict[str, float]], SpatialCalibrationConfig] + Mapping of position index to affine matrix, parsed stage rows, and the + effective store calibration. """ affines: dict[int, np.ndarray] = { int(index): np.eye(6, dtype=np.float64) for index in selected_positions } + spatial_calibration = _load_spatial_calibration(root_attrs) stage_rows = _load_multiposition_stage_rows(root_attrs) if not stage_rows: - return affines, [] + return affines, [], spatial_calibration reference = stage_rows[0] + stage_axis_map = spatial_calibration.stage_axis_map_by_world_axis() for position_index in selected_positions: idx = int(position_index) if idx < 0 or idx >= len(stage_rows): continue row = stage_rows[idx] affines[idx] = _build_position_affine_tczyx( - delta_x=float(row["x"] - reference["x"]), - delta_y=float(row["y"] - reference["y"]), - delta_z=float(row["z"] - reference["z"]), + delta_world_x=_resolve_world_axis_delta( + row=row, + reference=reference, + binding=stage_axis_map["x"], + ), + delta_world_y=_resolve_world_axis_delta( + row=row, + reference=reference, + binding=stage_axis_map["y"], + ), + delta_world_z=_resolve_world_axis_delta( + row=row, + reference=reference, + binding=stage_axis_map["z"], + ), delta_theta_deg=float(row["theta"] - reference["theta"]), scale_tczyx=scale_tczyx, ) - return affines, stage_rows + return affines, stage_rows, spatial_calibration def _load_particle_overlay_points( @@ -3416,6 +3485,7 @@ def _save_visualization_metadata( position_index: int, selected_positions: Sequence[int], show_all_positions: bool, + spatial_calibration: SpatialCalibrationConfig, parameters: Mapping[str, Any], overlay_points_count: int, renderer: Optional[Mapping[str, Any]], @@ -3443,6 +3513,8 @@ def _save_visualization_metadata( Rendered position indices. show_all_positions : bool Whether all positions were rendered. + spatial_calibration : SpatialCalibrationConfig + Effective store-level stage-to-world axis mapping used for placement. parameters : mapping[str, Any] Effective visualization parameters. overlay_points_count : int @@ -3480,6 +3552,10 @@ def _save_visualization_metadata( "position_index": int(position_index), "selected_positions": [int(value) for value in selected_positions], "show_all_positions": bool(show_all_positions), + "spatial_calibration": spatial_calibration_to_dict(spatial_calibration), + "spatial_calibration_text": format_spatial_calibration( + spatial_calibration + ), "overlay_points_count": int(overlay_points_count), "renderer": _sanitize_metadata_value(dict(renderer or {})), "launch_mode": str(launch_mode), @@ -3606,7 +3682,11 @@ def _emit(percent: int, message: str) -> None: overlay_points_count=total_overlay_points, point_property_names=point_property_names, ) - position_affines_tczyx, stage_rows = _resolve_position_affines_tczyx( + ( + position_affines_tczyx, + stage_rows, + spatial_calibration, + ) = _resolve_position_affines_tczyx( root_attrs=dict(root.attrs), selected_positions=selected_positions, scale_tczyx=napari_payload.scale_tczyx, @@ -3620,15 +3700,24 @@ def _emit(percent: int, message: str) -> None: str(index): np.asarray(matrix, dtype=np.float64).tolist() for index, matrix in position_affines_tczyx.items() } - napari_payload.image_metadata["stage_positions_xyztheta"] = [ + stage_position_rows = [ { "x": float(row["x"]), "y": float(row["y"]), "z": float(row["z"]), "theta": float(row["theta"]), + "f": float(row["f"]), } for row in stage_rows ] + napari_payload.image_metadata["stage_positions_xyztheta"] = stage_position_rows + napari_payload.image_metadata["stage_positions_xyzthetaf"] = stage_position_rows + napari_payload.image_metadata["spatial_calibration"] = spatial_calibration_to_dict( + spatial_calibration + ) + napari_payload.image_metadata["spatial_calibration_text"] = ( + format_spatial_calibration(spatial_calibration) + ) napari_payload.points_metadata["position_index"] = int(reference_position_index) napari_payload.points_metadata["selected_positions"] = [ int(value) for value in selected_positions @@ -3708,6 +3797,7 @@ def _emit(percent: int, message: str) -> None: position_index=reference_position_index, selected_positions=selected_positions, show_all_positions=show_all_positions, + spatial_calibration=spatial_calibration, parameters=normalized, overlay_points_count=total_overlay_points, renderer=renderer_info, diff --git a/src/clearex/workflow.py b/src/clearex/workflow.py index 085c9cf..c016686 100644 --- a/src/clearex/workflow.py +++ b/src/clearex/workflow.py @@ -83,6 +83,24 @@ str(component): str(operation_name) for operation_name, component in ANALYSIS_KNOWN_OUTPUT_COMPONENTS.items() } +SPATIAL_CALIBRATION_SCHEMA = "clearex.spatial_calibration.v1" +SPATIAL_CALIBRATION_WORLD_AXES = ("z", "y", "x") +SPATIAL_CALIBRATION_SOURCE_AXES = ("x", "y", "z", "f") +SPATIAL_CALIBRATION_ALLOWED_BINDINGS = frozenset( + { + "+x", + "-x", + "+y", + "-y", + "+z", + "-z", + "+f", + "-f", + "none", + } +) +SPATIAL_CALIBRATION_DEFAULT_STAGE_AXIS_MAP_ZYX = ("+z", "+y", "+x") +SPATIAL_CALIBRATION_DEFAULT_THETA_MODE = "rotate_zy_about_x" @dataclass(frozen=True) @@ -4722,6 +4740,411 @@ def format_dask_backend_summary(config: DaskBackendConfig) -> str: ) +def _normalize_spatial_calibration_binding( + value: Any, + *, + axis_name: str, +) -> str: + """Normalize one world-axis spatial-calibration binding. + + Parameters + ---------- + value : Any + Candidate binding value. + axis_name : str + World axis receiving the binding for error context. + + Returns + ------- + str + Canonical lowercase binding. + + Raises + ------ + ValueError + If the binding is empty or unsupported. + """ + text = str(value).strip().lower() + if not text: + raise ValueError( + f"Spatial calibration binding for world axis '{axis_name}' cannot be empty." + ) + if text in SPATIAL_CALIBRATION_SOURCE_AXES: + text = f"+{text}" + if text not in SPATIAL_CALIBRATION_ALLOWED_BINDINGS: + allowed = ", ".join(sorted(SPATIAL_CALIBRATION_ALLOWED_BINDINGS)) + raise ValueError( + f"Spatial calibration binding for world axis '{axis_name}' must be one " + f"of: {allowed}." + ) + return text + + +def _normalize_spatial_calibration_stage_axis_map( + stage_axis_map_zyx: Any, +) -> tuple[str, str, str]: + """Normalize world ``z/y/x`` axis bindings for spatial calibration. + + Parameters + ---------- + stage_axis_map_zyx : Any + Candidate binding payload. Accepts a sequence in ``(z, y, x)`` order or + a mapping with ``z``, ``y``, and ``x`` keys. + + Returns + ------- + tuple[str, str, str] + Canonical world-axis bindings in ``(z, y, x)`` order. + + Raises + ------ + ValueError + If the mapping is malformed or reuses one source axis more than once. + """ + if isinstance(stage_axis_map_zyx, Mapping): + missing_axes = [ + axis_name + for axis_name in SPATIAL_CALIBRATION_WORLD_AXES + if axis_name not in stage_axis_map_zyx + ] + if missing_axes: + raise ValueError( + "Spatial calibration mappings must define z, y, and x bindings." + ) + normalized = ( + _normalize_spatial_calibration_binding( + stage_axis_map_zyx["z"], + axis_name="z", + ), + _normalize_spatial_calibration_binding( + stage_axis_map_zyx["y"], + axis_name="y", + ), + _normalize_spatial_calibration_binding( + stage_axis_map_zyx["x"], + axis_name="x", + ), + ) + elif isinstance(stage_axis_map_zyx, Sequence) and not isinstance( + stage_axis_map_zyx, (str, bytes) + ): + values = tuple(stage_axis_map_zyx) + if len(values) != len(SPATIAL_CALIBRATION_WORLD_AXES): + raise ValueError( + "Spatial calibration stage_axis_map_zyx must define three " + "entries in (z, y, x) order." + ) + normalized = ( + _normalize_spatial_calibration_binding(values[0], axis_name="z"), + _normalize_spatial_calibration_binding(values[1], axis_name="y"), + _normalize_spatial_calibration_binding(values[2], axis_name="x"), + ) + else: + raise ValueError( + "Spatial calibration stage_axis_map_zyx must be a mapping or " + "three-entry sequence." + ) + + seen_sources: set[str] = set() + for axis_name, binding in zip( + SPATIAL_CALIBRATION_WORLD_AXES, + normalized, + strict=False, + ): + if binding == "none": + continue + source_axis = binding[1:] + if source_axis in seen_sources: + raise ValueError( + "Spatial calibration cannot map one stage axis to multiple world " + f"axes. Duplicate source axis '{source_axis}' detected at world " + f"axis '{axis_name}'." + ) + seen_sources.add(source_axis) + return normalized + + +@dataclass(frozen=True) +class SpatialCalibrationConfig: + """Store-level stage-to-world axis mapping for multiposition placement. + + Attributes + ---------- + stage_axis_map_zyx : tuple[str, str, str] + World-axis bindings in ``(z, y, x)`` order. Allowed values are + ``+x``, ``-x``, ``+y``, ``-y``, ``+z``, ``-z``, ``+f``, ``-f``, and + ``none``. + theta_mode : str + Rotation interpretation for Navigate ``THETA`` values. + """ + + stage_axis_map_zyx: tuple[str, str, str] = ( + SPATIAL_CALIBRATION_DEFAULT_STAGE_AXIS_MAP_ZYX + ) + theta_mode: str = SPATIAL_CALIBRATION_DEFAULT_THETA_MODE + + def __post_init__(self) -> None: + """Normalize and validate spatial-calibration fields. + + Parameters + ---------- + None + + Returns + ------- + None + Values are normalized in-place on the frozen dataclass. + + Raises + ------ + ValueError + If bindings or theta mode are invalid. + """ + normalized_bindings = _normalize_spatial_calibration_stage_axis_map( + self.stage_axis_map_zyx + ) + theta_mode = ( + str(self.theta_mode).strip().lower() + or SPATIAL_CALIBRATION_DEFAULT_THETA_MODE + ) + if theta_mode != SPATIAL_CALIBRATION_DEFAULT_THETA_MODE: + raise ValueError( + "Spatial calibration theta_mode must be " + f"'{SPATIAL_CALIBRATION_DEFAULT_THETA_MODE}'." + ) + object.__setattr__(self, "stage_axis_map_zyx", normalized_bindings) + object.__setattr__(self, "theta_mode", theta_mode) + + def stage_axis_map_by_world_axis(self) -> Dict[str, str]: + """Return the world-axis binding mapping. + + Parameters + ---------- + None + + Returns + ------- + dict[str, str] + Mapping of world ``z/y/x`` axes to canonical binding strings. + """ + return { + axis_name: binding + for axis_name, binding in zip( + SPATIAL_CALIBRATION_WORLD_AXES, + self.stage_axis_map_zyx, + strict=False, + ) + } + + +def parse_spatial_calibration( + mapping: Optional[str], +) -> SpatialCalibrationConfig: + """Parse CLI/GUI text into a spatial calibration configuration. + + Parameters + ---------- + mapping : str, optional + Canonical text form such as ``"z=+x,y=none,x=+y"``. + + Returns + ------- + SpatialCalibrationConfig + Parsed and validated calibration. Empty input resolves to identity. + + Raises + ------ + ValueError + If the text is malformed or reuses a non-``none`` stage axis. + """ + if mapping is None: + return SpatialCalibrationConfig() + + text = str(mapping).strip() + if not text: + return SpatialCalibrationConfig() + + assignments: Dict[str, str] = {} + for token in text.split(","): + item = token.strip() + if not item: + continue + if "=" not in item: + raise ValueError( + "Spatial calibration must use 'world_axis=binding' assignments." + ) + axis_name, binding = item.split("=", 1) + key = str(axis_name).strip().lower() + if key not in SPATIAL_CALIBRATION_WORLD_AXES: + raise ValueError( + "Spatial calibration world axes must be z, y, or x." + ) + if key in assignments: + raise ValueError( + f"Spatial calibration world axis '{key}' is assigned more than once." + ) + assignments[key] = str(binding).strip() + + if set(assignments) != set(SPATIAL_CALIBRATION_WORLD_AXES): + raise ValueError( + "Spatial calibration must define exactly z, y, and x assignments." + ) + return SpatialCalibrationConfig( + stage_axis_map_zyx=( + assignments["z"], + assignments["y"], + assignments["x"], + ) + ) + + +def normalize_spatial_calibration( + value: Any, +) -> SpatialCalibrationConfig: + """Normalize flexible spatial-calibration payloads. + + Parameters + ---------- + value : Any + Candidate calibration payload. Accepts + :class:`SpatialCalibrationConfig`, canonical text, or metadata mappings. + + Returns + ------- + SpatialCalibrationConfig + Normalized calibration configuration. + + Raises + ------ + ValueError + If the payload cannot be interpreted as a valid calibration. + """ + if value is None: + return SpatialCalibrationConfig() + if isinstance(value, SpatialCalibrationConfig): + return value + if isinstance(value, str): + return parse_spatial_calibration(value) + if not isinstance(value, Mapping): + raise ValueError( + "Spatial calibration must be a SpatialCalibrationConfig, string, or mapping." + ) + + theta_mode = ( + str( + value.get("theta_mode", SPATIAL_CALIBRATION_DEFAULT_THETA_MODE) + ).strip() + or SPATIAL_CALIBRATION_DEFAULT_THETA_MODE + ) + schema = str(value.get("schema", SPATIAL_CALIBRATION_SCHEMA)).strip() + if schema and schema != SPATIAL_CALIBRATION_SCHEMA: + raise ValueError( + f"Unsupported spatial calibration schema '{schema}'." + ) + + if "stage_axis_map_zyx" in value: + stage_axis_payload = value.get("stage_axis_map_zyx") + elif any(axis_name in value for axis_name in SPATIAL_CALIBRATION_WORLD_AXES): + missing_axes = [ + axis_name + for axis_name in SPATIAL_CALIBRATION_WORLD_AXES + if axis_name not in value + ] + if missing_axes: + raise ValueError( + "Spatial calibration mappings must define z, y, and x bindings." + ) + stage_axis_payload = { + axis_name: value.get(axis_name) + for axis_name in SPATIAL_CALIBRATION_WORLD_AXES + } + else: + raise ValueError( + "Spatial calibration mappings must provide stage_axis_map_zyx or z/y/x keys." + ) + + if isinstance(stage_axis_payload, str): + parsed = parse_spatial_calibration(stage_axis_payload) + return SpatialCalibrationConfig( + stage_axis_map_zyx=parsed.stage_axis_map_zyx, + theta_mode=theta_mode, + ) + + return SpatialCalibrationConfig( + stage_axis_map_zyx=_normalize_spatial_calibration_stage_axis_map( + stage_axis_payload + ), + theta_mode=theta_mode, + ) + + +def spatial_calibration_to_dict( + config: SpatialCalibrationConfig, +) -> Dict[str, Any]: + """Serialize spatial calibration for Zarr attrs and provenance. + + Parameters + ---------- + config : SpatialCalibrationConfig + Calibration to serialize. + + Returns + ------- + dict[str, Any] + JSON-compatible payload with schema, bindings, and theta mode. + """ + normalized = normalize_spatial_calibration(config) + return { + "schema": SPATIAL_CALIBRATION_SCHEMA, + "stage_axis_map_zyx": normalized.stage_axis_map_by_world_axis(), + "theta_mode": normalized.theta_mode, + } + + +def spatial_calibration_from_dict( + payload: Any, +) -> SpatialCalibrationConfig: + """Deserialize spatial calibration from metadata payloads. + + Parameters + ---------- + payload : Any + Stored calibration payload. Missing values resolve to identity. + + Returns + ------- + SpatialCalibrationConfig + Parsed calibration configuration. + """ + return normalize_spatial_calibration(payload) + + +def format_spatial_calibration( + config: Any, +) -> str: + """Format a spatial calibration in canonical text form. + + Parameters + ---------- + config : Any + Calibration payload accepted by :func:`normalize_spatial_calibration`. + + Returns + ------- + str + Canonical text form ``z=...,y=...,x=...``. + """ + normalized = normalize_spatial_calibration(config) + return ",".join( + f"{axis_name}={binding}" + for axis_name, binding in zip( + SPATIAL_CALIBRATION_WORLD_AXES, + normalized.stage_axis_map_zyx, + strict=False, + ) + ) + + @dataclass(frozen=True) class AnalysisTarget: """Resolved experiment/store pair available to the analysis dialog. @@ -4838,6 +5261,12 @@ class WorkflowConfig: Flag indicating whether MIP-export workflow should run. zarr_save : ZarrSaveConfig Analysis-store chunking and pyramid configuration for saved Zarr data. + spatial_calibration : SpatialCalibrationConfig + Store-level Navigate stage-to-world axis mapping used for multiposition + placement metadata. + spatial_calibration_explicit : bool + Whether the current spatial calibration was explicitly supplied by the + operator rather than inherited as the identity default. analysis_parameters : dict[str, dict[str, Any]] Per-analysis runtime parameters keyed by analysis name. """ @@ -4860,6 +5289,10 @@ class WorkflowConfig: visualization: bool = False mip_export: bool = False zarr_save: ZarrSaveConfig = field(default_factory=ZarrSaveConfig) + spatial_calibration: SpatialCalibrationConfig = field( + default_factory=SpatialCalibrationConfig + ) + spatial_calibration_explicit: bool = False analysis_parameters: Dict[str, Dict[str, Any]] = field( default_factory=default_analysis_operation_parameters ) @@ -4884,6 +5317,11 @@ def __post_init__(self) -> None: if not isinstance(self.execution_policy, ExecutionPolicy): self.execution_policy = execution_policy_from_dict(self.execution_policy) self.analysis_targets = normalize_analysis_targets(self.analysis_targets) + if not isinstance(self.spatial_calibration, SpatialCalibrationConfig): + self.spatial_calibration = normalize_spatial_calibration( + self.spatial_calibration + ) + self.spatial_calibration_explicit = bool(self.spatial_calibration_explicit) selected_experiment_path = ( str(self.analysis_selected_experiment_path).strip() if self.analysis_selected_experiment_path is not None diff --git a/tests/gui/test_gui_execution.py b/tests/gui/test_gui_execution.py index 376d4f3..61e7f89 100644 --- a/tests/gui/test_gui_execution.py +++ b/tests/gui/test_gui_execution.py @@ -539,6 +539,22 @@ def test_reset_analysis_selection_for_next_run_preserves_scope() -> None: assert reset.registration is False +def test_reset_analysis_selection_for_next_run_preserves_spatial_calibration() -> None: + workflow = app_module.WorkflowConfig( + file="/tmp/cell_001/data_store.zarr", + spatial_calibration=app_module.SpatialCalibrationConfig( + stage_axis_map_zyx=("+x", "none", "+y") + ), + spatial_calibration_explicit=True, + visualization=True, + ) + + reset = app_module._reset_analysis_selection_for_next_run(workflow) + + assert reset.spatial_calibration == workflow.spatial_calibration + assert reset.spatial_calibration_explicit is True + + def test_load_experiment_list_file_rejects_invalid_format(tmp_path) -> None: list_path = tmp_path / f"broken{app_module._CLEAREX_EXPERIMENT_LIST_FILE_SUFFIX}" list_path.write_text('{"format":"wrong","experiments":["a"]}\n', encoding="utf-8") @@ -614,6 +630,193 @@ def test_apply_experiment_overrides_populates_pixel_size_field() -> None: assert updated["pixel_size"] == "z=0.45, y=0.12, x=0.12" +def test_workflows_for_selected_analysis_scope_uses_store_spatial_calibration( + tmp_path: Path, +) -> None: + first_store = tmp_path / "cell_001" / "data_store.zarr" + second_store = tmp_path / "cell_002" / "data_store.zarr" + for store in (first_store, second_store): + store.parent.mkdir(parents=True, exist_ok=True) + root = app_module.zarr.open_group(str(store), mode="w") + root.create_dataset( + name="data", + shape=(1, 1, 1, 2, 2, 2), + chunks=(1, 1, 1, 2, 2, 2), + dtype="uint16", + overwrite=True, + ) + app_module.save_store_spatial_calibration( + first_store, + app_module.SpatialCalibrationConfig(stage_axis_map_zyx=("+x", "none", "+y")), + ) + app_module.save_store_spatial_calibration( + second_store, + app_module.SpatialCalibrationConfig(stage_axis_map_zyx=("+f", "-y", "+x")), + ) + + workflow = app_module.WorkflowConfig( + file=str(second_store), + analysis_targets=( + app_module.AnalysisTarget( + experiment_path=str(tmp_path / "cell_001" / "experiment.yml"), + store_path=str(first_store), + ), + app_module.AnalysisTarget( + experiment_path=str(tmp_path / "cell_002" / "experiment.yml"), + store_path=str(second_store), + ), + ), + analysis_selected_experiment_path=str(tmp_path / "cell_002" / "experiment.yml"), + analysis_apply_to_all=True, + visualization=True, + ) + + scoped = app_module._workflows_for_selected_analysis_scope(workflow) + + assert [entry.spatial_calibration.stage_axis_map_zyx for entry in scoped] == [ + ("+x", "none", "+y"), + ("+f", "-y", "+x"), + ] + + +def test_setup_dialog_resolves_spatial_calibration_drafts_per_experiment() -> None: + if not app_module.HAS_PYQT6: + return + + app = app_module.QApplication.instance() + if app is None: + app = app_module.QApplication([]) + + dialog = app_module.ClearExSetupDialog(initial=app_module.WorkflowConfig()) + first = Path("/tmp/cell_001/experiment.yml") + second = Path("/tmp/cell_002/experiment.yml") + dialog._spatial_calibration_drafts[first.resolve()] = ( + app_module.SpatialCalibrationConfig(stage_axis_map_zyx=("+x", "none", "+y")) + ) + dialog._spatial_calibration_drafts[second.resolve()] = ( + app_module.SpatialCalibrationConfig(stage_axis_map_zyx=("+f", "-y", "+x")) + ) + + dialog._set_current_spatial_calibration(experiment_path=first) + assert dialog._current_spatial_calibration.stage_axis_map_zyx == ( + "+x", + "none", + "+y", + ) + + dialog._set_current_spatial_calibration(experiment_path=second) + assert dialog._current_spatial_calibration.stage_axis_map_zyx == ( + "+f", + "-y", + "+x", + ) + + dialog.close() + + +def test_setup_dialog_prefills_spatial_calibration_from_existing_store( + tmp_path: Path, +) -> None: + if not app_module.HAS_PYQT6: + return + + app = app_module.QApplication.instance() + if app is None: + app = app_module.QApplication([]) + + store_path = tmp_path / "existing_store.zarr" + root = app_module.zarr.open_group(str(store_path), mode="w") + root.create_dataset( + name="data", + shape=(1, 1, 1, 2, 2, 2), + chunks=(1, 1, 1, 2, 2, 2), + dtype="uint16", + overwrite=True, + ) + app_module.save_store_spatial_calibration( + store_path, + app_module.SpatialCalibrationConfig(stage_axis_map_zyx=("+x", "none", "+y")), + ) + + dialog = app_module.ClearExSetupDialog(initial=app_module.WorkflowConfig()) + dialog._set_current_spatial_calibration( + experiment_path=tmp_path / "experiment.yml", + target_store=store_path, + ) + + assert dialog._current_spatial_calibration.stage_axis_map_zyx == ( + "+x", + "none", + "+y", + ) + assert "z=+x,y=none,x=+y" in dialog._spatial_calibration_summary.text() + + dialog.close() + + +def test_setup_dialog_persists_spatial_calibration_for_all_requests( + tmp_path: Path, +) -> None: + if not app_module.HAS_PYQT6: + return + + app = app_module.QApplication.instance() + if app is None: + app = app_module.QApplication([]) + + dialog = app_module.ClearExSetupDialog(initial=app_module.WorkflowConfig()) + experiment_a = tmp_path / "cell_001" / "experiment.yml" + experiment_b = tmp_path / "cell_002" / "experiment.yml" + store_a = tmp_path / "cell_001" / "data_store.zarr" + store_b = tmp_path / "cell_002" / "data_store.zarr" + for store in (store_a, store_b): + store.parent.mkdir(parents=True, exist_ok=True) + root = app_module.zarr.open_group(str(store), mode="w") + root.create_dataset( + name="data", + shape=(1, 1, 1, 2, 2, 2), + chunks=(1, 1, 1, 2, 2, 2), + dtype="uint16", + overwrite=True, + ) + + dialog._spatial_calibration_drafts[experiment_a.resolve()] = ( + app_module.SpatialCalibrationConfig(stage_axis_map_zyx=("+x", "none", "+y")) + ) + dialog._spatial_calibration_drafts[experiment_b.resolve()] = ( + app_module.SpatialCalibrationConfig(stage_axis_map_zyx=("+f", "-y", "+x")) + ) + requests = ( + app_module.ExperimentStorePreparationRequest( + experiment_path=experiment_a.resolve(), + experiment=_make_navigate_experiment(experiment_a), + source_data_path=tmp_path / "cell_001" / "raw.tif", + target_store=store_a.resolve(), + ), + app_module.ExperimentStorePreparationRequest( + experiment_path=experiment_b.resolve(), + experiment=_make_navigate_experiment(experiment_b), + source_data_path=tmp_path / "cell_002" / "raw.tif", + target_store=store_b.resolve(), + ), + ) + + persisted = dialog._persist_spatial_calibration_for_requests(requests) + + assert persisted[store_a.resolve()].stage_axis_map_zyx == ("+x", "none", "+y") + assert persisted[store_b.resolve()].stage_axis_map_zyx == ("+f", "-y", "+x") + assert ( + app_module.load_store_spatial_calibration(store_a).stage_axis_map_zyx + == ("+x", "none", "+y") + ) + assert ( + app_module.load_store_spatial_calibration(store_b).stage_axis_map_zyx + == ("+f", "-y", "+x") + ) + + dialog.close() + + def test_run_workflow_with_progress_slurm_executes_callback_on_main_thread( monkeypatch, ) -> None: diff --git a/tests/io/test_cli.py b/tests/io/test_cli.py index 5d599bf..ffeb122 100644 --- a/tests/io/test_cli.py +++ b/tests/io/test_cli.py @@ -79,3 +79,9 @@ def test_channel_indices_flag_accepts_indices_and_all(): def test_input_resolution_level_flag_parses_integer(): args = create_parser().parse_args(["--input-resolution-level", "2"]) assert args.input_resolution_level == 2 + + +def test_stage_axis_map_flag_parses_string(): + args = create_parser().parse_args(["--stage-axis-map", "z=+x,y=none,x=+y"]) + + assert args.stage_axis_map == "z=+x,y=none,x=+y" diff --git a/tests/io/test_experiment.py b/tests/io/test_experiment.py index 7d8e084..f892ec9 100644 --- a/tests/io/test_experiment.py +++ b/tests/io/test_experiment.py @@ -46,12 +46,15 @@ initialize_analysis_store, is_navigate_experiment_file, load_navigate_experiment, + load_store_spatial_calibration, materialize_experiment_data_store, resolve_data_store_path, resolve_experiment_data_path, + save_store_spatial_calibration, write_zyx_block, ) from clearex.io.read import ImageInfo +from clearex.workflow import SpatialCalibrationConfig def _write_minimal_experiment( @@ -413,6 +416,71 @@ def test_initialize_analysis_store_applies_custom_chunks_and_pyramid(tmp_path: P ] +def test_initialize_analysis_store_backfills_identity_spatial_calibration( + tmp_path: Path, +): + experiment_path = tmp_path / "experiment.yml" + _write_minimal_experiment(experiment_path, save_directory=tmp_path, file_type="H5") + experiment = load_navigate_experiment(experiment_path) + store_path = default_analysis_store_path(experiment) + + initialize_analysis_store(experiment=experiment, zarr_path=store_path, overwrite=True) + + calibration = load_store_spatial_calibration(store_path) + + assert calibration == SpatialCalibrationConfig() + + +def test_load_store_spatial_calibration_defaults_to_identity_for_legacy_store( + tmp_path: Path, +): + store_path = tmp_path / "legacy_store.zarr" + root = zarr.open_group(str(store_path), mode="w") + root.create_dataset( + name="data", + shape=(1, 1, 1, 2, 2, 2), + chunks=(1, 1, 1, 2, 2, 2), + dtype="uint16", + overwrite=True, + ) + + calibration = load_store_spatial_calibration(store_path) + + assert calibration == SpatialCalibrationConfig() + + +def test_save_store_spatial_calibration_round_trip_and_preserves_existing_mapping( + tmp_path: Path, +): + experiment_path = tmp_path / "experiment.yml" + _write_minimal_experiment(experiment_path, save_directory=tmp_path, file_type="H5") + experiment = load_navigate_experiment(experiment_path) + store_path = tmp_path / "store_with_mapping.zarr" + root = zarr.open_group(str(store_path), mode="w") + root.create_dataset( + name="data", + shape=(1, 1, 1, 2, 2, 2), + chunks=(1, 1, 1, 2, 2, 2), + dtype="uint16", + overwrite=True, + ) + + saved = save_store_spatial_calibration( + store_path, + SpatialCalibrationConfig(stage_axis_map_zyx=("+x", "none", "+y")), + ) + initialize_analysis_store( + experiment=experiment, + zarr_path=store_path, + overwrite=False, + ) + + reloaded = load_store_spatial_calibration(store_path) + + assert saved == SpatialCalibrationConfig(stage_axis_map_zyx=("+x", "none", "+y")) + assert reloaded == saved + + def test_write_zyx_block_numpy(tmp_path: Path): experiment_path = tmp_path / "experiment.yml" _write_minimal_experiment(experiment_path, save_directory=tmp_path, file_type="H5") diff --git a/tests/io/test_provenance.py b/tests/io/test_provenance.py index 079112d..776def4 100644 --- a/tests/io/test_provenance.py +++ b/tests/io/test_provenance.py @@ -43,7 +43,7 @@ verify_provenance_chain, ) from clearex.io.read import ImageInfo -from clearex.workflow import WorkflowConfig +from clearex.workflow import SpatialCalibrationConfig, WorkflowConfig def test_is_zarr_store_path(): @@ -96,6 +96,7 @@ def test_persist_run_provenance_hash_chain(tmp_path: Path): assert record_1["workflow"]["usegment3d"] is True assert "flatfield" in record_1["workflow"]["selected_analyses"] assert "usegment3d" in record_1["workflow"]["selected_analyses"] + assert record_1["workflow"]["spatial_calibration_text"] == "z=+z,y=+y,x=+x" assert record_1["workflow"]["zarr_chunks_ptczyx"] == "p=1, t=1, c=1, z=256, y=256, x=256" assert "z=1,2,4,8" in record_1["workflow"]["zarr_pyramid_ptczyx"] @@ -104,6 +105,35 @@ def test_persist_run_provenance_hash_chain(tmp_path: Path): assert issues == [] +def test_persist_run_provenance_records_spatial_calibration(tmp_path: Path) -> None: + store_path = tmp_path / "spatial_provenance.zarr" + zarr.open_group(str(store_path), mode="w") + workflow = WorkflowConfig( + file=str(store_path), + visualization=True, + spatial_calibration=SpatialCalibrationConfig( + stage_axis_map_zyx=("+x", "none", "+y") + ), + ) + + run_id = persist_run_provenance( + zarr_path=store_path, + workflow=workflow, + image_info=ImageInfo(path=store_path, shape=(2, 2), dtype=np.uint8), + repo_root=tmp_path, + ) + + root = zarr.open_group(str(store_path), mode="r") + record = dict(root["provenance"]["runs"][run_id].attrs["record"]) + + assert record["workflow"]["spatial_calibration"] == { + "schema": "clearex.spatial_calibration.v1", + "stage_axis_map_zyx": {"z": "+x", "y": "none", "x": "+y"}, + "theta_mode": "rotate_zy_about_x", + } + assert record["workflow"]["spatial_calibration_text"] == "z=+x,y=none,x=+y" + + def test_verify_provenance_chain_detects_tampering(tmp_path: Path): store_path = tmp_path / "tamper_test.zarr" zarr.open_group(str(store_path), mode="w") diff --git a/tests/test_main.py b/tests/test_main.py index 5a1c507..6bc3241 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -17,7 +17,12 @@ import clearex.main as main_module from clearex.io.provenance import persist_run_provenance from clearex.io.read import ImageInfo -from clearex.workflow import ExecutionPolicy, WorkflowConfig, WorkflowExecutionCancelled +from clearex.workflow import ( + ExecutionPolicy, + SpatialCalibrationConfig, + WorkflowConfig, + WorkflowExecutionCancelled, +) from clearex.workflow import DaskBackendConfig, LocalClusterConfig @@ -735,6 +740,32 @@ def test_build_workflow_config_maps_usegment3d_input_resolution_level() -> None: assert params["input_resolution_level"] == 2 +def test_build_workflow_config_parses_stage_axis_map() -> None: + args = SimpleNamespace( + file=None, + dask=True, + chunks=None, + flatfield=False, + deconvolution=False, + shear_transform=False, + particle_detection=False, + usegment3d=False, + channel_indices=None, + input_resolution_level=None, + registration=False, + visualization=False, + mip_export=False, + stage_axis_map="z=+x,y=none,x=+y", + ) + + workflow = main_module._build_workflow_config(args) + + assert workflow.spatial_calibration == SpatialCalibrationConfig( + stage_axis_map_zyx=("+x", "none", "+y") + ) + assert workflow.spatial_calibration_explicit is True + + def test_build_workflow_config_rejects_invalid_input_resolution_level() -> None: args = SimpleNamespace( file=None, @@ -793,12 +824,14 @@ def test_run_workflow_experiment_file_starts_io_dask_startup( axes="TPCZYX", metadata={}, ) + store_path = tmp_path / "store.zarr" + main_module.zarr.open_group(str(store_path), mode="w") materialized = SimpleNamespace( source_image_info=image_info, data_image_info=image_info, source_path=source_path, source_component="data", - store_path=tmp_path / "store.tmp", + store_path=store_path, chunks_tpczyx=(1, 1, 1, 1, 1, 1), ) @@ -836,6 +869,218 @@ def _fake_configure_dask_backend(*, workflow, logger, exit_stack, workload="io") assert workloads == ["io"] +def test_run_workflow_existing_store_persists_explicit_spatial_calibration( + tmp_path: Path, monkeypatch +) -> None: + store_path = tmp_path / "analysis_store.zarr" + persisted: dict[str, object] = {} + + class _DummyOpener: + def open(self, path, *, prefer_dask, chunks): + del prefer_dask, chunks + return None, ImageInfo( + path=Path(path), + shape=(1, 1, 1, 2, 2, 2), + dtype=np.uint16, + axes="TPCZYX", + metadata={}, + ) + + monkeypatch.setattr(main_module, "ImageOpener", _DummyOpener) + monkeypatch.setattr(main_module, "is_navigate_experiment_file", lambda path: False) + monkeypatch.setattr( + main_module, + "save_store_spatial_calibration", + lambda path, calibration: persisted.update( + {"path": str(path), "calibration": calibration} + ) + or calibration, + ) + monkeypatch.setattr( + main_module, + "persist_run_provenance", + lambda *, zarr_path, workflow, image_info, **kwargs: persisted.update( + { + "provenance_store": str(zarr_path), + "provenance_calibration": workflow.spatial_calibration, + "image_shape": tuple(image_info.shape), + } + ) + or "run-1", + ) + + workflow = WorkflowConfig( + file=str(store_path), + spatial_calibration=SpatialCalibrationConfig( + stage_axis_map_zyx=("+x", "none", "+y") + ), + spatial_calibration_explicit=True, + ) + + main_module._run_workflow( + workflow=workflow, + logger=_test_logger("clearex.test.main.spatial_existing_store"), + ) + + assert persisted["path"] == str(store_path) + assert persisted["calibration"] == SpatialCalibrationConfig( + stage_axis_map_zyx=("+x", "none", "+y") + ) + assert persisted["provenance_store"] == str(store_path) + assert persisted["provenance_calibration"] == SpatialCalibrationConfig( + stage_axis_map_zyx=("+x", "none", "+y") + ) + + +def test_run_workflow_experiment_input_persists_explicit_identity_spatial_calibration( + tmp_path: Path, monkeypatch +) -> None: + source_path = tmp_path / "source.tif" + store_path = tmp_path / "prepared_store.zarr" + experiment = SimpleNamespace( + file_type="TIFF", + timepoints=1, + multiposition_count=1, + channel_count=1, + number_z_steps=1, + ) + image_info = ImageInfo( + path=source_path, + shape=(1, 1, 1, 1, 1, 1), + dtype=np.uint16, + axes="TPCZYX", + metadata={}, + ) + materialized = SimpleNamespace( + source_image_info=image_info, + data_image_info=image_info, + source_path=source_path, + source_component="data", + store_path=store_path, + chunks_tpczyx=(1, 1, 1, 1, 1, 1), + ) + persisted: dict[str, object] = {} + + monkeypatch.setattr(main_module, "_configure_dask_backend", lambda **kwargs: None) + monkeypatch.setattr(main_module, "is_navigate_experiment_file", lambda path: True) + monkeypatch.setattr(main_module, "load_navigate_experiment", lambda path: experiment) + monkeypatch.setattr( + main_module, "resolve_experiment_data_path", lambda experiment: source_path + ) + monkeypatch.setattr( + main_module, + "materialize_experiment_data_store", + lambda *, experiment, source_path, chunks, pyramid_factors, client: materialized, + ) + monkeypatch.setattr( + main_module, + "save_store_spatial_calibration", + lambda path, calibration: persisted.update( + {"path": str(path), "calibration": calibration} + ) + or calibration, + ) + monkeypatch.setattr( + main_module, + "persist_run_provenance", + lambda *, zarr_path, workflow, image_info, **kwargs: persisted.update( + { + "provenance_store": str(zarr_path), + "provenance_calibration": workflow.spatial_calibration, + } + ) + or "run-1", + ) + + workflow = WorkflowConfig( + file=str(tmp_path / "experiment.yml"), + spatial_calibration=SpatialCalibrationConfig(), + spatial_calibration_explicit=True, + ) + + main_module._run_workflow( + workflow=workflow, + logger=_test_logger("clearex.test.main.spatial_experiment_input"), + ) + + assert persisted["path"] == str(store_path) + assert persisted["calibration"] == SpatialCalibrationConfig() + assert persisted["provenance_store"] == str(store_path) + assert persisted["provenance_calibration"] == SpatialCalibrationConfig() + + +def test_run_workflow_experiment_input_without_override_preserves_store_mapping( + tmp_path: Path, monkeypatch +) -> None: + source_path = tmp_path / "source.tif" + store_path = tmp_path / "prepared_store.zarr" + experiment = SimpleNamespace( + file_type="TIFF", + timepoints=1, + multiposition_count=1, + channel_count=1, + number_z_steps=1, + ) + image_info = ImageInfo( + path=source_path, + shape=(1, 1, 1, 1, 1, 1), + dtype=np.uint16, + axes="TPCZYX", + metadata={}, + ) + materialized = SimpleNamespace( + source_image_info=image_info, + data_image_info=image_info, + source_path=source_path, + source_component="data", + store_path=store_path, + chunks_tpczyx=(1, 1, 1, 1, 1, 1), + ) + calls: dict[str, object] = {"saved": False} + + monkeypatch.setattr(main_module, "_configure_dask_backend", lambda **kwargs: None) + monkeypatch.setattr(main_module, "is_navigate_experiment_file", lambda path: True) + monkeypatch.setattr(main_module, "load_navigate_experiment", lambda path: experiment) + monkeypatch.setattr( + main_module, "resolve_experiment_data_path", lambda experiment: source_path + ) + monkeypatch.setattr( + main_module, + "materialize_experiment_data_store", + lambda *, experiment, source_path, chunks, pyramid_factors, client: materialized, + ) + monkeypatch.setattr( + main_module, + "load_store_spatial_calibration", + lambda path: SpatialCalibrationConfig(stage_axis_map_zyx=("+x", "none", "+y")), + ) + monkeypatch.setattr( + main_module, + "save_store_spatial_calibration", + lambda path, calibration: calls.update({"saved": True}) or calibration, + ) + monkeypatch.setattr( + main_module, + "persist_run_provenance", + lambda *, zarr_path, workflow, image_info, **kwargs: calls.update( + {"provenance_calibration": workflow.spatial_calibration} + ) + or "run-1", + ) + + workflow = WorkflowConfig(file=str(tmp_path / "experiment.yml")) + + main_module._run_workflow( + workflow=workflow, + logger=_test_logger("clearex.test.main.spatial_experiment_preserve"), + ) + + assert calls["saved"] is False + assert calls["provenance_calibration"] == SpatialCalibrationConfig( + stage_axis_map_zyx=("+x", "none", "+y") + ) + + def test_run_workflow_skips_matching_provenance_analysis( tmp_path: Path, monkeypatch ) -> None: diff --git a/tests/test_workflow.py b/tests/test_workflow.py index 3c7a878..be5b566 100644 --- a/tests/test_workflow.py +++ b/tests/test_workflow.py @@ -44,22 +44,27 @@ LocalClusterConfig, SlurmClusterConfig, SlurmRunnerConfig, + SpatialCalibrationConfig, WorkflowConfig, ZarrSaveConfig, dask_backend_from_dict, dask_backend_to_dict, + format_spatial_calibration, format_dask_backend_summary, format_local_cluster_recommendation_summary, format_chunks, format_pyramid_levels, format_zarr_chunks_ptczyx, format_zarr_pyramid_ptczyx, + normalize_spatial_calibration, parse_chunks, parse_pyramid_levels, + parse_spatial_calibration, normalize_analysis_operation_parameters, recommend_local_cluster_config, resolve_analysis_input_component, resolve_analysis_execution_sequence, + spatial_calibration_to_dict, validate_analysis_input_references, to_tpczyx_chunks, to_tpczyx_pyramid, @@ -104,6 +109,53 @@ def test_format_tuple(self): assert format_chunks((1, 128, 128)) == "1,128,128" +class TestSpatialCalibration: + def test_parse_round_trip(self): + parsed = parse_spatial_calibration("x=+y,z=-f,y=none") + + assert parsed == SpatialCalibrationConfig( + stage_axis_map_zyx=("-f", "none", "+y") + ) + assert format_spatial_calibration(parsed) == "z=-f,y=none,x=+y" + + def test_default_identity(self): + cfg = WorkflowConfig() + + assert cfg.spatial_calibration == SpatialCalibrationConfig() + assert format_spatial_calibration(cfg.spatial_calibration) == "z=+z,y=+y,x=+x" + + def test_rejects_duplicate_non_none_sources(self): + with pytest.raises(ValueError, match="Duplicate source axis 'x'"): + parse_spatial_calibration("z=+x,y=-x,x=none") + + def test_supports_none_binding(self): + parsed = parse_spatial_calibration("z=none,y=+y,x=-f") + + assert parsed.stage_axis_map_zyx == ("none", "+y", "-f") + + def test_normalizes_mapping_payload(self): + parsed = normalize_spatial_calibration( + { + "schema": "clearex.spatial_calibration.v1", + "stage_axis_map_zyx": {"z": "+x", "y": "none", "x": "+y"}, + "theta_mode": "rotate_zy_about_x", + } + ) + + assert parsed == SpatialCalibrationConfig( + stage_axis_map_zyx=("+x", "none", "+y") + ) + assert spatial_calibration_to_dict(parsed) == { + "schema": "clearex.spatial_calibration.v1", + "stage_axis_map_zyx": {"z": "+x", "y": "none", "x": "+y"}, + "theta_mode": "rotate_zy_about_x", + } + + def test_rejects_partial_top_level_mapping_payload(self): + with pytest.raises(ValueError, match="must define z, y, and x bindings"): + normalize_spatial_calibration({"z": "+x", "x": "+y"}) + + class TestWorkflowConfig: def test_has_analysis_selection(self): cfg = WorkflowConfig() diff --git a/tests/visualization/test_pipeline.py b/tests/visualization/test_pipeline.py index 538167d..18c8fd3 100644 --- a/tests/visualization/test_pipeline.py +++ b/tests/visualization/test_pipeline.py @@ -17,6 +17,7 @@ # Local Imports import clearex.visualization.pipeline as visualization_pipeline from clearex.visualization.pipeline import run_visualization_analysis +from clearex.workflow import spatial_calibration_to_dict def _single_image_volume_layers( @@ -693,6 +694,8 @@ def _fake_launch_napari_viewer( image_metadata = dict(captured["image_metadata"]) assert image_metadata["selected_positions"] == [0, 1] assert image_metadata["show_all_positions"] is True + assert image_metadata["spatial_calibration_text"] == "z=+z,y=+y,x=+x" + assert image_metadata["stage_positions_xyzthetaf"][1]["f"] == 0.0 latest_attrs = dict( zarr.open_group(str(store_path), mode="r")["results"]["visualization"][ @@ -702,6 +705,98 @@ def _fake_launch_napari_viewer( assert latest_attrs["position_index"] == 0 assert latest_attrs["selected_positions"] == [0, 1] assert latest_attrs["show_all_positions"] is True + assert latest_attrs["spatial_calibration_text"] == "z=+z,y=+y,x=+x" + + +def test_resolve_position_affines_tczyx_supports_focus_axis_and_sign_inversion( + tmp_path: Path, +) -> None: + experiment_path = tmp_path / "experiment.yml" + experiment_path.write_text( + json.dumps( + { + "Saving": {"save_directory": str(tmp_path), "file_type": "TIFF"}, + "MicroscopeState": {"timepoints": 1, "number_z_steps": 1}, + } + ) + ) + (tmp_path / "multi_positions.yml").write_text( + json.dumps( + [ + ["X", "Y", "Z", "THETA", "F"], + [0, 0, 0, 0, 0], + [10, 20, 30, 15, 5], + ] + ) + ) + + affines, stage_rows, spatial_calibration = ( + visualization_pipeline._resolve_position_affines_tczyx( + root_attrs={ + "source_experiment": str(experiment_path), + "spatial_calibration": { + "schema": "clearex.spatial_calibration.v1", + "stage_axis_map_zyx": {"z": "+f", "y": "-y", "x": "+x"}, + "theta_mode": "rotate_zy_about_x", + }, + }, + selected_positions=(0, 1), + scale_tczyx=(1.0, 1.0, 1.0, 1.0, 1.0), + ) + ) + + affine = np.asarray(affines[1], dtype=np.float64) + + assert stage_rows[1]["f"] == 5.0 + assert affine[2, 5] == 5.0 + assert affine[3, 5] == -20.0 + assert affine[4, 5] == 10.0 + assert spatial_calibration.stage_axis_map_zyx == ("+f", "-y", "+x") + + +def test_resolve_position_affines_tczyx_supports_none_and_nontrivial_mapping( + tmp_path: Path, +) -> None: + experiment_path = tmp_path / "experiment.yml" + experiment_path.write_text( + json.dumps( + { + "Saving": {"save_directory": str(tmp_path), "file_type": "TIFF"}, + "MicroscopeState": {"timepoints": 1, "number_z_steps": 1}, + } + ) + ) + (tmp_path / "multi_positions.yml").write_text( + json.dumps( + [ + ["X", "Y", "Z", "THETA", "F"], + [0, 0, 0, 0, 0], + [10, 20, 30, 15, 5], + ] + ) + ) + + affines, _stage_rows, spatial_calibration = ( + visualization_pipeline._resolve_position_affines_tczyx( + root_attrs={ + "source_experiment": str(experiment_path), + "spatial_calibration": spatial_calibration_to_dict( + visualization_pipeline.SpatialCalibrationConfig( + stage_axis_map_zyx=("+x", "none", "+y") + ) + ), + }, + selected_positions=(0, 1), + scale_tczyx=(1.0, 1.0, 1.0, 1.0, 1.0), + ) + ) + + affine = np.asarray(affines[1], dtype=np.float64) + + assert affine[2, 5] == 10.0 + assert affine[3, 5] == 0.0 + assert affine[4, 5] == 20.0 + assert spatial_calibration.stage_axis_map_zyx == ("+x", "none", "+y") def test_launch_napari_viewer_applies_axis_labels_after_layer_load( From 58764e6c83757749130ebeb904ef98063808eb64 Mon Sep 17 00:00:00 2001 From: "Kevin M. Dean" Date: Fri, 20 Mar 2026 06:48:06 -0500 Subject: [PATCH 03/10] Document spatial calibration and runtime updates Update CODEX.md to document store-level spatial calibration and other recent runtime changes. Adds a root store attribute `spatial_calibration` as the canonical stage-to-world axis mapping (missing attrs default to identity), records canonical text form (`z=...,y=...,x=...`) and allowed bindings (`+/-x`, `+/-y`, `+/-z`, `+/-f`, `none`). Notes that WorkflowConfig now carries SpatialCalibrationConfig, setup exposes a per-experiment "Spatial Calibration" control that prefills existing stores and writes resolved mappings on Next, headless workflows accept `--stage-axis-map`, visualization affines derive world z/y/x translations (including Navigate F and `none` behavior), and provenance records the effective calibration. Adds a dated "Recent Runtime Updates (2026-03-20)" section summarizing these changes. --- src/clearex/CODEX.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/clearex/CODEX.md b/src/clearex/CODEX.md index 9537043..fd9cbc7 100644 --- a/src/clearex/CODEX.md +++ b/src/clearex/CODEX.md @@ -23,6 +23,9 @@ This directory contains the runtime orchestration surface for ClearEx. - Canonical source array component is `data`. - Analysis outputs use `results//latest/...` (latest-only replacement). - Provenance records are append-only and include workflow + runtime parameters. +- Root store attr `spatial_calibration` is the canonical store-level + stage-to-world axis mapping for Navigate multiposition placement; missing + attrs mean identity mapping. ## Dask Workload Policy @@ -84,6 +87,30 @@ This directory contains the runtime orchestration surface for ClearEx. - auto-built pyramids are cached under `results/visualization_cache/pyramids/...`. +## Recent Runtime Updates (2026-03-20) + +- Added store-level spatial calibration for Navigate multiposition datasets: + - `WorkflowConfig` now carries `SpatialCalibrationConfig`, + - canonical text form is `z=...,y=...,x=...`, + - allowed bindings are `+/-x`, `+/-y`, `+/-z`, `+/-f`, and `none`, + - the root store attr `spatial_calibration` persists schema, mapping, and + `theta_mode`, + - missing attrs resolve to identity instead of requiring backfilled config. +- Setup flow now exposes a lightweight `Spatial Calibration` control per + experiment: + - one draft is kept per experiment while setup is open, + - existing stores prefill the current mapping, + - `Next` writes the resolved mapping to every reused or newly prepared store. +- Headless workflows now accept `--stage-axis-map` for Navigate + `experiment.yml` inputs and existing Zarr/N5 stores. +- Visualization position affines now derive world `z/y/x` translations from + the stored calibration: + - Navigate `F` is available as a placement source, + - `none` zeroes a world axis translation, + - sign inversion is supported, + - `THETA` remains rotation of the `z/y` plane about world `x`. +- Provenance now records the effective spatial calibration used by the run. + ## Sequencing and Inputs - Operation order is driven by `analysis_parameters[]["execution_order"]`. From b48cf7f2f215aef71a16ab7af7e24ff90901baf4 Mon Sep 17 00:00:00 2001 From: "Kevin M. Dean" Date: Fri, 20 Mar 2026 10:14:50 -0500 Subject: [PATCH 04/10] Migrate ClearEx ingestion and storage to Zarr v3 --- environment.yml | 12 +- pyproject.toml | 13 +- src/clearex/deconvolution/pipeline.py | 21 +- src/clearex/detect/pipeline.py | 11 +- src/clearex/flatfield/pipeline.py | 8 +- src/clearex/io/cli.py | 7 + src/clearex/io/experiment.py | 462 ++++++++++++++++++-------- src/clearex/io/n5_legacy_helper.py | 60 ++++ src/clearex/io/provenance.py | 15 +- src/clearex/io/read.py | 8 +- src/clearex/io/zarr_storage.py | 260 +++++++++++++++ src/clearex/main.py | 81 ++++- src/clearex/mip_export/pipeline.py | 6 +- src/clearex/visualization/pipeline.py | 16 +- tests/io/test_cli.py | 7 + tests/io/test_experiment.py | 216 +++++++++--- tests/io/test_provenance.py | 67 +++- tests/io/test_read.py | 110 ++++-- uv.lock | 355 ++++++++++++++++++-- 19 files changed, 1434 insertions(+), 301 deletions(-) create mode 100644 src/clearex/io/n5_legacy_helper.py create mode 100644 src/clearex/io/zarr_storage.py diff --git a/environment.yml b/environment.yml index e88af5b..66e81de 100644 --- a/environment.yml +++ b/environment.yml @@ -3,13 +3,13 @@ channels: - conda-forge - defaults dependencies: - - python>=3.11 + - python>=3.12,<3.13 - pip - cython - - dask=2025.1.0 + - dask=2026.1.1 - dask-image - dask-jobqueue - - distributed=2025.1.0 + - distributed=2026.1.1 - h5py - jupyterlab - matplotlib @@ -23,10 +23,10 @@ dependencies: - scikit-image - scipy - simpleitk - - tifffile - - zarr<3.0 + - tifffile=2025.10.16 + - zarr=3.1.5 - pip: - antspyx - neuroglancer (>=2.40.1,<3.0.0) - PyQt6>=6.7 - - -e . + - -e .[viewer] diff --git a/pyproject.toml b/pyproject.toml index f9755fd..78156e1 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,10 +18,10 @@ dependencies = [ "antspyx", "basicpy", "cython>=3.1.4", - "dask==2025.1.0", + "dask==2026.1.1", "dask-image", "dask-jobqueue", - "distributed==2025.1.0", + "distributed==2026.1.1", "h5py", # u-Segment3D currently requires imagecodecs<2025. "imagecodecs>=2024.9.22,<2025", @@ -36,8 +36,8 @@ dependencies = [ "scikit-image", "scipy<1.13", "seaborn", - "tifffile==2025.1.10", - "zarr<3.0", + "tifffile==2025.10.16", + "zarr>=3,<4", ] [project.optional-dependencies] @@ -52,6 +52,11 @@ usegment3d = [ "cellpose<3", ] +viewer = [ + "ome-zarr>=0.14.0", + "napari-ome-zarr>=0.7.2", +] + dev = [ "black>=25.11.0", "pre-commit", diff --git a/src/clearex/deconvolution/pipeline.py b/src/clearex/deconvolution/pipeline.py index 984ac46..0b31681 100644 --- a/src/clearex/deconvolution/pipeline.py +++ b/src/clearex/deconvolution/pipeline.py @@ -45,6 +45,7 @@ # Local Imports from clearex.deconvolution.petakit import run_petakit_deconvolution from clearex.io.provenance import register_latest_output_reference +from clearex.io.zarr_storage import create_or_overwrite_array if TYPE_CHECKING: from dask.distributed import Client @@ -1069,25 +1070,37 @@ def _persist_synthetic_psf_assets( voxel_z_um=float(voxel_z_um), ) channel_group = synthetic_group.create_group(f"ch{int(channel_index):02d}") - channel_group.create_dataset( + create_or_overwrite_array( + root=channel_group, name="combined_psf_zyx", + shape=tuple(int(v) for v in artifacts.combined_psf_zyx.shape), + dtype=np.float32, data=np.asarray(artifacts.combined_psf_zyx, dtype=np.float32), overwrite=True, ) - channel_group.create_dataset( + create_or_overwrite_array( + root=channel_group, name="detection_psf_zyx", + shape=tuple(int(v) for v in artifacts.detection_psf_zyx.shape), + dtype=np.float32, data=np.asarray(artifacts.detection_psf_zyx, dtype=np.float32), overwrite=True, ) if artifacts.illumination_psf_zyx is not None: - channel_group.create_dataset( + create_or_overwrite_array( + root=channel_group, name="illumination_psf_zyx", + shape=tuple(int(v) for v in artifacts.illumination_psf_zyx.shape), + dtype=np.float32, data=np.asarray(artifacts.illumination_psf_zyx, dtype=np.float32), overwrite=True, ) preview_bytes = np.frombuffer(artifacts.preview_png_bytes, dtype=np.uint8) - channel_group.create_dataset( + create_or_overwrite_array( + root=channel_group, name="preview_png", + shape=tuple(int(v) for v in preview_bytes.shape), + dtype=preview_bytes.dtype, data=preview_bytes, overwrite=True, ) diff --git a/src/clearex/detect/pipeline.py b/src/clearex/detect/pipeline.py index 2a80c63..61445e0 100644 --- a/src/clearex/detect/pipeline.py +++ b/src/clearex/detect/pipeline.py @@ -48,6 +48,7 @@ remove_close_blobs, ) from clearex.io.provenance import register_latest_output_reference +from clearex.io.zarr_storage import create_or_overwrite_array if TYPE_CHECKING: from dask.distributed import Client @@ -553,8 +554,11 @@ def save_particle_detections_to_store( detection_array = np.asarray(detections, dtype=np.float32) row_chunks = int(min(max(1, detection_array.shape[0]), 16384)) - latest_group.create_dataset( + create_or_overwrite_array( + root=latest_group, name="detections", + shape=tuple(int(v) for v in detection_array.shape), + dtype=detection_array.dtype, data=detection_array, chunks=(row_chunks, len(_PARTICLE_COLUMNS)), overwrite=True, @@ -572,8 +576,11 @@ def save_particle_detections_to_store( else np.empty((0, 4), dtype=np.float32) ) points_chunks = int(min(max(1, napari_points.shape[0]), 16384)) - latest_group.create_dataset( + create_or_overwrite_array( + root=latest_group, name="points_tzyx", + shape=tuple(int(v) for v in napari_points.shape), + dtype=napari_points.dtype, data=napari_points, chunks=(points_chunks, 4), overwrite=True, diff --git a/src/clearex/flatfield/pipeline.py b/src/clearex/flatfield/pipeline.py index 6c42107..ba07225 100644 --- a/src/clearex/flatfield/pipeline.py +++ b/src/clearex/flatfield/pipeline.py @@ -46,6 +46,7 @@ import zarr from clearex.io.provenance import register_latest_output_reference +from clearex.io.zarr_storage import write_dask_array if TYPE_CHECKING: from dask.distributed import Client @@ -1368,10 +1369,11 @@ def _emit(percent: int, message: str) -> None: downsampled = downsampled.rechunk(level_chunks) level_component = f"{base_parent}/data_pyramid/level_{level_index}" - write_task = da.to_zarr( - downsampled, - url=str(zarr_path), + write_task = write_dask_array( + zarr_path=zarr_path, component=level_component, + array=downsampled, + chunks=level_chunks, overwrite=True, compute=False, ) diff --git a/src/clearex/io/cli.py b/src/clearex/io/cli.py index 0f19a1c..2a7bac4 100644 --- a/src/clearex/io/cli.py +++ b/src/clearex/io/cli.py @@ -282,6 +282,13 @@ def create_parser() -> argparse.ArgumentParser: help="Force non-interactive mode (overrides --gui)", ) + subparsers = parser.add_subparsers(dest="command") + migrate_parser = subparsers.add_parser( + "migrate-store", + help="Convert an existing ClearEx-managed analysis store to Zarr v3.", + ) + migrate_parser.add_argument("store_path", help="Path to the ClearEx store to migrate.") + return parser diff --git a/src/clearex/io/experiment.py b/src/clearex/io/experiment.py index 7a23145..5f85fb9 100644 --- a/src/clearex/io/experiment.py +++ b/src/clearex/io/experiment.py @@ -39,6 +39,7 @@ import math import os import re +import shutil import subprocess import sys import warnings @@ -55,6 +56,19 @@ # Local Imports from clearex.io.read import ImageInfo +from clearex.io.zarr_storage import ( + clear_component, + create_or_overwrite_array, + detect_store_format, + extract_raw_axes_metadata, + is_clearex_analysis_store, + open_group as open_zarr_group, + replace_store_path, + resolve_external_analysis_store_path, + resolve_legacy_v2_store_path, + resolve_staging_store_path, + to_jsonable, +) from clearex.workflow import ( SpatialCalibrationConfig, spatial_calibration_from_dict, @@ -702,15 +716,7 @@ def _extract_zarr_axes(array: Any, group_attrs: dict[str, Any]) -> AxesSpec: tuple of str, optional Normalized source axes, when present. """ - attrs = dict(getattr(array, "attrs", {})) - raw_axes = ( - attrs.get("multiscales", [{}])[0].get("axes") - or group_attrs.get("multiscales", [{}])[0].get("axes") - or attrs.get("_ARRAY_DIMENSIONS") - or group_attrs.get("_ARRAY_DIMENSIONS") - or attrs.get("axes") - or group_attrs.get("axes") - ) + raw_axes = extract_raw_axes_metadata(array, group_attrs) return _normalize_axes_descriptor(raw_axes, ndim=len(tuple(array.shape))) @@ -3141,7 +3147,8 @@ def _materialize_data_pyramid( ): should_overwrite_level = False if should_overwrite_level: - root.create_dataset( + create_or_overwrite_array( + root=root, name=component, shape=level_shape, chunks=level_chunks, @@ -3312,21 +3319,200 @@ def resolve_data_store_path( Returns ------- pathlib.Path - Destination Zarr store path. Existing Zarr/N5 sources are reused - in-place; non-Zarr sources are materialized as ``data_store.zarr`` - next to ``experiment.yml``. + Destination Zarr store path. ClearEx-managed stores are reused + in-place; external Zarr/N5 sources are materialized into a sibling + ClearEx-managed store; non-Zarr sources are materialized as + ``data_store.zarr`` next to ``experiment.yml``. Raises ------ None This helper does not raise custom exceptions. """ + override_path = str(os.environ.get("CLEAREX_OVERRIDE_ANALYSIS_STORE_PATH", "")).strip() + if override_path: + return Path(override_path).expanduser().resolve() + source = Path(source_path).expanduser().resolve() if _is_zarr_like_path(source): - return source + if is_clearex_analysis_store(source): + return source + return resolve_external_analysis_store_path(source) return (experiment.path.parent / "data_store.zarr").resolve() +def _create_synthetic_experiment( + *, + source_path: Path, + source_shape: tuple[int, ...], + source_axes: AxesSpec, +) -> "NavigateExperiment": + """Create a minimal synthetic experiment for direct-source materialization.""" + axes = tuple(source_axes or ()) + axis_sizes = {axis: int(source_shape[idx]) for idx, axis in enumerate(axes)} + channel_count = max(1, int(axis_sizes.get("c", 1))) + return NavigateExperiment( + path=source_path, + raw={"source_path": str(source_path), "synthetic": True}, + save_directory=source_path.parent, + file_type=str(source_path.suffix).upper().lstrip(".") or "ZARR", + microscope_name=None, + image_mode=None, + timepoints=max(1, int(axis_sizes.get("t", 1))), + number_z_steps=max(1, int(axis_sizes.get("z", source_shape[-3] if len(source_shape) >= 3 else 1))), + y_pixels=max(1, int(axis_sizes.get("y", source_shape[-2] if len(source_shape) >= 2 else 1))), + x_pixels=max(1, int(axis_sizes.get("x", source_shape[-1] if len(source_shape) >= 1 else 1))), + multiposition_count=max(1, int(axis_sizes.get("p", 1))), + selected_channels=[ + NavigateChannel(name=f"channel_{idx}", laser=None, laser_index=None, exposure_ms=None, is_selected=True) + for idx in range(channel_count) + ], + xy_pixel_size_um=None, + z_step_um=None, + ) + + +def _legacy_n5_helper_python() -> Optional[str]: + """Return a Python executable that still exposes ``zarr.N5Store``.""" + candidates: list[str] = [] + env_candidate = str(os.environ.get("CLEAREX_LEGACY_N5_PYTHON", "")).strip() + if env_candidate: + candidates.append(env_candidate) + default_candidates = [ + "/opt/anaconda3/bin/python", + shutil.which("python3"), + shutil.which("python"), + ] + for candidate in default_candidates: + if candidate: + candidates.append(str(candidate)) + + seen: set[str] = set() + for candidate in candidates: + if not candidate or candidate in seen: + continue + seen.add(candidate) + try: + probe = subprocess.run( + [ + candidate, + "-c", + "import zarr,sys; sys.exit(0 if hasattr(zarr, 'N5Store') else 1)", + ], + check=False, + capture_output=True, + text=True, + ) + except Exception: + continue + if probe.returncode == 0: + return candidate + return None + + +def _materialize_n5_via_legacy_helper( + *, + experiment: "NavigateExperiment", + source_path: Path, + output_store_path: Path, + chunks: CanonicalShapeTpczyx, + pyramid_factors: tuple[ + tuple[int, ...], + tuple[int, ...], + tuple[int, ...], + tuple[int, ...], + tuple[int, ...], + tuple[int, ...], + ], +) -> Path: + """Materialize an N5 source into an intermediate v2 ClearEx store.""" + legacy_python = _legacy_n5_helper_python() + if legacy_python is None: + raise RuntimeError( + "N5 ingestion requires a legacy Python environment with zarr.N5Store. " + "Set CLEAREX_LEGACY_N5_PYTHON to a compatible interpreter." + ) + + legacy_output = resolve_legacy_v2_store_path(output_store_path) + repo_root = Path(__file__).resolve().parents[3] + command = [ + legacy_python, + "-m", + "clearex.io.n5_legacy_helper", + "--experiment-path", + str(experiment.path), + "--source-path", + str(source_path), + "--output-store", + str(legacy_output), + "--chunks", + ",".join(str(int(value)) for value in chunks), + "--pyramid-factors", + json.dumps([[int(value) for value in axis_levels] for axis_levels in pyramid_factors]), + ] + subprocess.run( + command, + check=True, + cwd=str(repo_root), + env={ + **os.environ, + "PYTHONPATH": str(Path(__file__).resolve().parents[2]), + }, + ) + return legacy_output + + +def migrate_analysis_store( + zarr_path: Union[str, Path], + *, + keep_backup: bool = True, +) -> Path: + """Convert an existing ClearEx-managed store to Zarr v3 in place.""" + store_path = Path(zarr_path).expanduser().resolve() + if not _is_zarr_like_path(store_path): + raise ValueError(f"Path is not a Zarr store: {store_path}") + if not is_clearex_analysis_store(store_path): + raise ValueError(f"Path is not a ClearEx-managed analysis store: {store_path}") + if detect_store_format(store_path) == 3: + return store_path + + staging_path = resolve_staging_store_path(store_path) + if staging_path.exists(): + shutil.rmtree(staging_path) + + source_root = zarr.open_group(str(store_path), mode="r") + target_root = open_zarr_group(staging_path, mode="a", zarr_format=3) + + def _copy_group(source_group: Any, target_group: Any) -> None: + target_group.attrs.update(to_jsonable(dict(getattr(source_group, "attrs", {})))) + for array_key in sorted(source_group.array_keys()): + source_array = source_group[array_key] + chunks = getattr(source_array, "chunks", None) + target_array = create_or_overwrite_array( + root=target_group, + name=str(array_key), + shape=tuple(int(v) for v in source_array.shape), + chunks=tuple(int(v) for v in chunks) if chunks is not None else None, + dtype=source_array.dtype, + overwrite=True, + ) + da.to_zarr(da.from_zarr(source_array), target_array, compute=True) + target_array.attrs.update( + to_jsonable(dict(getattr(source_array, "attrs", {}))) + ) + for group_key in sorted(source_group.group_keys()): + child_target = target_group.require_group(str(group_key)) + _copy_group(source_group[group_key], child_target) + + _copy_group(source_root, target_root) + _ = replace_store_path( + staging_path=staging_path, + target_path=store_path, + keep_backup=keep_backup, + ) + return store_path + + def materialize_experiment_data_store( *, experiment: "NavigateExperiment", @@ -3409,9 +3595,60 @@ def _emit_progress(percent: int, message: str) -> None: if not source_resolved.exists(): raise FileNotFoundError(source_resolved) - store_path = resolve_data_store_path( + final_store_path = resolve_data_store_path( experiment=experiment, source_path=source_resolved ) + if ( + source_resolved.suffix.lower() == ".n5" + and not is_clearex_analysis_store(source_resolved) + and str(os.environ.get("CLEAREX_LEGACY_N5_ACTIVE", "")).strip() != "1" + ): + _emit_progress(10, "Materializing N5 source via legacy helper") + legacy_store_path = _materialize_n5_via_legacy_helper( + experiment=experiment, + source_path=source_resolved, + output_store_path=final_store_path, + chunks=chunks, + pyramid_factors=pyramid_factors, + ) + migrated_legacy_store = migrate_analysis_store( + legacy_store_path, + keep_backup=False, + ) + _ = replace_store_path( + staging_path=migrated_legacy_store, + target_path=final_store_path, + keep_backup=False, + ) + root = zarr.open_group(str(final_store_path), mode="r") + data = root["data"] + canonical_shape = _normalize_tpczyx_shape(tuple(int(size) for size in data.shape)) + canonical_chunks = _normalize_write_chunks( + shape_tpczyx=canonical_shape, + chunks=tuple(int(value) for value in (data.chunks or chunks)), + ) + return MaterializedDataStore( + source_path=source_resolved, + store_path=final_store_path, + source_component=None, + source_image_info=ImageInfo( + path=source_resolved, + shape=canonical_shape, + dtype=np.dtype(data.dtype), + axes="TPCZYX", + metadata={"legacy_n5_helper": True}, + ), + data_image_info=ImageInfo( + path=final_store_path, + shape=canonical_shape, + dtype=np.dtype(data.dtype), + axes="TPCZYX", + metadata={"component": "data", "legacy_n5_helper": True}, + ), + chunks_tpczyx=canonical_chunks, + ) + + working_store_path = resolve_staging_store_path(final_store_path) write_client = client if _is_zarr_like_path(source_resolved) else None source_aligned_worker_count: Optional[int] = None source_aligned_worker_memory_limit_bytes: Optional[int] = None @@ -3483,13 +3720,15 @@ def _emit_progress(percent: int, message: str) -> None: canonical = canonical.rechunk(normalized_chunks) _emit_progress(45, "Preparing chunk-batched canonical writes") - if (not force_rebuild) and has_complete_canonical_data_store(store_path): + if (not force_rebuild) and has_complete_canonical_data_store(final_store_path): _emit_progress(100, "Canonical data store is already complete") - data_root = zarr.open_group(str(store_path), mode="r") - data_chunks = tuple(int(value) for value in (data_root["data"].chunks or normalized_chunks)) + data_root = zarr.open_group(str(final_store_path), mode="r") + data_chunks = tuple( + int(value) for value in (data_root["data"].chunks or normalized_chunks) + ) return MaterializedDataStore( source_path=source_resolved, - store_path=store_path, + store_path=final_store_path, source_component=source_component, source_image_info=ImageInfo( path=source_resolved, @@ -3499,7 +3738,7 @@ def _emit_progress(percent: int, message: str) -> None: metadata=dict(source_meta), ), data_image_info=ImageInfo( - path=store_path, + path=final_store_path, shape=canonical_shape, dtype=source_dtype, axes="TPCZYX", @@ -3511,6 +3750,8 @@ def _emit_progress(percent: int, message: str) -> None: ), ) + store_path = working_store_path + def _write_canonical_component( *, component: str, @@ -3596,9 +3837,7 @@ def _write_canonical_component( chunks_tpczyx=normalized_chunks, ) - should_stage_same_component = ( - store_path == source_resolved and source_component == "data" - ) + should_stage_same_component = False checkpoint_resume_supported = not should_stage_same_component root = zarr.open_group(str(store_path), mode="a") existing_progress_record = _read_ingestion_progress_record(root) @@ -3696,111 +3935,56 @@ def _persist_level_progress( record=ingestion_record, ) - if should_stage_same_component: - # Resume is disabled for staged same-component rewrites. - resume_from_checkpoint = False - base_start_region = 0 - temp_component = "__clearex_tmp_data" - if temp_component in root: - del root[temp_component] - root.create_dataset( - name=temp_component, - shape=canonical_shape, - chunks=normalized_chunks, - dtype=source_dtype.name, - overwrite=True, - ) - _write_canonical_component( - component=temp_component, - progress_start=55, - progress_end=82, - progress_label="Writing staged canonical data", - start_region_index=0, - batch_completed_callback=_persist_base_progress, - ) - _emit_progress(82, "Swapping staged data into canonical component") - if "data" in root: - del root["data"] - root.move(temp_component, "data") - ingestion_record["swap_completed"] = True - ingestion_record["updated_utc"] = _utc_now_iso() - _write_ingestion_progress_record( - store_path=store_path, - record=ingestion_record, - ) - - initialize_analysis_store( - experiment=experiment, - zarr_path=store_path, - overwrite=False, - chunks=chunks, - pyramid_factors=pyramid_factors, - dtype=source_dtype.name, - shape_tpczyx=canonical_shape, - ) - _materialize_data_pyramid( - store_path=store_path, - base_chunks_tpczyx=normalized_chunks, - pyramid_factors=pyramid_factors, - client=client, - progress_callback=progress_callback, - progress_start=86, - progress_end=96, - preserve_existing=False, - level_progress_callback=_persist_level_progress, - ) - _emit_progress(97, "Finalizing store metadata") - else: - initialize_analysis_store( - experiment=experiment, - zarr_path=store_path, - overwrite=not resume_from_checkpoint, - chunks=chunks, - pyramid_factors=pyramid_factors, - dtype=source_dtype.name, - shape_tpczyx=canonical_shape, - ) - _write_canonical_component( - component="data", - progress_start=55, - progress_end=70, - progress_label="Writing canonical data", - start_region_index=base_start_region, - batch_completed_callback=_persist_base_progress, - ) - ingestion_record["swap_completed"] = True - ingestion_record["updated_utc"] = _utc_now_iso() - _write_ingestion_progress_record( - store_path=store_path, - record=ingestion_record, - ) + initialize_analysis_store( + experiment=experiment, + zarr_path=store_path, + overwrite=not resume_from_checkpoint, + chunks=chunks, + pyramid_factors=pyramid_factors, + dtype=source_dtype.name, + shape_tpczyx=canonical_shape, + ) + _write_canonical_component( + component="data", + progress_start=55, + progress_end=70, + progress_label="Writing canonical data", + start_region_index=base_start_region, + batch_completed_callback=_persist_base_progress, + ) + ingestion_record["swap_completed"] = True + ingestion_record["updated_utc"] = _utc_now_iso() + _write_ingestion_progress_record( + store_path=store_path, + record=ingestion_record, + ) - start_regions_by_component: Dict[str, int] = {} - if resume_from_checkpoint: - pyramid_progress = ingestion_record.get("pyramid_progress", {}) - if isinstance(pyramid_progress, dict): - for component, payload in pyramid_progress.items(): - if not isinstance(payload, dict): - continue - try: - completed = int(payload.get("completed_regions", 0)) - except Exception: - continue - start_regions_by_component[str(component)] = max(0, int(completed)) - - _materialize_data_pyramid( - store_path=store_path, - base_chunks_tpczyx=normalized_chunks, - pyramid_factors=pyramid_factors, - client=client, - progress_callback=progress_callback, - progress_start=72, - progress_end=96, - start_regions_by_component=start_regions_by_component, - preserve_existing=resume_from_checkpoint, - level_progress_callback=_persist_level_progress, - ) - _emit_progress(97, "Finalizing store metadata") + start_regions_by_component: Dict[str, int] = {} + if resume_from_checkpoint: + pyramid_progress = ingestion_record.get("pyramid_progress", {}) + if isinstance(pyramid_progress, dict): + for component, payload in pyramid_progress.items(): + if not isinstance(payload, dict): + continue + try: + completed = int(payload.get("completed_regions", 0)) + except Exception: + continue + start_regions_by_component[str(component)] = max(0, int(completed)) + + _materialize_data_pyramid( + store_path=store_path, + base_chunks_tpczyx=normalized_chunks, + pyramid_factors=pyramid_factors, + client=client, + progress_callback=progress_callback, + progress_start=72, + progress_end=96, + start_regions_by_component=start_regions_by_component, + preserve_existing=resume_from_checkpoint, + level_progress_callback=_persist_level_progress, + ) + _emit_progress(97, "Finalizing store metadata") _mark_ingestion_completed(record=ingestion_record) _write_ingestion_progress_record( @@ -3808,7 +3992,13 @@ def _persist_level_progress( record=ingestion_record, ) - root = zarr.open_group(str(store_path), mode="a") + _ = replace_store_path( + staging_path=store_path, + target_path=final_store_path, + keep_backup=False, + ) + + root = zarr.open_group(str(final_store_path), mode="a") source_axes_attr = list(source_axes) if source_axes is not None else None source_metadata_path = str(source_meta.get("source_path", source_resolved)) voxel_size_um_zyx = None @@ -3890,7 +4080,7 @@ def _persist_level_progress( metadata=dict(source_meta), ) data_image_info = ImageInfo( - path=store_path, + path=final_store_path, shape=canonical_shape, dtype=source_dtype, axes="TPCZYX", @@ -3899,7 +4089,7 @@ def _persist_level_progress( _emit_progress(100, "Materialization complete") return MaterializedDataStore( source_path=source_resolved, - store_path=store_path, + store_path=final_store_path, source_component=source_component, source_image_info=source_image_info, data_image_info=data_image_info, @@ -5002,6 +5192,7 @@ def initialize_analysis_store( output_path = Path(zarr_path).expanduser().resolve() output_path.parent.mkdir(parents=True, exist_ok=True) + target_zarr_format = int(str(os.environ.get("CLEAREX_TARGET_ZARR_FORMAT", "3"))) if shape_tpczyx is None: z_size, y_size, x_size = infer_zyx_shape( @@ -5050,7 +5241,11 @@ def initialize_analysis_store( float(experiment.xy_pixel_size_um), ] - root = zarr.open_group(str(output_path), mode="a") + root = open_zarr_group( + output_path, + mode="a", + zarr_format=target_zarr_format if detect_store_format(output_path) is None else None, + ) root.require_group("results") root.require_group("provenance") spatial_calibration_payload = spatial_calibration_to_dict( @@ -5058,7 +5253,7 @@ def initialize_analysis_store( ) if "data" in root: if overwrite: - del root["data"] + clear_component(root, "data") else: existing = root["data"] existing_chunks = ( @@ -5097,7 +5292,8 @@ def initialize_analysis_store( ) return output_path - root.create_dataset( + create_or_overwrite_array( + root=root, name="data", shape=shape, chunks=normalized_chunks, diff --git a/src/clearex/io/n5_legacy_helper.py b/src/clearex/io/n5_legacy_helper.py new file mode 100644 index 0000000..fed6de8 --- /dev/null +++ b/src/clearex/io/n5_legacy_helper.py @@ -0,0 +1,60 @@ +"""Legacy N5 materialization entrypoint for zarr2-compatible Python runtimes.""" + +from __future__ import annotations + +import argparse +import json +import os +from pathlib import Path + +from clearex.io.experiment import ( + load_navigate_experiment, + materialize_experiment_data_store, +) + + +def _parse_chunks(text: str) -> tuple[int, int, int, int, int, int]: + values = tuple(int(part.strip()) for part in str(text).split(",") if part.strip()) + if len(values) != 6: + raise ValueError("chunks must define six comma-separated integers.") + return values + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Materialize an N5 source into a legacy ClearEx Zarr v2 store." + ) + parser.add_argument("--experiment-path", required=True) + parser.add_argument("--source-path", required=True) + parser.add_argument("--output-store", required=True) + parser.add_argument("--chunks", required=True) + parser.add_argument("--pyramid-factors", required=True) + args = parser.parse_args() + + chunks = _parse_chunks(str(args.chunks)) + pyramid_factors_raw = json.loads(str(args.pyramid_factors)) + pyramid_factors = tuple( + tuple(int(value) for value in axis_levels) for axis_levels in pyramid_factors_raw + ) + if len(pyramid_factors) != 6: + raise ValueError("pyramid_factors must define six axis entries.") + + os.environ["CLEAREX_LEGACY_N5_ACTIVE"] = "1" + os.environ["CLEAREX_TARGET_ZARR_FORMAT"] = "2" + os.environ["CLEAREX_OVERRIDE_ANALYSIS_STORE_PATH"] = str( + Path(args.output_store).expanduser().resolve() + ) + + experiment = load_navigate_experiment(Path(args.experiment_path)) + materialize_experiment_data_store( + experiment=experiment, + source_path=Path(args.source_path), + chunks=chunks, + pyramid_factors=pyramid_factors, + force_rebuild=True, + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/clearex/io/provenance.py b/src/clearex/io/provenance.py index 740cbe3..38dca68 100644 --- a/src/clearex/io/provenance.py +++ b/src/clearex/io/provenance.py @@ -48,6 +48,7 @@ # Local Imports from clearex.io.read import ImageInfo +from clearex.io.zarr_storage import create_or_overwrite_array, write_dask_array from clearex.workflow import ( WorkflowConfig, dask_backend_to_dict, @@ -890,15 +891,21 @@ def store_latest_analysis_output( if isinstance(output_array, da.Array): data = output_array.rechunk(chunks) if chunks is not None else output_array - da.to_zarr(data, url=str(zarr_path), component=component, overwrite=True) + write_dask_array( + zarr_path=zarr_path, + component=component, + array=data, + overwrite=True, + ) else: root = zarr.open_group(str(zarr_path), mode="a") results_group = root.require_group("results") analysis_group = results_group.require_group(key) - if "latest" in analysis_group: - del analysis_group["latest"] - analysis_group.create_dataset( + create_or_overwrite_array( + root=analysis_group, name="latest", + shape=tuple(int(v) for v in output_array.shape), + dtype=output_array.dtype, data=output_array, chunks=chunks, overwrite=True, diff --git a/src/clearex/io/read.py b/src/clearex/io/read.py index 4ce55fd..4a16a8f 100644 --- a/src/clearex/io/read.py +++ b/src/clearex/io/read.py @@ -41,6 +41,7 @@ from numpy.typing import NDArray # Local Imports +from clearex.io.zarr_storage import extract_raw_axes_metadata ArrayLike = Union[NDArray[Any], da.Array] @@ -561,12 +562,7 @@ def _walk_arrays(group: Any, prefix: str = "") -> None: try: group_attrs = dict(getattr(grp, "attrs", {})) attrs = getattr(array, "attrs", {}) - axes = ( - attrs.get("multiscales", [{}])[0].get("axes") - or group_attrs.get("multiscales", [{}])[0].get("axes") - or attrs.get("axes") - or group_attrs.get("axes") - ) + axes = extract_raw_axes_metadata(array, group_attrs) meta = dict(group_attrs) meta.update(dict(attrs)) meta["selected_array_path"] = array_path diff --git a/src/clearex/io/zarr_storage.py b/src/clearex/io/zarr_storage.py new file mode 100644 index 0000000..79efb26 --- /dev/null +++ b/src/clearex/io/zarr_storage.py @@ -0,0 +1,260 @@ +"""Shared Zarr storage helpers for v2/v3 compatibility.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any, Mapping, Optional +import inspect +import json +import shutil + +import dask.array as da +import numpy as np +import zarr + + +_OPEN_GROUP_SIGNATURE = inspect.signature(zarr.open_group) +_OPEN_GROUP_FORMAT_KEY = ( + "zarr_format" + if "zarr_format" in _OPEN_GROUP_SIGNATURE.parameters + else "zarr_version" + if "zarr_version" in _OPEN_GROUP_SIGNATURE.parameters + else None +) + + +def to_jsonable(value: Any) -> Any: + """Round-trip a value through JSON-compatible types.""" + return json.loads(json.dumps(value)) + + +def open_group( + zarr_path: str | Path, + *, + mode: str = "a", + zarr_format: Optional[int] = None, +) -> Any: + """Open a Zarr group, forwarding format selection when supported.""" + kwargs: dict[str, Any] = {} + if zarr_format is not None and _OPEN_GROUP_FORMAT_KEY is not None: + kwargs[_OPEN_GROUP_FORMAT_KEY] = int(zarr_format) + return zarr.open_group(str(Path(zarr_path).expanduser().resolve()), mode=mode, **kwargs) + + +def detect_store_format(zarr_path: str | Path) -> Optional[int]: + """Return the on-disk Zarr format version when it can be inferred.""" + path = Path(zarr_path).expanduser().resolve() + if not path.exists(): + return None + if (path / "zarr.json").exists(): + return 3 + if (path / ".zgroup").exists() or (path / ".zarray").exists(): + return 2 + if path.suffix.lower() == ".n5": + return 2 + return None + + +def is_clearex_analysis_store(zarr_path: str | Path) -> bool: + """Return whether a store looks like a ClearEx-managed analysis store.""" + try: + root = open_group(zarr_path, mode="r") + except Exception: + return False + schema = str(root.attrs.get("schema", "")).strip() + return schema.startswith("clearex.analysis_store") + + +def resolve_external_analysis_store_path(source_path: str | Path) -> Path: + """Return the sibling ClearEx-managed store path for an external source store.""" + source = Path(source_path).expanduser().resolve() + if source.name.endswith(".clearex.zarr"): + return source + return source.with_name(f"{source.name}.clearex.zarr") + + +def resolve_staging_store_path(target_store_path: str | Path) -> Path: + """Return the stable sibling staging path for a target analysis store.""" + target = Path(target_store_path).expanduser().resolve() + return target.with_name(f"{target.name}.staging") + + +def resolve_legacy_v2_store_path(target_store_path: str | Path) -> Path: + """Return the sibling legacy-v2 handoff path used by the N5 helper.""" + target = Path(target_store_path).expanduser().resolve() + return target.with_name(f"{target.name}.legacy-v2.zarr") + + +def replace_store_path( + *, + staging_path: str | Path, + target_path: str | Path, + keep_backup: bool = False, +) -> Optional[Path]: + """Replace a target directory store with a fully written staging store.""" + staging = Path(staging_path).expanduser().resolve() + target = Path(target_path).expanduser().resolve() + if staging == target: + return None + if not staging.exists(): + raise FileNotFoundError(staging) + + backup = target.with_name(f"{target.name}.backup") + if backup.exists(): + shutil.rmtree(backup) + + renamed_target = False + if target.exists(): + target.rename(backup) + renamed_target = True + + try: + staging.rename(target) + except Exception: + if renamed_target and backup.exists() and not target.exists(): + backup.rename(target) + raise + + if renamed_target and backup.exists() and not keep_backup: + shutil.rmtree(backup) + return None + return backup if renamed_target and backup.exists() else None + + +def clear_component(root: Any, component: str) -> None: + """Delete a component from a group when present.""" + if str(component) in root: + del root[str(component)] + + +def create_or_overwrite_array( + *, + root: Any, + name: str, + shape: tuple[int, ...] | list[int] | None = None, + chunks: Any = None, + dtype: Any = None, + data: Any = None, + overwrite: bool = True, + zarr_format: Optional[int] = None, + **kwargs: Any, +) -> Any: + """Create an array across Zarr v2/v3, deleting any existing target when requested.""" + del zarr_format + if overwrite: + clear_component(root, name) + prepared_data = ( + np.asarray(data, dtype=dtype) if data is not None and dtype is not None else data + ) + effective_shape = ( + tuple(int(v) for v in prepared_data.shape) + if prepared_data is not None and shape is None + else shape + ) + effective_chunks = ( + tuple(int(v) for v in effective_shape) + if prepared_data is not None and chunks is None and effective_shape is not None + else chunks + ) + if hasattr(root, "create_array"): + if prepared_data is not None: + return root.create_array( + name=str(name), + data=prepared_data, + chunks=effective_chunks, + overwrite=False, + **kwargs, + ) + return root.create_array( + name=str(name), + shape=effective_shape, + chunks=effective_chunks, + dtype=dtype, + overwrite=False, + **kwargs, + ) + return root.create_dataset( + name=str(name), + shape=effective_shape, + chunks=effective_chunks, + dtype=dtype, + data=prepared_data, + overwrite=False, + **kwargs, + ) + + +def _default_chunks_for_array(array: da.Array) -> tuple[int, ...]: + """Return a concrete chunk tuple from a Dask array.""" + return tuple(int(axis_chunks[0]) for axis_chunks in array.chunks) + + +def write_dask_array( + *, + zarr_path: str | Path, + component: str, + array: da.Array, + overwrite: bool = True, + chunks: Optional[tuple[int, ...]] = None, + compute: bool = True, + zarr_format: Optional[int] = None, +) -> Any: + """Write a Dask array to an explicitly created Zarr array object.""" + root = open_group(zarr_path, mode="a", zarr_format=zarr_format) + target = create_or_overwrite_array( + root=root, + name=str(component), + shape=tuple(int(v) for v in array.shape), + chunks=tuple(int(v) for v in (chunks or _default_chunks_for_array(array))), + dtype=array.dtype, + overwrite=overwrite, + ) + return da.to_zarr(array, target, compute=compute) + + +def _extract_ome_multiscales(attrs: Mapping[str, Any]) -> Any: + """Return OME multiscales metadata from either v0.4 or v0.5 layouts.""" + ome_payload = attrs.get("ome") + if isinstance(ome_payload, Mapping): + multiscales = ome_payload.get("multiscales") + if multiscales: + return multiscales + return attrs.get("multiscales") + + +def _normalize_axis_payload(value: Any) -> Any: + """Collapse OME axis dictionaries into plain axis-name tokens.""" + if not isinstance(value, (list, tuple)): + return value + normalized: list[Any] = [] + changed = False + for axis in value: + if isinstance(axis, Mapping): + normalized.append(axis.get("name")) + changed = True + else: + normalized.append(axis) + return normalized if changed else value + + +def extract_raw_axes_metadata(array: Any, group_attrs: Mapping[str, Any]) -> Any: + """Return the best raw axis descriptor available for a Zarr array.""" + attrs = dict(getattr(array, "attrs", {})) + metadata = getattr(array, "metadata", None) + dimension_names = getattr(metadata, "dimension_names", None) + if dimension_names: + return _normalize_axis_payload(dimension_names) + + raw_axes = ( + _extract_ome_multiscales(attrs)[0].get("axes") + if isinstance(_extract_ome_multiscales(attrs), list) + and _extract_ome_multiscales(attrs) + else _extract_ome_multiscales(group_attrs)[0].get("axes") + if isinstance(_extract_ome_multiscales(group_attrs), list) + and _extract_ome_multiscales(group_attrs) + else attrs.get("_ARRAY_DIMENSIONS") + or group_attrs.get("_ARRAY_DIMENSIONS") + or attrs.get("axes") + or group_attrs.get("axes") + ) + return _normalize_axis_payload(raw_axes) diff --git a/src/clearex/main.py b/src/clearex/main.py index 0ad70c8..ee32cce 100644 --- a/src/clearex/main.py +++ b/src/clearex/main.py @@ -44,11 +44,13 @@ # Local Imports from clearex.io.read import ImageInfo, ImageOpener from clearex.io.experiment import ( + _create_synthetic_experiment, create_dask_client, is_navigate_experiment_file, load_navigate_experiment, load_store_spatial_calibration, materialize_experiment_data_store, + migrate_analysis_store, resolve_data_store_path, resolve_experiment_data_path, save_store_spatial_calibration, @@ -61,6 +63,7 @@ register_latest_output_reference, summarize_analysis_history, ) +from clearex.io.zarr_storage import is_clearex_analysis_store from clearex.detect.pipeline import ( run_particle_detection_analysis, ) @@ -1357,24 +1360,67 @@ def _emit_analysis_progress(percent: int, message: str) -> None: } ) else: - opener = ImageOpener() - _, info = opener.open( - input_path, - prefer_dask=workflow.prefer_dask, - chunks=workflow.chunks, - ) - image_info = info - _log_loaded_image(info, logger) - if input_path and is_zarr_store_path(input_path): - provenance_store_path = input_path - runtime_spatial_calibration = ( - _resolve_effective_store_spatial_calibration( - store_path=input_path, - desired_calibration=workflow.spatial_calibration, - persist=workflow.spatial_calibration_explicit, + resolved_input = Path(input_path).expanduser().resolve() + if is_clearex_analysis_store(resolved_input): + opener = ImageOpener() + _, info = opener.open( + input_path, + prefer_dask=workflow.prefer_dask, + chunks=workflow.chunks, + ) + image_info = info + _log_loaded_image(info, logger) + provenance_store_path = input_path + runtime_spatial_calibration = ( + _resolve_effective_store_spatial_calibration( + store_path=input_path, + desired_calibration=workflow.spatial_calibration, + persist=workflow.spatial_calibration_explicit, + ) + ) + else: + synthetic_experiment = _create_synthetic_experiment( + source_path=resolved_input, + source_shape=tuple(), + source_axes=None, + ) + materialized = materialize_experiment_data_store( + experiment=synthetic_experiment, + source_path=resolved_input, + chunks=workflow.zarr_save.chunks_tpczyx(), + pyramid_factors=workflow.zarr_save.pyramid_tpczyx(), + ) + image_info = materialized.source_image_info + provenance_store_path = str(materialized.store_path) + runtime_spatial_calibration = ( + _resolve_effective_store_spatial_calibration( + store_path=provenance_store_path, + desired_calibration=workflow.spatial_calibration, + persist=workflow.spatial_calibration_explicit, + ) ) + _log_loaded_image(image_info, logger) + step_records.append( + { + "name": "materialize_external_store", + "parameters": { + "source_path": str(materialized.source_path), + "store_path": str(materialized.store_path), + "target_component": "data", + "chunks_tpczyx": list(materialized.chunks_tpczyx), + }, + } + ) + else: + opener = ImageOpener() + _, info = opener.open( + input_path, + prefer_dask=workflow.prefer_dask, + chunks=workflow.chunks, ) + image_info = info + _log_loaded_image(info, logger) step_records.append( { @@ -2701,6 +2747,11 @@ def main() -> None: args = parser.parse_args() bootstrap_logger = _create_bootstrap_logger() + if getattr(args, "command", None) == "migrate-store": + migrated_path = migrate_analysis_store(str(args.store_path)) + bootstrap_logger.info(f"Migrated ClearEx store to Zarr v3: {migrated_path}") + return + try: workflow = _build_workflow_config(args) except ValueError as exc: diff --git a/src/clearex/mip_export/pipeline.py b/src/clearex/mip_export/pipeline.py index 284e351..b2303a1 100644 --- a/src/clearex/mip_export/pipeline.py +++ b/src/clearex/mip_export/pipeline.py @@ -44,6 +44,7 @@ # Local Imports from clearex.io.provenance import register_latest_output_reference +from clearex.io.zarr_storage import create_or_overwrite_array if TYPE_CHECKING: from dask.distributed import Client @@ -1726,8 +1727,11 @@ def _write_projection_output( root = zarr.open_group(str(output_path), mode="w") try: payload = np.asarray(projection) - root.create_dataset( + create_or_overwrite_array( + root=root, name="data", + shape=tuple(int(v) for v in payload.shape), + dtype=payload.dtype, data=payload, chunks=_default_projection_chunks(tuple(int(v) for v in payload.shape)), overwrite=True, diff --git a/src/clearex/visualization/pipeline.py b/src/clearex/visualization/pipeline.py index 1b3923b..1561c55 100644 --- a/src/clearex/visualization/pipeline.py +++ b/src/clearex/visualization/pipeline.py @@ -51,6 +51,7 @@ # Local Imports from clearex.io.experiment import load_navigate_experiment from clearex.io.provenance import register_latest_output_reference +from clearex.io.zarr_storage import write_dask_array from clearex.workflow import ( SpatialCalibrationConfig, format_spatial_calibration, @@ -2053,7 +2054,6 @@ def _build_visualization_multiscale_components( source_chunks = ( tuple(source_array.chunks) if source_array.chunks is not None else None ) - source_dtype = np.dtype(source_array.dtype) level_paths: list[str] = [str(source_component)] factor_payload: list[list[int]] = [ [int(value) for value in level_factors_tpczyx[0]] @@ -2111,19 +2111,13 @@ def _build_visualization_multiscale_components( ), chunks_tpczyx=level_chunks, ): - root.create_dataset( - name=level_component, - shape=level_shape, - chunks=level_chunks, - dtype=source_dtype.name, - overwrite=True, - ) with dask.config.set({"array.rechunk.method": "tasks"}): rechunked = downsampled.rechunk(level_chunks) - da.to_zarr( - rechunked, - str(zarr_path), + write_dask_array( + zarr_path=zarr_path, component=level_component, + array=rechunked, + chunks=level_chunks, overwrite=True, compute=True, ) diff --git a/tests/io/test_cli.py b/tests/io/test_cli.py index ffeb122..001bb47 100644 --- a/tests/io/test_cli.py +++ b/tests/io/test_cli.py @@ -85,3 +85,10 @@ def test_stage_axis_map_flag_parses_string(): args = create_parser().parse_args(["--stage-axis-map", "z=+x,y=none,x=+y"]) assert args.stage_axis_map == "z=+x,y=none,x=+y" + + +def test_migrate_store_subcommand_parses_path(): + args = create_parser().parse_args(["migrate-store", "/tmp/example.zarr"]) + + assert args.command == "migrate-store" + assert args.store_path == "/tmp/example.zarr" diff --git a/tests/io/test_experiment.py b/tests/io/test_experiment.py index f892ec9..666f762 100644 --- a/tests/io/test_experiment.py +++ b/tests/io/test_experiment.py @@ -27,6 +27,7 @@ # Standard Library Imports from pathlib import Path import json +import subprocess # Third Party Imports import dask.array as da @@ -48,12 +49,19 @@ load_navigate_experiment, load_store_spatial_calibration, materialize_experiment_data_store, + migrate_analysis_store, resolve_data_store_path, resolve_experiment_data_path, save_store_spatial_calibration, write_zyx_block, ) from clearex.io.read import ImageInfo +from clearex.io.zarr_storage import ( + create_or_overwrite_array, + open_group as open_zarr_group, + resolve_external_analysis_store_path, + resolve_staging_store_path, +) from clearex.workflow import SpatialCalibrationConfig @@ -88,6 +96,77 @@ def _write_minimal_experiment( path.write_text(json.dumps(payload, indent=2)) +def _wrap_test_zarr_group(group): + class _CompatZarrGroup: + def __init__(self, inner_group): + self._inner_group = inner_group + + def __getattr__(self, name): + return getattr(self._inner_group, name) + + def __contains__(self, key): + return key in self._inner_group + + def __delitem__(self, key): + del self._inner_group[key] + + def __getitem__(self, key): + item = self._inner_group[key] + if hasattr(item, "array_keys") and hasattr(item, "group_keys"): + return _wrap_test_zarr_group(item) + return item + + def create_dataset(self, name, **kwargs): + return create_or_overwrite_array(root=self._inner_group, name=name, **kwargs) + + def create_group(self, name, **kwargs): + return _wrap_test_zarr_group(self._inner_group.create_group(name, **kwargs)) + + def require_group(self, name, **kwargs): + return _wrap_test_zarr_group( + self._inner_group.require_group(name, **kwargs) + ) + + return _CompatZarrGroup(group) + + +def _open_test_zarr_group( + path: Path | str, + *, + mode: str = "a", + zarr_format: int | None = None, +): + if zarr_format is None and mode in {"w", "w-"}: + zarr_format = 2 + return _wrap_test_zarr_group( + open_zarr_group(path, mode=mode, zarr_format=zarr_format) + ) + + +def _write_real_n5_store(path: Path, entries: dict[str, np.ndarray]) -> None: + python_executable = experiment_module._legacy_n5_helper_python() + if python_executable is None: + pytest.skip("No zarr2-compatible Python with N5Store is available.") + + payload = {name: np.asarray(array).tolist() for name, array in entries.items()} + command = [ + python_executable, + "-c", + ( + "from pathlib import Path; " + "import json, numpy as np, sys, zarr; " + "target = Path(sys.argv[1]); " + "entries = json.loads(sys.argv[2]); " + "root = zarr.group(store=zarr.N5Store(str(target)), overwrite=True); " + "[root.create_dataset(name, data=np.asarray(values, dtype=np.uint16), " + "chunks=(1, 3, 4), overwrite=True) for name, values in entries.items()]" + ), + str(path), + json.dumps(payload), + ] + subprocess.run(command, check=True) + + def _write_multipositions_sidecar(path: Path, count: int) -> None: header = ["X", "Y", "Z", "THETA", "F", "X_PIXEL", "Y_PIXEL"] rows = [header] @@ -232,6 +311,8 @@ def _fake_spec_cluster(*, scheduler, workers, asynchronous): lambda: ["/cuda/runtime/lib", "/cuda/cudnn/lib"], ) path_env_var = experiment_module._library_path_env_vars_for_platform()[0] + for extra_env_var in experiment_module._library_path_env_vars_for_platform()[1:]: + monkeypatch.delenv(extra_env_var, raising=False) monkeypatch.setenv(path_env_var, "/cluster/custom/lib") _ = experiment_module.create_dask_client( @@ -435,7 +516,7 @@ def test_load_store_spatial_calibration_defaults_to_identity_for_legacy_store( tmp_path: Path, ): store_path = tmp_path / "legacy_store.zarr" - root = zarr.open_group(str(store_path), mode="w") + root = _open_test_zarr_group(store_path, mode="w") root.create_dataset( name="data", shape=(1, 1, 1, 2, 2, 2), @@ -456,7 +537,7 @@ def test_save_store_spatial_calibration_round_trip_and_preserves_existing_mappin _write_minimal_experiment(experiment_path, save_directory=tmp_path, file_type="H5") experiment = load_navigate_experiment(experiment_path) store_path = tmp_path / "store_with_mapping.zarr" - root = zarr.open_group(str(store_path), mode="w") + root = _open_test_zarr_group(store_path, mode="w") root.create_dataset( name="data", shape=(1, 1, 1, 2, 2, 2), @@ -638,6 +719,45 @@ def test_materialize_experiment_data_store_creates_data_store_for_non_zarr(tmp_p ) +def test_resolve_data_store_path_uses_sibling_store_for_external_zarr(tmp_path: Path): + experiment_path = tmp_path / "experiment.yml" + _write_minimal_experiment( + experiment_path, + save_directory=tmp_path, + file_type="OME-ZARR", + ) + experiment = load_navigate_experiment(experiment_path) + source_store = tmp_path / "input.ome.zarr" + _open_test_zarr_group(source_store, mode="w") + + resolved = resolve_data_store_path(experiment, source_store) + + assert resolved == resolve_external_analysis_store_path(source_store) + + +def test_migrate_analysis_store_converts_v2_store_in_place(tmp_path: Path): + store_path = tmp_path / "analysis_store.zarr" + root = _open_test_zarr_group(store_path, mode="w") + root.attrs["schema"] = "clearex.analysis_store.v1" + root.attrs["axes"] = ["t", "p", "c", "z", "y", "x"] + root.create_dataset( + "data", + data=np.arange(24, dtype=np.uint16).reshape(1, 1, 1, 2, 3, 4), + chunks=(1, 1, 1, 1, 3, 4), + overwrite=True, + ) + + migrated = migrate_analysis_store(store_path) + + assert migrated == store_path.resolve() + assert (store_path / "zarr.json").exists() + reopened = zarr.open_group(str(store_path), mode="r") + assert np.array_equal( + np.asarray(reopened["data"]), + np.arange(24, dtype=np.uint16).reshape(1, 1, 1, 2, 3, 4), + ) + + def test_materialize_experiment_data_store_batches_chunk_writes( tmp_path: Path, monkeypatch ): @@ -673,6 +793,7 @@ def _counting_compute(graph, *, client=None): expected_store = (experiment_path.parent / "data_store.zarr").resolve() root = zarr.open_group(str(expected_store), mode="r") assert np.array_equal(np.array(root["data"][0, 0, 0, :, :, :]), source_data) + assert not resolve_staging_store_path(expected_store).exists() assert len(compute_calls) == 8 @@ -714,8 +835,9 @@ def _failing_compute(graph, *, client=None): pyramid_factors=((1,), (1,), (1,), (1,), (1,), (1,)), ) - expected_store = (experiment_path.parent / "data_store.zarr").resolve() - root = zarr.open_group(str(expected_store), mode="r") + staging_store = resolve_staging_store_path(experiment_path.parent / "data_store.zarr") + assert not (experiment_path.parent / "data_store.zarr").exists() + root = zarr.open_group(str(staging_store), mode="r") progress = dict(root.attrs["ingestion_progress"]) assert progress["status"] == "in_progress" assert progress["base_progress"]["completed_regions"] == 3 @@ -739,6 +861,7 @@ def _counting_compute(graph, *, client=None): assert resume_call_count["value"] == 5 root = zarr.open_group(str(materialized.store_path), mode="r") assert np.array_equal(np.array(root["data"][0, 0, 0, :, :, :]), source_data) + assert not staging_store.exists() progress = dict(root.attrs["ingestion_progress"]) assert progress["status"] == "completed" assert progress["base_progress"]["completed_regions"] == 8 @@ -771,7 +894,9 @@ def test_materialize_experiment_data_store_handles_multibatch_base_and_pyramid( ) -def test_materialize_experiment_data_store_reuses_existing_zarr_store(tmp_path: Path): +def test_materialize_experiment_data_store_materializes_external_zarr_store_to_sibling_store( + tmp_path: Path, +): experiment_path = tmp_path / "experiment.yml" _write_minimal_experiment( experiment_path, save_directory=tmp_path, file_type="OME-ZARR" @@ -780,7 +905,7 @@ def test_materialize_experiment_data_store_reuses_existing_zarr_store(tmp_path: source_data = np.arange(24, dtype=np.uint16).reshape(2, 3, 4) source_store = tmp_path / "source.ome.zarr" - source_root = zarr.open_group(str(source_store), mode="w") + source_root = _open_test_zarr_group(source_store, mode="w") source_root.create_dataset("raw", data=source_data, chunks=(1, 3, 4), overwrite=True) source_root["raw"].attrs["_ARRAY_DIMENSIONS"] = ["z", "y", "x"] @@ -791,8 +916,9 @@ def test_materialize_experiment_data_store_reuses_existing_zarr_store(tmp_path: pyramid_factors=((1,), (1,), (1,), (1, 2), (1, 2), (1, 2)), ) - assert materialized.store_path == source_store.resolve() - root = zarr.open_group(str(source_store), mode="r") + expected_store = resolve_external_analysis_store_path(source_store) + assert materialized.store_path == expected_store + root = zarr.open_group(str(materialized.store_path), mode="r") assert "data" in root assert tuple(root["data"].shape) == (1, 1, 1, 2, 3, 4) assert tuple(root["data"].chunks) == (1, 1, 1, 2, 2, 2) @@ -800,11 +926,14 @@ def test_materialize_experiment_data_store_reuses_existing_zarr_store(tmp_path: assert root.attrs["data_pyramid_levels"] == ["data", "data_pyramid/level_1"] assert tuple(root["data_pyramid/level_1"].shape) == (1, 1, 1, 1, 2, 2) assert not (experiment_path.parent / "data_store.zarr").exists() + source_root = zarr.open_group(str(source_store), mode="r") + assert "raw" in source_root + assert "data" not in source_root def test_has_canonical_data_component_detects_ready_store(tmp_path: Path): store_path = tmp_path / "ready_store.n5" - root = zarr.open_group(str(store_path), mode="w") + root = _open_test_zarr_group(store_path, mode="w") root.create_dataset( "data", shape=(1, 2, 3, 4, 5, 6), @@ -819,7 +948,7 @@ def test_has_canonical_data_component_detects_ready_store(tmp_path: Path): def test_has_canonical_data_component_rejects_noncanonical_store(tmp_path: Path): store_path = tmp_path / "raw_source.n5" - root = zarr.open_group(str(store_path), mode="w") + root = _open_test_zarr_group(store_path, mode="w") root.create_dataset( "data", shape=(2, 3, 4), @@ -836,7 +965,7 @@ def test_has_complete_canonical_data_store_rejects_missing_expected_pyramid( tmp_path: Path, ): store_path = tmp_path / "incomplete_store.n5" - root = zarr.open_group(str(store_path), mode="w") + root = _open_test_zarr_group(store_path, mode="w") root.create_dataset( "data", shape=(1, 1, 1, 2, 4, 4), @@ -943,7 +1072,9 @@ def test_materialize_experiment_data_store_reuses_complete_store_by_default_and_ assert rebuilt_root.attrs["data_pyramid_levels"] == ["data", "data_pyramid/level_1"] -def test_materialize_experiment_data_store_handles_same_component_rewrite(tmp_path: Path): +def test_materialize_experiment_data_store_keeps_external_source_store_immutable( + tmp_path: Path, +): experiment_path = tmp_path / "experiment.yml" _write_minimal_experiment( experiment_path, save_directory=tmp_path, file_type="OME-ZARR" @@ -952,11 +1083,11 @@ def test_materialize_experiment_data_store_handles_same_component_rewrite(tmp_pa source_data = np.arange(24, dtype=np.uint16).reshape(2, 3, 4) source_store = tmp_path / "source_data.zarr" - source_root = zarr.open_group(str(source_store), mode="w") + source_root = _open_test_zarr_group(source_store, mode="w") source_root.create_dataset("data", data=source_data, chunks=(1, 3, 4), overwrite=True) source_root["data"].attrs["_ARRAY_DIMENSIONS"] = ["z", "y", "x"] - materialize_experiment_data_store( + materialized = materialize_experiment_data_store( experiment=experiment, source_path=source_store, chunks=(1, 1, 1, 2, 2, 2), @@ -964,11 +1095,13 @@ def test_materialize_experiment_data_store_handles_same_component_rewrite(tmp_pa ) root = zarr.open_group(str(source_store), mode="r") - assert tuple(root["data"].shape) == (1, 1, 1, 2, 3, 4) - assert tuple(root["data"].chunks) == (1, 1, 1, 2, 2, 2) - assert np.array_equal(np.array(root["data"][0, 0, 0, :, :, :]), source_data) - assert root.attrs["data_pyramid_levels"] == ["data", "data_pyramid/level_1"] - assert tuple(root["data_pyramid/level_1"].shape) == (1, 1, 1, 1, 2, 2) + assert tuple(root["data"].shape) == (2, 3, 4) + assert tuple(root["data"].chunks) == (1, 3, 4) + assert np.array_equal(np.array(root["data"][:]), source_data) + assert "data_pyramid" not in root + sibling_root = zarr.open_group(str(materialized.store_path), mode="r") + assert tuple(sibling_root["data"].shape) == (1, 1, 1, 2, 3, 4) + assert tuple(sibling_root["data_pyramid/level_1"].shape) == (1, 1, 1, 1, 2, 2) def test_materialize_experiment_data_store_stacks_tiff_positions_and_channels( @@ -1113,42 +1246,21 @@ def test_materialize_experiment_data_store_stacks_bdv_n5_setups( experiment = load_navigate_experiment(experiment_path) source_path = tmp_path / "CH00_000000.n5" - source_root = zarr.open_group(str(source_path), mode="w") expected_blocks = { (0, 0): np.full((2, 3, 4), fill_value=11, dtype=np.uint16), (1, 0): np.full((2, 3, 4), fill_value=21, dtype=np.uint16), (0, 1): np.full((2, 3, 4), fill_value=31, dtype=np.uint16), (1, 1): np.full((2, 3, 4), fill_value=41, dtype=np.uint16), } - source_root.create_dataset( - "setup0/timepoint0/s0", - data=expected_blocks[(0, 0)], - chunks=(1, 3, 4), - overwrite=True, - ) - source_root.create_dataset( - "setup1/timepoint0/s0", - data=expected_blocks[(1, 0)], - chunks=(1, 3, 4), - overwrite=True, - ) - source_root.create_dataset( - "setup2/timepoint0/s0", - data=expected_blocks[(0, 1)], - chunks=(1, 3, 4), - overwrite=True, - ) - source_root.create_dataset( - "setup3/timepoint0/s0", - data=expected_blocks[(1, 1)], - chunks=(1, 3, 4), - overwrite=True, - ) - source_root.create_dataset( - "setup99/timepoint0/s0", - data=np.zeros((2, 3, 4), dtype=np.uint16), - chunks=(1, 3, 4), - overwrite=True, + _write_real_n5_store( + source_path, + { + "setup0/timepoint0/s0": expected_blocks[(0, 0)], + "setup1/timepoint0/s0": expected_blocks[(1, 0)], + "setup2/timepoint0/s0": expected_blocks[(0, 1)], + "setup3/timepoint0/s0": expected_blocks[(1, 1)], + "setup99/timepoint0/s0": np.zeros((2, 3, 4), dtype=np.uint16), + }, ) _write_bdv_xml( @@ -1196,7 +1308,7 @@ def test_materialize_experiment_data_store_stacks_bdv_ome_zarr_setups( experiment = load_navigate_experiment(experiment_path) source_path = tmp_path / "CH00_000000.ome.zarr" - source_root = zarr.open_group(str(source_path), mode="w") + source_root = _open_test_zarr_group(source_path, mode="w") expected_blocks = { (0, 0): np.full((2, 3, 4), fill_value=12, dtype=np.uint16), (1, 0): np.full((2, 3, 4), fill_value=22, dtype=np.uint16), @@ -1304,7 +1416,7 @@ def test_materialize_experiment_data_store_uses_source_aligned_plane_writes( source_data = np.arange(4 * 8 * 10, dtype=np.uint16).reshape(4, 8, 10) source_store = tmp_path / "source.ome.zarr" - source_root = zarr.open_group(str(source_store), mode="w") + source_root = _open_test_zarr_group(source_store, mode="w") source_root.create_dataset("raw", data=source_data, chunks=(1, 8, 10), overwrite=True) source_root["raw"].attrs["_ARRAY_DIMENSIONS"] = ["z", "y", "x"] @@ -1361,7 +1473,7 @@ def test_materialize_experiment_data_store_falls_back_to_chunk_batched_writes( source_data = np.arange(4 * 8 * 10, dtype=np.uint16).reshape(4, 8, 10) source_store = tmp_path / "source.ome.zarr" - source_root = zarr.open_group(str(source_store), mode="w") + source_root = _open_test_zarr_group(source_store, mode="w") source_root.create_dataset("raw", data=source_data, chunks=(2, 8, 10), overwrite=True) source_root["raw"].attrs["_ARRAY_DIMENSIONS"] = ["z", "y", "x"] diff --git a/tests/io/test_provenance.py b/tests/io/test_provenance.py index 776def4..fd1d442 100644 --- a/tests/io/test_provenance.py +++ b/tests/io/test_provenance.py @@ -43,9 +43,60 @@ verify_provenance_chain, ) from clearex.io.read import ImageInfo +from clearex.io.zarr_storage import ( + create_or_overwrite_array, + open_group as open_zarr_group, +) from clearex.workflow import SpatialCalibrationConfig, WorkflowConfig +def _wrap_test_zarr_group(group): + class _CompatZarrGroup: + def __init__(self, inner_group): + self._inner_group = inner_group + + def __getattr__(self, name): + return getattr(self._inner_group, name) + + def __contains__(self, key): + return key in self._inner_group + + def __delitem__(self, key): + del self._inner_group[key] + + def __getitem__(self, key): + item = self._inner_group[key] + if hasattr(item, "array_keys") and hasattr(item, "group_keys"): + return _wrap_test_zarr_group(item) + return item + + def create_dataset(self, name, **kwargs): + return create_or_overwrite_array(root=self._inner_group, name=name, **kwargs) + + def create_group(self, name, **kwargs): + return _wrap_test_zarr_group(self._inner_group.create_group(name, **kwargs)) + + def require_group(self, name, **kwargs): + return _wrap_test_zarr_group( + self._inner_group.require_group(name, **kwargs) + ) + + return _CompatZarrGroup(group) + + +def _open_test_zarr_group( + path: Path | str, + *, + mode: str = "a", + zarr_format: int | None = None, +): + if zarr_format is None and mode in {"w", "w-"}: + zarr_format = 2 + return _wrap_test_zarr_group( + open_zarr_group(path, mode=mode, zarr_format=zarr_format) + ) + + def test_is_zarr_store_path(): assert is_zarr_store_path("sample.zarr") is True assert is_zarr_store_path("sample.n5") is True @@ -54,7 +105,7 @@ def test_is_zarr_store_path(): def test_persist_run_provenance_hash_chain(tmp_path: Path): store_path = tmp_path / "provenance_test.zarr" - zarr.open_group(str(store_path), mode="w") + _open_test_zarr_group(store_path, mode="w") workflow = WorkflowConfig( file=str(store_path), @@ -107,7 +158,7 @@ def test_persist_run_provenance_hash_chain(tmp_path: Path): def test_persist_run_provenance_records_spatial_calibration(tmp_path: Path) -> None: store_path = tmp_path / "spatial_provenance.zarr" - zarr.open_group(str(store_path), mode="w") + _open_test_zarr_group(store_path, mode="w") workflow = WorkflowConfig( file=str(store_path), visualization=True, @@ -136,7 +187,7 @@ def test_persist_run_provenance_records_spatial_calibration(tmp_path: Path) -> N def test_verify_provenance_chain_detects_tampering(tmp_path: Path): store_path = tmp_path / "tamper_test.zarr" - zarr.open_group(str(store_path), mode="w") + _open_test_zarr_group(store_path, mode="w") workflow = WorkflowConfig(file=str(store_path), visualization=True) image_info = ImageInfo(path=store_path, shape=(2, 2), dtype=np.uint8) @@ -160,7 +211,7 @@ def test_verify_provenance_chain_detects_tampering(tmp_path: Path): def test_store_latest_analysis_output_overwrites_previous_version(tmp_path: Path): store_path = tmp_path / "output_policy_test.zarr" - zarr.open_group(str(store_path), mode="w") + _open_test_zarr_group(store_path, mode="w") first = np.zeros((4, 4), dtype=np.uint8) second = np.ones((4, 4), dtype=np.uint8) @@ -196,7 +247,7 @@ def test_store_latest_analysis_output_overwrites_previous_version(tmp_path: Path def test_summarize_analysis_history_reports_matching_parameters(tmp_path: Path): store_path = tmp_path / "history_test.zarr" - root = zarr.open_group(str(store_path), mode="w") + root = _open_test_zarr_group(store_path, mode="w") root.create_dataset( name="data", shape=(1, 1, 1, 2, 2, 2), @@ -254,7 +305,7 @@ def test_summarize_analysis_history_reports_matching_parameters(tmp_path: Path): def test_summarize_analysis_history_ignores_skipped_steps(tmp_path: Path): store_path = tmp_path / "history_skip_test.zarr" - root = zarr.open_group(str(store_path), mode="w") + root = _open_test_zarr_group(store_path, mode="w") root.create_dataset( name="data", shape=(1, 1, 1, 2, 2, 2), @@ -304,7 +355,7 @@ def test_load_latest_completed_workflow_state_skips_cancelled_runs( tmp_path: Path, ) -> None: store_path = tmp_path / "latest_completed_workflow.zarr" - zarr.open_group(str(store_path), mode="w") + _open_test_zarr_group(store_path, mode="w") completed_workflow = WorkflowConfig(file=str(store_path), visualization=True) image_info = ImageInfo(path=store_path, shape=(2, 2), dtype=np.uint8) @@ -333,7 +384,7 @@ def test_load_latest_completed_workflow_state_skips_cancelled_runs( def test_latest_analysis_gui_state_round_trip(tmp_path: Path) -> None: store_path = tmp_path / "analysis_gui_state.zarr" - zarr.open_group(str(store_path), mode="w") + _open_test_zarr_group(store_path, mode="w") payload = { "flatfield": True, "deconvolution": False, diff --git a/tests/io/test_read.py b/tests/io/test_read.py index 327d858..b175a25 100644 --- a/tests/io/test_read.py +++ b/tests/io/test_read.py @@ -35,7 +35,6 @@ import dask.array as da from numpy.typing import NDArray import tifffile -import zarr from numcodecs import Blosc import h5py @@ -49,6 +48,10 @@ NumpyReader, ImageOpener, ) +from clearex.io.zarr_storage import ( + create_or_overwrite_array, + open_group as open_zarr_group, +) from tests import download_test_registration_data @@ -57,6 +60,53 @@ # ============================================================================= +def _wrap_test_zarr_group(group): + class _CompatZarrGroup: + def __init__(self, inner_group): + self._inner_group = inner_group + + def __getattr__(self, name): + return getattr(self._inner_group, name) + + def __contains__(self, key): + return key in self._inner_group + + def __delitem__(self, key): + del self._inner_group[key] + + def __getitem__(self, key): + item = self._inner_group[key] + if hasattr(item, "array_keys") and hasattr(item, "group_keys"): + return _wrap_test_zarr_group(item) + return item + + def create_dataset(self, name, **kwargs): + return create_or_overwrite_array(root=self._inner_group, name=name, **kwargs) + + def create_group(self, name, **kwargs): + return _wrap_test_zarr_group(self._inner_group.create_group(name, **kwargs)) + + def require_group(self, name, **kwargs): + return _wrap_test_zarr_group( + self._inner_group.require_group(name, **kwargs) + ) + + return _CompatZarrGroup(group) + + +def _open_test_zarr_group( + path: Path | str, + *, + mode: str = "a", + zarr_format: int | None = None, +): + if zarr_format is None and mode in {"w", "w-"}: + zarr_format = 2 + return _wrap_test_zarr_group( + open_zarr_group(path, mode=mode, zarr_format=zarr_format) + ) + + class TestImageInfo: """Test suite for the ImageInfo dataclass.""" @@ -247,7 +297,6 @@ def open( info = ImageInfo(path=path, shape=arr.shape, dtype=arr.dtype) return arr, info - reader = EmptySuffixReader() assert EmptySuffixReader.claims(Path("any.file")) is False def test_open_method_signature(self): @@ -698,7 +747,7 @@ def temp_zarr_2d(self, tmp_path): arr = np.random.randint(0, 65535, size=(256, 256), dtype=np.uint16) zarr_path = tmp_path / "test_2d.zarr" # Create as a group with a single array - root = zarr.open_group(str(zarr_path), mode="w") + root = _open_test_zarr_group(zarr_path, mode="w") root.create_dataset("data", data=arr) return zarr_path, arr @@ -709,7 +758,7 @@ def temp_zarr_3d(self, tmp_path): arr = np.random.randint(0, 65535, size=(10, 256, 256), dtype=np.uint16) zarr_path = tmp_path / "test_3d.zarr" # Create as a group with a single array - root = zarr.open_group(str(zarr_path), mode="w") + root = _open_test_zarr_group(zarr_path, mode="w") root.create_dataset("data", data=arr) return zarr_path, arr @@ -720,7 +769,7 @@ def temp_zarr_with_attrs(self, tmp_path): arr = np.random.rand(10, 100, 100).astype(np.float32) zarr_path = tmp_path / "test_with_attrs.zarr" - root = zarr.open_group(str(zarr_path), mode="w") + root = _open_test_zarr_group(zarr_path, mode="w") z = root.create_dataset( "data", data=arr, @@ -737,7 +786,7 @@ def temp_zarr_multiarray(self, tmp_path): """Fixture to create a Zarr store with multiple arrays.""" zarr_path = tmp_path / "test_multi.zarr" - root = zarr.open_group(str(zarr_path), mode="w") + root = _open_test_zarr_group(zarr_path, mode="w") # Create multiple arrays with different sizes arr_small = np.random.rand(50, 50).astype(np.float32) @@ -757,7 +806,7 @@ def temp_zarr_with_ome_attrs(self, tmp_path): arr = np.random.randint(0, 255, size=(5, 3, 100, 100), dtype=np.uint8) zarr_path = tmp_path / "test_ome.zarr" - root = zarr.open_group(str(zarr_path), mode="w") + root = _open_test_zarr_group(zarr_path, mode="w") z = root.create_dataset("data", data=arr, chunks=(1, 1, 50, 50)) z.attrs["multiscales"] = [ { @@ -956,7 +1005,7 @@ def test_open_different_dtypes(self, zarr_reader, tmp_path): arr_in = (arr_in * np.iinfo(dtype).max).astype(dtype) zarr_path = tmp_path / f"test_{dtype.__name__}.zarr" - root = zarr.open_group(str(zarr_path), mode="w") + root = _open_test_zarr_group(zarr_path, mode="w") root.create_dataset("data", data=arr_in) arr_out, info = zarr_reader.open(zarr_path) @@ -978,7 +1027,7 @@ def test_open_various_shapes(self, zarr_reader, tmp_path): for idx, shape in enumerate(shapes): arr_in = np.random.randint(0, 255, size=shape, dtype=np.uint8) zarr_path = tmp_path / f"test_{len(shape)}d.zarr" - root = zarr.open_group(str(zarr_path), mode="w") + root = _open_test_zarr_group(zarr_path, mode="w") root.create_dataset("data", data=arr_in) arr_out, info = zarr_reader.open(zarr_path) @@ -1034,7 +1083,7 @@ def test_large_zarr_with_dask(self, zarr_reader, tmp_path): # Create a moderately large Zarr store (not huge to keep test fast) arr = np.random.randint(0, 255, size=(50, 512, 512), dtype=np.uint8) zarr_path = tmp_path / "large.zarr" - root = zarr.open_group(str(zarr_path), mode="w") + root = _open_test_zarr_group(zarr_path, mode="w") root.create_dataset("data", data=arr) # Open with Dask should not load into memory immediately @@ -1055,7 +1104,7 @@ def test_zarr_native_chunking(self, zarr_reader, tmp_path): zarr_path = tmp_path / "chunked.zarr" # Create with specific chunking - root = zarr.open_group(str(zarr_path), mode="w") + root = _open_test_zarr_group(zarr_path, mode="w") z = root.create_dataset( "data", shape=arr.shape, @@ -1076,7 +1125,7 @@ def test_zarr_with_nested_groups(self, zarr_reader, tmp_path): """Test opening a Zarr store with nested groups.""" zarr_path = tmp_path / "nested.zarr" - root = zarr.open_group(str(zarr_path), mode="w") + root = _open_test_zarr_group(zarr_path, mode="w") # Create nested structure grp1 = root.create_group("group1") @@ -1106,7 +1155,7 @@ def test_zarr_with_nested_only_arrays(self, zarr_reader, tmp_path): """Test opening a store where arrays exist only in nested groups.""" zarr_path = tmp_path / "nested_only.zarr" - root = zarr.open_group(str(zarr_path), mode="w") + root = _open_test_zarr_group(zarr_path, mode="w") nested = root.create_group("setup0").create_group("timepoint0") expected = np.random.randint(0, 255, size=(30, 32, 32), dtype=np.uint8) nested.create_dataset("s0", data=expected, chunks=(2, 16, 16)) @@ -1123,7 +1172,7 @@ def test_zarr_empty_store_raises_error(self, zarr_reader, tmp_path): zarr_path = tmp_path / "empty.zarr" # Create an empty group with no arrays - zarr.open_group(str(zarr_path), mode="w") + _open_test_zarr_group(zarr_path, mode="w") with pytest.raises(ValueError, match="No arrays found"): zarr_reader.open(zarr_path) @@ -1147,6 +1196,21 @@ def test_zarr_axes_from_attrs(self, zarr_reader, temp_zarr_with_attrs): assert info.axes is not None assert info.axes == ["z", "y", "x"] + def test_zarr_axes_from_ome_v05_metadata(self, zarr_reader, tmp_path): + """Test extraction of axes from OME-Zarr 0.5-style root attrs.""" + zarr_path = tmp_path / "ome_v05.zarr" + root = _open_test_zarr_group(zarr_path, mode="w") + root.attrs["ome"] = { + "multiscales": [ + {"axes": [{"name": "z"}, {"name": "y"}, {"name": "x"}]} + ] + } + root.create_dataset("data", data=np.arange(24, dtype=np.uint16).reshape(2, 3, 4)) + + _, info = zarr_reader.open(zarr_path, prefer_dask=False) + + assert info.axes == ["z", "y", "x"] + def test_zarr_readonly_mode(self, zarr_reader, temp_zarr_2d): """Test that ZarrReader opens stores in read-only mode.""" zarr_path, _ = temp_zarr_2d @@ -1162,7 +1226,7 @@ def test_zarr_dask_lazy_evaluation(self, zarr_reader, tmp_path): arr = np.random.rand(100, 100, 100).astype(np.float32) zarr_path = tmp_path / "lazy.zarr" - root = zarr.open_group(str(zarr_path), mode="w") + root = _open_test_zarr_group(zarr_path, mode="w") root.create_dataset("data", data=arr) # Open as Dask @@ -1184,7 +1248,7 @@ def test_zarr_compressor_preserved(self, zarr_reader, tmp_path): zarr_path = tmp_path / "compressed.zarr" # Create with specific compressor - root = zarr.open_group(str(zarr_path), mode="w") + root = _open_test_zarr_group(zarr_path, mode="w") z = root.create_dataset( "data", shape=arr.shape, @@ -1621,7 +1685,7 @@ def test_hdf5_empty_file_raises_error(self, hdf5_reader, tmp_path): hdf5_path = tmp_path / "empty.h5" # Create an empty file with no datasets - with h5py.File(str(hdf5_path), "w") as f: + with h5py.File(str(hdf5_path), "w"): pass with pytest.raises(ValueError, match="No datasets found"): @@ -2281,11 +2345,9 @@ def temp_tiff_file(self, tmp_path): @pytest.fixture def temp_zarr_file(self, tmp_path): """Fixture to create a temporary Zarr store.""" - import zarr - arr = np.random.rand(100, 100).astype(np.float32) zarr_path = tmp_path / "test.zarr" - root = zarr.open_group(str(zarr_path), mode="w") + root = _open_test_zarr_group(zarr_path, mode="w") root.create_dataset("data", data=arr) return zarr_path, arr @@ -2608,11 +2670,9 @@ def test_open_tiff_with_metadata(self, image_opener, tmp_path): def test_open_zarr_with_dask_chunking(self, image_opener, tmp_path): """Test opening Zarr with Dask and custom chunking.""" - import zarr - arr = np.random.rand(200, 200).astype(np.float32) zarr_path = tmp_path / "test_chunked.zarr" - root = zarr.open_group(str(zarr_path), mode="w") + root = _open_test_zarr_group(zarr_path, mode="w") root.create_dataset("data", data=arr, chunks=(100, 100)) darr, info = image_opener.open(zarr_path, prefer_dask=True, chunks=(50, 50)) @@ -2689,12 +2749,10 @@ def test_opener_with_duplicate_readers(self): def test_open_large_file_with_dask(self, image_opener, tmp_path): """Test opening a large file with Dask for lazy loading.""" - import zarr - # Create a moderately large file arr = np.random.rand(200, 512, 512).astype(np.float32) zarr_path = tmp_path / "large.zarr" - root = zarr.open_group(str(zarr_path), mode="w") + root = _open_test_zarr_group(zarr_path, mode="w") root.create_dataset("data", data=arr) darr, info = image_opener.open( diff --git a/uv.lock b/uv.lock index d47586b..ae74296 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = "==3.12.*" resolution-markers = [ "sys_platform == 'darwin'", @@ -7,6 +7,89 @@ resolution-markers = [ "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", ] +[[package]] +name = "aiobotocore" +version = "3.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "aioitertools" }, + { name = "botocore" }, + { name = "jmespath" }, + { name = "multidict" }, + { name = "python-dateutil" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/9f/a0568deaf008f4a7e3d57a7f80f1537df894df0e49bd4a790bb22f9a2d8e/aiobotocore-3.3.0.tar.gz", hash = "sha256:9abc21d91edd6c9c2e4a07e11bdfcbb159f0b9116ab2a0a5a349113533a18fb2", size = 122940, upload-time = "2026-03-18T09:58:49.077Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/54/a295bd8d7ac900c339b2c7024ed0ff9538afb60e92eb0979b8bb49deb20e/aiobotocore-3.3.0-py3-none-any.whl", hash = "sha256:9125ab2b63740dfe3b66b8d5a90d13aed9587b850aa53225ef214a04a1aa7fdc", size = 87817, upload-time = "2026-03-18T09:58:47.466Z" }, +] + +[[package]] +name = "aiohappyeyeballs" +version = "2.6.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/26/30/f84a107a9c4331c14b2b586036f40965c128aa4fee4dda5d3d51cb14ad54/aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558", size = 22760, upload-time = "2025-03-12T01:42:48.764Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0f/15/5bf3b99495fb160b63f95972b81750f18f7f4e02ad051373b669d17d44f2/aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8", size = 15265, upload-time = "2025-03-12T01:42:47.083Z" }, +] + +[[package]] +name = "aiohttp" +version = "3.13.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohappyeyeballs" }, + { name = "aiosignal" }, + { name = "attrs" }, + { name = "frozenlist" }, + { name = "multidict" }, + { name = "propcache" }, + { name = "yarl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/50/42/32cf8e7704ceb4481406eb87161349abb46a57fee3f008ba9cb610968646/aiohttp-3.13.3.tar.gz", hash = "sha256:a949eee43d3782f2daae4f4a2819b2cb9b0c5d3b7f7a927067cc84dafdbb9f88", size = 7844556, upload-time = "2026-01-03T17:33:05.204Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/be/4fc11f202955a69e0db803a12a062b8379c970c7c84f4882b6da17337cc1/aiohttp-3.13.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:b903a4dfee7d347e2d87697d0713be59e0b87925be030c9178c5faa58ea58d5c", size = 739732, upload-time = "2026-01-03T17:30:14.23Z" }, + { url = "https://files.pythonhosted.org/packages/97/2c/621d5b851f94fa0bb7430d6089b3aa970a9d9b75196bc93bb624b0db237a/aiohttp-3.13.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a45530014d7a1e09f4a55f4f43097ba0fd155089372e105e4bff4ca76cb1b168", size = 494293, upload-time = "2026-01-03T17:30:15.96Z" }, + { url = "https://files.pythonhosted.org/packages/5d/43/4be01406b78e1be8320bb8316dc9c42dbab553d281c40364e0f862d5661c/aiohttp-3.13.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:27234ef6d85c914f9efeb77ff616dbf4ad2380be0cda40b4db086ffc7ddd1b7d", size = 493533, upload-time = "2026-01-03T17:30:17.431Z" }, + { url = "https://files.pythonhosted.org/packages/8d/a8/5a35dc56a06a2c90d4742cbf35294396907027f80eea696637945a106f25/aiohttp-3.13.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d32764c6c9aafb7fb55366a224756387cd50bfa720f32b88e0e6fa45b27dcf29", size = 1737839, upload-time = "2026-01-03T17:30:19.422Z" }, + { url = "https://files.pythonhosted.org/packages/bf/62/4b9eeb331da56530bf2e198a297e5303e1c1ebdceeb00fe9b568a65c5a0c/aiohttp-3.13.3-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b1a6102b4d3ebc07dad44fbf07b45bb600300f15b552ddf1851b5390202ea2e3", size = 1703932, upload-time = "2026-01-03T17:30:21.756Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f6/af16887b5d419e6a367095994c0b1332d154f647e7dc2bd50e61876e8e3d/aiohttp-3.13.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c014c7ea7fb775dd015b2d3137378b7be0249a448a1612268b5a90c2d81de04d", size = 1771906, upload-time = "2026-01-03T17:30:23.932Z" }, + { url = "https://files.pythonhosted.org/packages/ce/83/397c634b1bcc24292fa1e0c7822800f9f6569e32934bdeef09dae7992dfb/aiohttp-3.13.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2b8d8ddba8f95ba17582226f80e2de99c7a7948e66490ef8d947e272a93e9463", size = 1871020, upload-time = "2026-01-03T17:30:26Z" }, + { url = "https://files.pythonhosted.org/packages/86/f6/a62cbbf13f0ac80a70f71b1672feba90fdb21fd7abd8dbf25c0105fb6fa3/aiohttp-3.13.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9ae8dd55c8e6c4257eae3a20fd2c8f41edaea5992ed67156642493b8daf3cecc", size = 1755181, upload-time = "2026-01-03T17:30:27.554Z" }, + { url = "https://files.pythonhosted.org/packages/0a/87/20a35ad487efdd3fba93d5843efdfaa62d2f1479eaafa7453398a44faf13/aiohttp-3.13.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:01ad2529d4b5035578f5081606a465f3b814c542882804e2e8cda61adf5c71bf", size = 1561794, upload-time = "2026-01-03T17:30:29.254Z" }, + { url = "https://files.pythonhosted.org/packages/de/95/8fd69a66682012f6716e1bc09ef8a1a2a91922c5725cb904689f112309c4/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bb4f7475e359992b580559e008c598091c45b5088f28614e855e42d39c2f1033", size = 1697900, upload-time = "2026-01-03T17:30:31.033Z" }, + { url = "https://files.pythonhosted.org/packages/e5/66/7b94b3b5ba70e955ff597672dad1691333080e37f50280178967aff68657/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:c19b90316ad3b24c69cd78d5c9b4f3aa4497643685901185b65166293d36a00f", size = 1728239, upload-time = "2026-01-03T17:30:32.703Z" }, + { url = "https://files.pythonhosted.org/packages/47/71/6f72f77f9f7d74719692ab65a2a0252584bf8d5f301e2ecb4c0da734530a/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:96d604498a7c782cb15a51c406acaea70d8c027ee6b90c569baa6e7b93073679", size = 1740527, upload-time = "2026-01-03T17:30:34.695Z" }, + { url = "https://files.pythonhosted.org/packages/fa/b4/75ec16cbbd5c01bdaf4a05b19e103e78d7ce1ef7c80867eb0ace42ff4488/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:084911a532763e9d3dd95adf78a78f4096cd5f58cdc18e6fdbc1b58417a45423", size = 1554489, upload-time = "2026-01-03T17:30:36.864Z" }, + { url = "https://files.pythonhosted.org/packages/52/8f/bc518c0eea29f8406dcf7ed1f96c9b48e3bc3995a96159b3fc11f9e08321/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7a4a94eb787e606d0a09404b9c38c113d3b099d508021faa615d70a0131907ce", size = 1767852, upload-time = "2026-01-03T17:30:39.433Z" }, + { url = "https://files.pythonhosted.org/packages/9d/f2/a07a75173124f31f11ea6f863dc44e6f09afe2bca45dd4e64979490deab1/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:87797e645d9d8e222e04160ee32aa06bc5c163e8499f24db719e7852ec23093a", size = 1722379, upload-time = "2026-01-03T17:30:41.081Z" }, + { url = "https://files.pythonhosted.org/packages/3c/4a/1a3fee7c21350cac78e5c5cef711bac1b94feca07399f3d406972e2d8fcd/aiohttp-3.13.3-cp312-cp312-win32.whl", hash = "sha256:b04be762396457bef43f3597c991e192ee7da460a4953d7e647ee4b1c28e7046", size = 428253, upload-time = "2026-01-03T17:30:42.644Z" }, + { url = "https://files.pythonhosted.org/packages/d9/b7/76175c7cb4eb73d91ad63c34e29fc4f77c9386bba4a65b53ba8e05ee3c39/aiohttp-3.13.3-cp312-cp312-win_amd64.whl", hash = "sha256:e3531d63d3bdfa7e3ac5e9b27b2dd7ec9df3206a98e0b3445fa906f233264c57", size = 455407, upload-time = "2026-01-03T17:30:44.195Z" }, +] + +[[package]] +name = "aioitertools" +version = "0.13.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fd/3c/53c4a17a05fb9ea2313ee1777ff53f5e001aefd5cc85aa2f4c2d982e1e38/aioitertools-0.13.0.tar.gz", hash = "sha256:620bd241acc0bbb9ec819f1ab215866871b4bbd1f73836a55f799200ee86950c", size = 19322, upload-time = "2025-11-06T22:17:07.609Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/a1/510b0a7fadc6f43a6ce50152e69dbd86415240835868bb0bd9b5b88b1e06/aioitertools-0.13.0-py3-none-any.whl", hash = "sha256:0be0292b856f08dfac90e31f4739432f4cb6d7520ab9eb73e143f4f2fa5259be", size = 24182, upload-time = "2025-11-06T22:17:06.502Z" }, +] + +[[package]] +name = "aiosignal" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "frozenlist" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/61/62/06741b579156360248d1ec624842ad0edf697050bbaf7c3e46394e106ad1/aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7", size = 25007, upload-time = "2025-07-03T22:54:43.528Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" }, +] + [[package]] name = "alabaster" version = "0.7.16" @@ -179,12 +262,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ed/c9/d7977eaacb9df673210491da99e6a247e93df98c715fc43fd136ce1d3d33/arrow-1.4.0-py3-none-any.whl", hash = "sha256:749f0769958ebdc79c173ff0b0670d59051a535fa26e8eba02953dc19eb43205", size = 68797, upload-time = "2025-10-18T17:46:45.663Z" }, ] -[[package]] -name = "asciitree" -version = "0.3.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2d/6a/885bc91484e1aa8f618f6f0228d76d0e67000b0fdd6090673b777e311913/asciitree-0.3.3.tar.gz", hash = "sha256:4aa4b9b649f85e3fcb343363d97564aa1fb62e249677f2e18a96765145cc0f6e", size = 3951, upload-time = "2016-09-05T19:10:42.681Z" } - [[package]] name = "asttokens" version = "3.0.1" @@ -329,6 +406,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/47/0b/bdf449df87be3f07b23091ceafee8c3ef569cf6d2fb7edec6e3b12b3faa4/bokeh-3.9.0-py3-none-any.whl", hash = "sha256:b252bfb16a505f0e0c57d532d0df308ae1667235bafc622aa9441fe9e7c5ce4a", size = 6396068, upload-time = "2026-03-11T17:58:31.645Z" }, ] +[[package]] +name = "botocore" +version = "1.42.70" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/54/b80e1fcee4f732e0e9314bbb8679be9d5690caa1566c4a4cd14e9724d2dd/botocore-1.42.70.tar.gz", hash = "sha256:9ee17553b7febd1a0c1253b3b62ab5d79607eb6163c8fb943470a8893c31d4fa", size = 14997068, upload-time = "2026-03-17T19:43:10.678Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/51/08f32aea872253173f513ba68122f4300966290677c8e59887b4ffd5d957/botocore-1.42.70-py3-none-any.whl", hash = "sha256:54ed9d25f05f810efd22b0dfda0bb9178df3ad8952b2e4359e05156c9321bd3c", size = 14671393, upload-time = "2026-03-17T19:43:06.777Z" }, +] + [[package]] name = "build" version = "1.4.0" @@ -525,6 +616,10 @@ usegment3d = [ { name = "cellpose" }, { name = "u-segment3d" }, ] +viewer = [ + { name = "napari-ome-zarr" }, + { name = "ome-zarr" }, +] [package.metadata] requires-dist = [ @@ -534,19 +629,21 @@ requires-dist = [ { name = "cellpose", marker = "extra == 'usegment3d'", specifier = "<3" }, { name = "codespell", marker = "extra == 'docs'" }, { name = "cython", specifier = ">=3.1.4" }, - { name = "dask", specifier = "==2025.1.0" }, + { name = "dask", specifier = "==2026.1.1" }, { name = "dask-image" }, { name = "dask-jobqueue" }, - { name = "distributed", specifier = "==2025.1.0" }, + { name = "distributed", specifier = "==2026.1.1" }, { name = "h5py" }, { name = "imagecodecs", specifier = ">=2024.9.22,<2025" }, { name = "jupyterlab" }, { name = "matplotlib" }, { name = "mss", marker = "extra == 'docs'" }, { name = "napari", specifier = ">0.6.1" }, + { name = "napari-ome-zarr", marker = "extra == 'viewer'", specifier = ">=0.7.2" }, { name = "nbconvert", marker = "extra == 'docs'" }, { name = "neuroglancer", specifier = ">=2.40.1,<3.0.0" }, { name = "numpydoc", marker = "extra == 'docs'" }, + { name = "ome-zarr", marker = "extra == 'viewer'", specifier = ">=0.14.0" }, { name = "opencv-python" }, { name = "pandas", specifier = ">=2.3.3" }, { name = "pandas-stubs", marker = "extra == 'dev'", specifier = "~=2.3.3" }, @@ -571,12 +668,12 @@ requires-dist = [ { name = "sphinx-issues", marker = "extra == 'docs'" }, { name = "sphinx-rtd-theme", marker = "extra == 'docs'" }, { name = "sphinx-toolbox", marker = "extra == 'docs'" }, - { name = "tifffile", specifier = "==2025.1.10" }, + { name = "tifffile", specifier = "==2025.10.16" }, { name = "ty", marker = "extra == 'dev'", specifier = ">=0.0.1a32" }, { name = "u-segment3d", marker = "extra == 'usegment3d'", specifier = ">=0.1.4,<0.2" }, - { name = "zarr", specifier = "<3.0" }, + { name = "zarr", specifier = ">=3,<4" }, ] -provides-extras = ["decon", "usegment3d", "dev", "docs"] +provides-extras = ["decon", "usegment3d", "viewer", "dev", "docs"] [[package]] name = "click" @@ -819,7 +916,7 @@ wheels = [ [[package]] name = "dask" -version = "2025.1.0" +version = "2026.1.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -830,9 +927,9 @@ dependencies = [ { name = "pyyaml" }, { name = "toolz" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ff/41/43eb54e0f6d1ba971d5adcad8f0862b327af6a2041aa134acbcec630ad43/dask-2025.1.0.tar.gz", hash = "sha256:bb807586ff20f0f59f3d36fe34eb4a95f75a1aae2a775b521de6dd53727d2063", size = 10758681, upload-time = "2025-01-17T16:54:13.728Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1e/46/61ecde57bac647ca7eb6ffef8dcd90af6c1c649020874cd7fd8738003d62/dask-2026.1.1.tar.gz", hash = "sha256:12b1dbb0d6e92f287feb4076871600b2fba3a843d35ff214776ada5e9e7a1529", size = 10994732, upload-time = "2026-01-16T12:35:30.258Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/91/a0/016d956a3fec193e3a5b466ca912944669c18dccc736b64a9e28ccdcc5f7/dask-2025.1.0-py3-none-any.whl", hash = "sha256:db86220c8d19bdf464cbe11a87a2c8f5d537acf586bb02eed6d61a302af5c2fd", size = 1371235, upload-time = "2025-01-17T16:54:09.918Z" }, + { url = "https://files.pythonhosted.org/packages/e9/4b/9cc373120658a2516aa5f6dcdde631c95d714b876d29ad8f8e009d793f3f/dask-2026.1.1-py3-none-any.whl", hash = "sha256:146b0ef2918eb581e06139183a88801b4a8c52d7c37758a91f8c3b75c54b0e15", size = 1481492, upload-time = "2026-01-16T12:35:22.602Z" }, ] [package.optional-dependencies] @@ -960,7 +1057,7 @@ wheels = [ [[package]] name = "distributed" -version = "2025.1.0" +version = "2026.1.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -979,9 +1076,9 @@ dependencies = [ { name = "urllib3" }, { name = "zict" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c7/8e/f35c479339261a88cf6209f145d4f55316b60b6d6548832ebf17c3ced072/distributed-2025.1.0.tar.gz", hash = "sha256:8924c49adae0fc8532b464e94bdfea979c08c67835bafb5f315f33cc0ab14dd3", size = 1109046, upload-time = "2025-01-17T16:54:39.638Z" } +sdist = { url = "https://files.pythonhosted.org/packages/45/19/0c13efdffc55cb311594f66c1c8d36a3c4711e427c820155fb9c59138b5e/distributed-2026.1.1.tar.gz", hash = "sha256:3d2709a43912797df3c345af3bb333bbf1a386ec1e9e6a134e5f050521373dbd", size = 2102870, upload-time = "2026-01-16T12:34:58.258Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f8/c6/ab0d2104364653897a2bad66d5da9dbf282897b126d3690c92c7d4b23b35/distributed-2025.1.0-py3-none-any.whl", hash = "sha256:0b9c0ebcab8fe25322d71d04deb6d8ed8c7bbfd1521d1906d5d71bf82eee5ae3", size = 1018174, upload-time = "2025-01-17T16:54:37.577Z" }, + { url = "https://files.pythonhosted.org/packages/b9/dc/6d709bcf8fed7611d8a510aeed23b0436cf6db5b61e63c8eb8451eb0d4d8/distributed-2026.1.1-py3-none-any.whl", hash = "sha256:506759b1ed88e45e12ba65e2a429de9911862db55d27dd8bb293c6268430374e", size = 1008417, upload-time = "2026-01-16T12:34:55.535Z" }, ] [[package]] @@ -1015,6 +1112,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5b/11/208f72084084d3f6a2ed5ebfdfc846692c3f7ad6dce65e400194924f7eed/domdf_python_tools-3.10.0-py3-none-any.whl", hash = "sha256:5e71c1be71bbcc1f881d690c8984b60e64298ec256903b3147f068bc33090c36", size = 126860, upload-time = "2025-02-12T17:34:04.093Z" }, ] +[[package]] +name = "donfig" +version = "0.8.1.post1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/25/71/80cc718ff6d7abfbabacb1f57aaa42e9c1552bfdd01e64ddd704e4a03638/donfig-0.8.1.post1.tar.gz", hash = "sha256:3bef3413a4c1c601b585e8d297256d0c1470ea012afa6e8461dc28bfb7c23f52", size = 19506, upload-time = "2024-05-23T14:14:31.513Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/d5/c5db1ea3394c6e1732fb3286b3bd878b59507a8f77d32a2cebda7d7b7cd4/donfig-0.8.1.post1-py3-none-any.whl", hash = "sha256:2a3175ce74a06109ff9307d90a230f81215cbac9a751f4d1c6194644b8204f9d", size = 21592, upload-time = "2024-05-23T14:13:55.283Z" }, +] + [[package]] name = "edt" version = "3.0.0" @@ -1163,6 +1272,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/93/6e/bd7fbfacca077bc6f34f1a1109800a2c41ab50f4704d3a0507ba41009915/freetype_py-2.5.1-py3-none-win_amd64.whl", hash = "sha256:0b7f8e0342779f65ca13ef8bc103938366fecade23e6bb37cb671c2b8ad7f124", size = 814608, upload-time = "2024-08-29T18:32:24.648Z" }, ] +[[package]] +name = "frozenlist" +version = "1.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2d/f5/c831fac6cc817d26fd54c7eaccd04ef7e0288806943f7cc5bbf69f3ac1f0/frozenlist-1.8.0.tar.gz", hash = "sha256:3ede829ed8d842f6cd48fc7081d7a41001a56f1f38603f9d49bf3020d59a31ad", size = 45875, upload-time = "2025-10-06T05:38:17.865Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/29/948b9aa87e75820a38650af445d2ef2b6b8a6fab1a23b6bb9e4ef0be2d59/frozenlist-1.8.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:78f7b9e5d6f2fdb88cdde9440dc147259b62b9d3b019924def9f6478be254ac1", size = 87782, upload-time = "2025-10-06T05:36:06.649Z" }, + { url = "https://files.pythonhosted.org/packages/64/80/4f6e318ee2a7c0750ed724fa33a4bdf1eacdc5a39a7a24e818a773cd91af/frozenlist-1.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:229bf37d2e4acdaf808fd3f06e854a4a7a3661e871b10dc1f8f1896a3b05f18b", size = 50594, upload-time = "2025-10-06T05:36:07.69Z" }, + { url = "https://files.pythonhosted.org/packages/2b/94/5c8a2b50a496b11dd519f4a24cb5496cf125681dd99e94c604ccdea9419a/frozenlist-1.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f833670942247a14eafbb675458b4e61c82e002a148f49e68257b79296e865c4", size = 50448, upload-time = "2025-10-06T05:36:08.78Z" }, + { url = "https://files.pythonhosted.org/packages/6a/bd/d91c5e39f490a49df14320f4e8c80161cfcce09f1e2cde1edd16a551abb3/frozenlist-1.8.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:494a5952b1c597ba44e0e78113a7266e656b9794eec897b19ead706bd7074383", size = 242411, upload-time = "2025-10-06T05:36:09.801Z" }, + { url = "https://files.pythonhosted.org/packages/8f/83/f61505a05109ef3293dfb1ff594d13d64a2324ac3482be2cedc2be818256/frozenlist-1.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96f423a119f4777a4a056b66ce11527366a8bb92f54e541ade21f2374433f6d4", size = 243014, upload-time = "2025-10-06T05:36:11.394Z" }, + { url = "https://files.pythonhosted.org/packages/d8/cb/cb6c7b0f7d4023ddda30cf56b8b17494eb3a79e3fda666bf735f63118b35/frozenlist-1.8.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3462dd9475af2025c31cc61be6652dfa25cbfb56cbbf52f4ccfe029f38decaf8", size = 234909, upload-time = "2025-10-06T05:36:12.598Z" }, + { url = "https://files.pythonhosted.org/packages/31/c5/cd7a1f3b8b34af009fb17d4123c5a778b44ae2804e3ad6b86204255f9ec5/frozenlist-1.8.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c4c800524c9cd9bac5166cd6f55285957fcfc907db323e193f2afcd4d9abd69b", size = 250049, upload-time = "2025-10-06T05:36:14.065Z" }, + { url = "https://files.pythonhosted.org/packages/c0/01/2f95d3b416c584a1e7f0e1d6d31998c4a795f7544069ee2e0962a4b60740/frozenlist-1.8.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d6a5df73acd3399d893dafc71663ad22534b5aa4f94e8a2fabfe856c3c1b6a52", size = 256485, upload-time = "2025-10-06T05:36:15.39Z" }, + { url = "https://files.pythonhosted.org/packages/ce/03/024bf7720b3abaebcff6d0793d73c154237b85bdf67b7ed55e5e9596dc9a/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:405e8fe955c2280ce66428b3ca55e12b3c4e9c336fb2103a4937e891c69a4a29", size = 237619, upload-time = "2025-10-06T05:36:16.558Z" }, + { url = "https://files.pythonhosted.org/packages/69/fa/f8abdfe7d76b731f5d8bd217827cf6764d4f1d9763407e42717b4bed50a0/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:908bd3f6439f2fef9e85031b59fd4f1297af54415fb60e4254a95f75b3cab3f3", size = 250320, upload-time = "2025-10-06T05:36:17.821Z" }, + { url = "https://files.pythonhosted.org/packages/f5/3c/b051329f718b463b22613e269ad72138cc256c540f78a6de89452803a47d/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:294e487f9ec720bd8ffcebc99d575f7eff3568a08a253d1ee1a0378754b74143", size = 246820, upload-time = "2025-10-06T05:36:19.046Z" }, + { url = "https://files.pythonhosted.org/packages/0f/ae/58282e8f98e444b3f4dd42448ff36fa38bef29e40d40f330b22e7108f565/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:74c51543498289c0c43656701be6b077f4b265868fa7f8a8859c197006efb608", size = 250518, upload-time = "2025-10-06T05:36:20.763Z" }, + { url = "https://files.pythonhosted.org/packages/8f/96/007e5944694d66123183845a106547a15944fbbb7154788cbf7272789536/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:776f352e8329135506a1d6bf16ac3f87bc25b28e765949282dcc627af36123aa", size = 239096, upload-time = "2025-10-06T05:36:22.129Z" }, + { url = "https://files.pythonhosted.org/packages/66/bb/852b9d6db2fa40be96f29c0d1205c306288f0684df8fd26ca1951d461a56/frozenlist-1.8.0-cp312-cp312-win32.whl", hash = "sha256:433403ae80709741ce34038da08511d4a77062aa924baf411ef73d1146e74faf", size = 39985, upload-time = "2025-10-06T05:36:23.661Z" }, + { url = "https://files.pythonhosted.org/packages/b8/af/38e51a553dd66eb064cdf193841f16f077585d4d28394c2fa6235cb41765/frozenlist-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:34187385b08f866104f0c0617404c8eb08165ab1272e884abc89c112e9c00746", size = 44591, upload-time = "2025-10-06T05:36:24.958Z" }, + { url = "https://files.pythonhosted.org/packages/a7/06/1dc65480ab147339fecc70797e9c2f69d9cea9cf38934ce08df070fdb9cb/frozenlist-1.8.0-cp312-cp312-win_arm64.whl", hash = "sha256:fe3c58d2f5db5fbd18c2987cba06d51b0529f52bc3a6cdc33d3f4eab725104bd", size = 40102, upload-time = "2025-10-06T05:36:26.333Z" }, + { url = "https://files.pythonhosted.org/packages/9a/9a/e35b4a917281c0b8419d4207f4334c8e8c5dbf4f3f5f9ada73958d937dcc/frozenlist-1.8.0-py3-none-any.whl", hash = "sha256:0c18a16eab41e82c295618a77502e17b195883241c563b00f0aa5106fc4eaa0d", size = 13409, upload-time = "2025-10-06T05:38:16.721Z" }, +] + [[package]] name = "fsspec" version = "2026.2.0" @@ -1172,6 +1306,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e6/ab/fb21f4c939bb440104cc2b396d3be1d9b7a9fd3c6c2a53d98c45b3d7c954/fsspec-2026.2.0-py3-none-any.whl", hash = "sha256:98de475b5cb3bd66bedd5c4679e87b4fdfe1a3bf4d707b151b3c07e58c9a2437", size = 202505, upload-time = "2026-02-05T21:50:51.819Z" }, ] +[package.optional-dependencies] +s3 = [ + { name = "s3fs" }, +] + [[package]] name = "google-apitools" version = "0.5.35" @@ -1201,6 +1340,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/83/1d/d6466de3a5249d35e832a52834115ca9d1d0de6abc22065f049707516d47/google_auth-2.48.0-py3-none-any.whl", hash = "sha256:2e2a537873d449434252a9632c28bfc268b0adb1e53f9fb62afc5333a975903f", size = 236499, upload-time = "2026-01-26T19:22:45.099Z" }, ] +[[package]] +name = "google-crc32c" +version = "1.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/03/41/4b9c02f99e4c5fb477122cd5437403b552873f014616ac1d19ac8221a58d/google_crc32c-1.8.0.tar.gz", hash = "sha256:a428e25fb7691024de47fecfbff7ff957214da51eddded0da0ae0e0f03a2cf79", size = 14192, upload-time = "2025-12-16T00:35:25.142Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/5f/7307325b1198b59324c0fa9807cafb551afb65e831699f2ce211ad5c8240/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:4b8286b659c1335172e39563ab0a768b8015e88e08329fa5321f774275fc3113", size = 31300, upload-time = "2025-12-16T00:21:56.723Z" }, + { url = "https://files.pythonhosted.org/packages/21/8e/58c0d5d86e2220e6a37befe7e6a94dd2f6006044b1a33edf1ff6d9f7e319/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:2a3dc3318507de089c5384cc74d54318401410f82aa65b2d9cdde9d297aca7cb", size = 30867, upload-time = "2025-12-16T00:38:31.302Z" }, + { url = "https://files.pythonhosted.org/packages/ce/a9/a780cc66f86335a6019f557a8aaca8fbb970728f0efd2430d15ff1beae0e/google_crc32c-1.8.0-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:14f87e04d613dfa218d6135e81b78272c3b904e2a7053b841481b38a7d901411", size = 33364, upload-time = "2025-12-16T00:40:22.96Z" }, + { url = "https://files.pythonhosted.org/packages/21/3f/3457ea803db0198c9aaca2dd373750972ce28a26f00544b6b85088811939/google_crc32c-1.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cb5c869c2923d56cb0c8e6bcdd73c009c36ae39b652dbe46a05eb4ef0ad01454", size = 33740, upload-time = "2025-12-16T00:40:23.96Z" }, + { url = "https://files.pythonhosted.org/packages/df/c0/87c2073e0c72515bb8733d4eef7b21548e8d189f094b5dad20b0ecaf64f6/google_crc32c-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:3cc0c8912038065eafa603b238abf252e204accab2a704c63b9e14837a854962", size = 34437, upload-time = "2025-12-16T00:35:21.395Z" }, +] + [[package]] name = "gradient-free-optimizers" version = "1.10.1" @@ -1495,6 +1647,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, ] +[[package]] +name = "jmespath" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" }, +] + [[package]] name = "joblib" version = "1.5.3" @@ -1941,6 +2102,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/23/28/1e3e5cd1d677cca68b26166f704f72e35b1e8b6d5076d8ebeebc4e40a649/mss-10.1.0-py3-none-any.whl", hash = "sha256:9179c110cadfef5dc6dc4a041a0cd161c74c379218648e6640b48c6b5cfe8918", size = 24525, upload-time = "2025-08-16T12:10:59.111Z" }, ] +[[package]] +name = "multidict" +version = "6.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1a/c2/c2d94cbe6ac1753f3fc980da97b3d930efe1da3af3c9f5125354436c073d/multidict-6.7.1.tar.gz", hash = "sha256:ec6652a1bee61c53a3e5776b6049172c53b6aaba34f18c9ad04f82712bac623d", size = 102010, upload-time = "2026-01-26T02:46:45.979Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/9c/f20e0e2cf80e4b2e4b1c365bf5fe104ee633c751a724246262db8f1a0b13/multidict-6.7.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a90f75c956e32891a4eda3639ce6dd86e87105271f43d43442a3aedf3cddf172", size = 76893, upload-time = "2026-01-26T02:43:52.754Z" }, + { url = "https://files.pythonhosted.org/packages/fe/cf/18ef143a81610136d3da8193da9d80bfe1cb548a1e2d1c775f26b23d024a/multidict-6.7.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fccb473e87eaa1382689053e4a4618e7ba7b9b9b8d6adf2027ee474597128cd", size = 45456, upload-time = "2026-01-26T02:43:53.893Z" }, + { url = "https://files.pythonhosted.org/packages/a9/65/1caac9d4cd32e8433908683446eebc953e82d22b03d10d41a5f0fefe991b/multidict-6.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0fa96985700739c4c7853a43c0b3e169360d6855780021bfc6d0f1ce7c123e7", size = 43872, upload-time = "2026-01-26T02:43:55.041Z" }, + { url = "https://files.pythonhosted.org/packages/cf/3b/d6bd75dc4f3ff7c73766e04e705b00ed6dbbaccf670d9e05a12b006f5a21/multidict-6.7.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cb2a55f408c3043e42b40cc8eecd575afa27b7e0b956dfb190de0f8499a57a53", size = 251018, upload-time = "2026-01-26T02:43:56.198Z" }, + { url = "https://files.pythonhosted.org/packages/fd/80/c959c5933adedb9ac15152e4067c702a808ea183a8b64cf8f31af8ad3155/multidict-6.7.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb0ce7b2a32d09892b3dd6cc44877a0d02a33241fafca5f25c8b6b62374f8b75", size = 258883, upload-time = "2026-01-26T02:43:57.499Z" }, + { url = "https://files.pythonhosted.org/packages/86/85/7ed40adafea3d4f1c8b916e3b5cc3a8e07dfcdcb9cd72800f4ed3ca1b387/multidict-6.7.1-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c3a32d23520ee37bf327d1e1a656fec76a2edd5c038bf43eddfa0572ec49c60b", size = 242413, upload-time = "2026-01-26T02:43:58.755Z" }, + { url = "https://files.pythonhosted.org/packages/d2/57/b8565ff533e48595503c785f8361ff9a4fde4d67de25c207cd0ba3befd03/multidict-6.7.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9c90fed18bffc0189ba814749fdcc102b536e83a9f738a9003e569acd540a733", size = 268404, upload-time = "2026-01-26T02:44:00.216Z" }, + { url = "https://files.pythonhosted.org/packages/e0/50/9810c5c29350f7258180dfdcb2e52783a0632862eb334c4896ac717cebcb/multidict-6.7.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:da62917e6076f512daccfbbde27f46fed1c98fee202f0559adec8ee0de67f71a", size = 269456, upload-time = "2026-01-26T02:44:02.202Z" }, + { url = "https://files.pythonhosted.org/packages/f3/8d/5e5be3ced1d12966fefb5c4ea3b2a5b480afcea36406559442c6e31d4a48/multidict-6.7.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bfde23ef6ed9db7eaee6c37dcec08524cb43903c60b285b172b6c094711b3961", size = 256322, upload-time = "2026-01-26T02:44:03.56Z" }, + { url = "https://files.pythonhosted.org/packages/31/6e/d8a26d81ac166a5592782d208dd90dfdc0a7a218adaa52b45a672b46c122/multidict-6.7.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3758692429e4e32f1ba0df23219cd0b4fc0a52f476726fff9337d1a57676a582", size = 253955, upload-time = "2026-01-26T02:44:04.845Z" }, + { url = "https://files.pythonhosted.org/packages/59/4c/7c672c8aad41534ba619bcd4ade7a0dc87ed6b8b5c06149b85d3dd03f0cd/multidict-6.7.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:398c1478926eca669f2fd6a5856b6de9c0acf23a2cb59a14c0ba5844fa38077e", size = 251254, upload-time = "2026-01-26T02:44:06.133Z" }, + { url = "https://files.pythonhosted.org/packages/7b/bd/84c24de512cbafbdbc39439f74e967f19570ce7924e3007174a29c348916/multidict-6.7.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c102791b1c4f3ab36ce4101154549105a53dc828f016356b3e3bcae2e3a039d3", size = 252059, upload-time = "2026-01-26T02:44:07.518Z" }, + { url = "https://files.pythonhosted.org/packages/fa/ba/f5449385510825b73d01c2d4087bf6d2fccc20a2d42ac34df93191d3dd03/multidict-6.7.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a088b62bd733e2ad12c50dad01b7d0166c30287c166e137433d3b410add807a6", size = 263588, upload-time = "2026-01-26T02:44:09.382Z" }, + { url = "https://files.pythonhosted.org/packages/d7/11/afc7c677f68f75c84a69fe37184f0f82fce13ce4b92f49f3db280b7e92b3/multidict-6.7.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3d51ff4785d58d3f6c91bdbffcb5e1f7ddfda557727043aa20d20ec4f65e324a", size = 259642, upload-time = "2026-01-26T02:44:10.73Z" }, + { url = "https://files.pythonhosted.org/packages/2b/17/ebb9644da78c4ab36403739e0e6e0e30ebb135b9caf3440825001a0bddcb/multidict-6.7.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fc5907494fccf3e7d3f94f95c91d6336b092b5fc83811720fae5e2765890dfba", size = 251377, upload-time = "2026-01-26T02:44:12.042Z" }, + { url = "https://files.pythonhosted.org/packages/ca/a4/840f5b97339e27846c46307f2530a2805d9d537d8b8bd416af031cad7fa0/multidict-6.7.1-cp312-cp312-win32.whl", hash = "sha256:28ca5ce2fd9716631133d0e9a9b9a745ad7f60bac2bccafb56aa380fc0b6c511", size = 41887, upload-time = "2026-01-26T02:44:14.245Z" }, + { url = "https://files.pythonhosted.org/packages/80/31/0b2517913687895f5904325c2069d6a3b78f66cc641a86a2baf75a05dcbb/multidict-6.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcee94dfbd638784645b066074b338bc9cc155d4b4bffa4adce1615c5a426c19", size = 46053, upload-time = "2026-01-26T02:44:15.371Z" }, + { url = "https://files.pythonhosted.org/packages/0c/5b/aba28e4ee4006ae4c7df8d327d31025d760ffa992ea23812a601d226e682/multidict-6.7.1-cp312-cp312-win_arm64.whl", hash = "sha256:ba0a9fb644d0c1a2194cf7ffb043bd852cea63a57f66fbd33959f7dae18517bf", size = 43307, upload-time = "2026-01-26T02:44:16.852Z" }, + { url = "https://files.pythonhosted.org/packages/81/08/7036c080d7117f28a4af526d794aab6a84463126db031b007717c1a6676e/multidict-6.7.1-py3-none-any.whl", hash = "sha256:55d97cc6dae627efa6a6e548885712d4864b81110ac76fa4e534c03819fa4a56", size = 12319, upload-time = "2026-01-26T02:46:44.004Z" }, +] + [[package]] name = "multiprocess" version = "0.70.19" @@ -2026,6 +2214,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/01/72/2067f28fd0ae87978f3b61e8ec30c1d085bbed03f64eb58e43949d526b3a/napari_console-0.1.4-py3-none-any.whl", hash = "sha256:565df1fa15db579552af9e9d9d3883067c00191be282ad47d80f9b0d50b4e5ad", size = 9786, upload-time = "2025-10-15T14:24:17.677Z" }, ] +[[package]] +name = "napari-ome-zarr" +version = "0.7.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "napari" }, + { name = "ome-zarr" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2f/93/5c4aa0638c0f52fb471c0cc20769f028ec4e7f52392208de9ed40bde6684/napari_ome_zarr-0.7.2.tar.gz", hash = "sha256:6ccf7f0ca31d0199094d2df93f521168d43edf8126387baa53a9365da529fd95", size = 22144, upload-time = "2026-03-17T11:33:12.092Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/bc/b472c0a4be703045b1263ca6ce6912c8a4719b1dfbd3b5e0db9f628202ee/napari_ome_zarr-0.7.2-py3-none-any.whl", hash = "sha256:06833f2e8db73a6d91b8587b3588c4001a01305c9574e1e8ea479a40ac310746", size = 10024, upload-time = "2026-03-17T11:33:11.092Z" }, +] + [[package]] name = "napari-plugin-engine" version = "0.2.1" @@ -2426,6 +2627,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/95/a9/4f25a14d23f0786b64875b91784607c2277eff25d48f915e39ff0cff505a/oauth2client-4.1.3-py2.py3-none-any.whl", hash = "sha256:b8a81cc5d60e2d364f0b1b98f958dbd472887acaf1a5b05e21c28c31a2d6d3ac", size = 98206, upload-time = "2018-09-07T21:38:16.742Z" }, ] +[[package]] +name = "ome-zarr" +version = "0.14.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "dask" }, + { name = "deprecated" }, + { name = "fsspec", extra = ["s3"] }, + { name = "numpy" }, + { name = "rangehttpserver" }, + { name = "requests" }, + { name = "scikit-image" }, + { name = "toolz" }, + { name = "zarr" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3b/29/c69ebfc1082eb947c260ed47b6aa74780e82099558f4395668ae04562e4e/ome_zarr-0.14.0.tar.gz", hash = "sha256:96728fc03388110da436b2dbe35176887ad6dd8a1e27ee9d3ea75049451ca8e1", size = 76421, upload-time = "2026-03-10T22:18:44.3Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/ff/160e3098a6dadf1cca21a22d6ca69761bc900a60a7b8ac0adde7c58eccb4/ome_zarr-0.14.0-py3-none-any.whl", hash = "sha256:835e8348f7d6bbced6f0e7fc35b17ac5cca4d56ac26cccd310f45a0d6672eafb", size = 45058, upload-time = "2026-03-10T22:18:42.899Z" }, +] + [[package]] name = "opencv-python" version = "4.11.0.86" @@ -2701,6 +2923,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/84/03/0d3ce49e2505ae70cf43bc5bb3033955d2fc9f932163e84dc0779cc47f48/prompt_toolkit-3.0.52-py3-none-any.whl", hash = "sha256:9aac639a3bbd33284347de5ad8d68ecc044b91a762dc39b7c21095fcd6a19955", size = 391431, upload-time = "2025-08-27T15:23:59.498Z" }, ] +[[package]] +name = "propcache" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9e/da/e9fc233cf63743258bff22b3dfa7ea5baef7b5bc324af47a0ad89b8ffc6f/propcache-0.4.1.tar.gz", hash = "sha256:f48107a8c637e80362555f37ecf49abe20370e557cc4ab374f04ec4423c97c3d", size = 46442, upload-time = "2025-10-08T19:49:02.291Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/0f/f17b1b2b221d5ca28b4b876e8bb046ac40466513960646bda8e1853cdfa2/propcache-0.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e153e9cd40cc8945138822807139367f256f89c6810c2634a4f6902b52d3b4e2", size = 80061, upload-time = "2025-10-08T19:46:46.075Z" }, + { url = "https://files.pythonhosted.org/packages/76/47/8ccf75935f51448ba9a16a71b783eb7ef6b9ee60f5d14c7f8a8a79fbeed7/propcache-0.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:cd547953428f7abb73c5ad82cbb32109566204260d98e41e5dfdc682eb7f8403", size = 46037, upload-time = "2025-10-08T19:46:47.23Z" }, + { url = "https://files.pythonhosted.org/packages/0a/b6/5c9a0e42df4d00bfb4a3cbbe5cf9f54260300c88a0e9af1f47ca5ce17ac0/propcache-0.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f048da1b4f243fc44f205dfd320933a951b8d89e0afd4c7cacc762a8b9165207", size = 47324, upload-time = "2025-10-08T19:46:48.384Z" }, + { url = "https://files.pythonhosted.org/packages/9e/d3/6c7ee328b39a81ee877c962469f1e795f9db87f925251efeb0545e0020d0/propcache-0.4.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ec17c65562a827bba85e3872ead335f95405ea1674860d96483a02f5c698fa72", size = 225505, upload-time = "2025-10-08T19:46:50.055Z" }, + { url = "https://files.pythonhosted.org/packages/01/5d/1c53f4563490b1d06a684742cc6076ef944bc6457df6051b7d1a877c057b/propcache-0.4.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:405aac25c6394ef275dee4c709be43745d36674b223ba4eb7144bf4d691b7367", size = 230242, upload-time = "2025-10-08T19:46:51.815Z" }, + { url = "https://files.pythonhosted.org/packages/20/e1/ce4620633b0e2422207c3cb774a0ee61cac13abc6217763a7b9e2e3f4a12/propcache-0.4.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0013cb6f8dde4b2a2f66903b8ba740bdfe378c943c4377a200551ceb27f379e4", size = 238474, upload-time = "2025-10-08T19:46:53.208Z" }, + { url = "https://files.pythonhosted.org/packages/46/4b/3aae6835b8e5f44ea6a68348ad90f78134047b503765087be2f9912140ea/propcache-0.4.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15932ab57837c3368b024473a525e25d316d8353016e7cc0e5ba9eb343fbb1cf", size = 221575, upload-time = "2025-10-08T19:46:54.511Z" }, + { url = "https://files.pythonhosted.org/packages/6e/a5/8a5e8678bcc9d3a1a15b9a29165640d64762d424a16af543f00629c87338/propcache-0.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:031dce78b9dc099f4c29785d9cf5577a3faf9ebf74ecbd3c856a7b92768c3df3", size = 216736, upload-time = "2025-10-08T19:46:56.212Z" }, + { url = "https://files.pythonhosted.org/packages/f1/63/b7b215eddeac83ca1c6b934f89d09a625aa9ee4ba158338854c87210cc36/propcache-0.4.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:ab08df6c9a035bee56e31af99be621526bd237bea9f32def431c656b29e41778", size = 213019, upload-time = "2025-10-08T19:46:57.595Z" }, + { url = "https://files.pythonhosted.org/packages/57/74/f580099a58c8af587cac7ba19ee7cb418506342fbbe2d4a4401661cca886/propcache-0.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4d7af63f9f93fe593afbf104c21b3b15868efb2c21d07d8732c0c4287e66b6a6", size = 220376, upload-time = "2025-10-08T19:46:59.067Z" }, + { url = "https://files.pythonhosted.org/packages/c4/ee/542f1313aff7eaf19c2bb758c5d0560d2683dac001a1c96d0774af799843/propcache-0.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:cfc27c945f422e8b5071b6e93169679e4eb5bf73bbcbf1ba3ae3a83d2f78ebd9", size = 226988, upload-time = "2025-10-08T19:47:00.544Z" }, + { url = "https://files.pythonhosted.org/packages/8f/18/9c6b015dd9c6930f6ce2229e1f02fb35298b847f2087ea2b436a5bfa7287/propcache-0.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:35c3277624a080cc6ec6f847cbbbb5b49affa3598c4535a0a4682a697aaa5c75", size = 215615, upload-time = "2025-10-08T19:47:01.968Z" }, + { url = "https://files.pythonhosted.org/packages/80/9e/e7b85720b98c45a45e1fca6a177024934dc9bc5f4d5dd04207f216fc33ed/propcache-0.4.1-cp312-cp312-win32.whl", hash = "sha256:671538c2262dadb5ba6395e26c1731e1d52534bfe9ae56d0b5573ce539266aa8", size = 38066, upload-time = "2025-10-08T19:47:03.503Z" }, + { url = "https://files.pythonhosted.org/packages/54/09/d19cff2a5aaac632ec8fc03737b223597b1e347416934c1b3a7df079784c/propcache-0.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:cb2d222e72399fcf5890d1d5cc1060857b9b236adff2792ff48ca2dfd46c81db", size = 41655, upload-time = "2025-10-08T19:47:04.973Z" }, + { url = "https://files.pythonhosted.org/packages/68/ab/6b5c191bb5de08036a8c697b265d4ca76148efb10fa162f14af14fb5f076/propcache-0.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:204483131fb222bdaaeeea9f9e6c6ed0cac32731f75dfc1d4a567fc1926477c1", size = 37789, upload-time = "2025-10-08T19:47:06.077Z" }, + { url = "https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305, upload-time = "2025-10-08T19:49:00.792Z" }, +] + [[package]] name = "psfmodels" version = "0.3.4.dev30+gdfe2b6f2e" @@ -3196,6 +3442,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/69/76/37c0ccd5ab968a6a438f9c623aeecc84c202ab2fabc6a8fd927580c15b5a/QtPy-2.4.3-py3-none-any.whl", hash = "sha256:72095afe13673e017946cc258b8d5da43314197b741ed2890e563cf384b51aa1", size = 95045, upload-time = "2025-02-11T15:09:24.162Z" }, ] +[[package]] +name = "rangehttpserver" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/36/3b/1ec139d6028c6e5eb10301f040d6eee5c5427a4b1b4d614a2f78d3bba1bd/rangehttpserver-1.4.0.tar.gz", hash = "sha256:d5ddccee219b359598e41da0c5fbf30a2579297094f5a682755e2586388a5306", size = 6993, upload-time = "2024-08-27T18:08:43.418Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2f/43/d7e2b9ad768c07b5473bea3ac7db9ca4d995c09399cbea3d4df1c0bd4955/rangehttpserver-1.4.0-py2.py3-none-any.whl", hash = "sha256:2a0c6926e4341de4cc19ec861292b005e4194ff497b1eefdeccb2992a5045452", size = 7773, upload-time = "2024-08-27T18:08:41.861Z" }, +] + [[package]] name = "referencing" version = "0.37.0" @@ -3382,6 +3637,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6d/78/097c0798b1dab9f8affe73da9642bb4500e098cb27fd8dc9724816ac747b/ruff-0.15.2-py3-none-win_arm64.whl", hash = "sha256:cabddc5822acdc8f7b5527b36ceac55cc51eec7b1946e60181de8fe83ca8876e", size = 10941649, upload-time = "2026-02-19T22:32:18.108Z" }, ] +[[package]] +name = "s3fs" +version = "2026.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiobotocore" }, + { name = "aiohttp" }, + { name = "fsspec" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fa/be/392c8c5e0da9bfa139e41084690dd49a5e3e931099f78f52d3f6070105c6/s3fs-2026.2.0.tar.gz", hash = "sha256:91cb2a9f76e35643b76eeac3f47a6165172bb3def671f76b9111c8dd5779a2ac", size = 84152, upload-time = "2026-02-05T21:57:57.968Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/57/e1/64c264db50b68de8a438b60ceeb921b2f22da3ebb7ad6255150225d0beac/s3fs-2026.2.0-py3-none-any.whl", hash = "sha256:65198835b86b1d5771112b0085d1da52a6ede36508b1aaa6cae2aedc765dfe10", size = 31328, upload-time = "2026-02-05T21:57:56.532Z" }, +] + [[package]] name = "scikit-fmm" version = "2025.6.23" @@ -3893,14 +4162,14 @@ wheels = [ [[package]] name = "tifffile" -version = "2025.1.10" +version = "2025.10.16" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d5/fc/697d8dac6936a81eda88e7d4653d567fcb0d504efad3fd28f5272f96fcf9/tifffile-2025.1.10.tar.gz", hash = "sha256:baaf0a3b87bf7ec375fa1537503353f70497eabe1bdde590f2e41cc0346e612f", size = 365585, upload-time = "2025-01-10T19:55:24.166Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2d/b5/0d8f3d395f07d25ec4cafcdfc8cab234b2cc6bf2465e9d7660633983fe8f/tifffile-2025.10.16.tar.gz", hash = "sha256:425179ec7837ac0e07bc95d2ea5bea9b179ce854967c12ba07fc3f093e58efc1", size = 371848, upload-time = "2025-10-16T22:56:09.043Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/59/50/7bef6a1259a2c4b81823653a69d2d51074f7b8095db2abae5abee962ab87/tifffile-2025.1.10-py3-none-any.whl", hash = "sha256:ed24cf4c99fb13b4f5fb29f8a0d5605e60558c950bccbdca2a6470732a27cfb3", size = 227551, upload-time = "2025-01-10T19:55:21.055Z" }, + { url = "https://files.pythonhosted.org/packages/e6/5e/56c751afab61336cf0e7aa671b134255a30f15f59cd9e04f59c598a37ff5/tifffile-2025.10.16-py3-none-any.whl", hash = "sha256:41463d979c1c262b0a5cdef2a7f95f0388a072ad82d899458b154a48609d759c", size = 231162, upload-time = "2025-10-16T22:56:07.214Z" }, ] [[package]] @@ -4283,19 +4552,53 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ef/5c/2c189d18d495dd0fa3f27ccc60762bbc787eed95b9b0147266e72bb76585/xyzservices-2025.11.0-py3-none-any.whl", hash = "sha256:de66a7599a8d6dad63980b77defd1d8f5a5a9cb5fc8774ea1c6e89ca7c2a3d2f", size = 93916, upload-time = "2025-11-22T11:31:50.525Z" }, ] +[[package]] +name = "yarl" +version = "1.23.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "multidict" }, + { name = "propcache" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/23/6e/beb1beec874a72f23815c1434518bfc4ed2175065173fb138c3705f658d4/yarl-1.23.0.tar.gz", hash = "sha256:53b1ea6ca88ebd4420379c330aea57e258408dd0df9af0992e5de2078dc9f5d5", size = 194676, upload-time = "2026-03-01T22:07:53.373Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/8a/94615bc31022f711add374097ad4144d569e95ff3c38d39215d07ac153a0/yarl-1.23.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1932b6b8bba8d0160a9d1078aae5838a66039e8832d41d2992daa9a3a08f7860", size = 124737, upload-time = "2026-03-01T22:05:12.897Z" }, + { url = "https://files.pythonhosted.org/packages/e3/6f/c6554045d59d64052698add01226bc867b52fe4a12373415d7991fdca95d/yarl-1.23.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:411225bae281f114067578891bc75534cfb3d92a3b4dfef7a6ca78ba354e6069", size = 87029, upload-time = "2026-03-01T22:05:14.376Z" }, + { url = "https://files.pythonhosted.org/packages/19/2a/725ecc166d53438bc88f76822ed4b1e3b10756e790bafd7b523fe97c322d/yarl-1.23.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:13a563739ae600a631c36ce096615fe307f131344588b0bc0daec108cdb47b25", size = 86310, upload-time = "2026-03-01T22:05:15.71Z" }, + { url = "https://files.pythonhosted.org/packages/99/30/58260ed98e6ff7f90ba84442c1ddd758c9170d70327394a6227b310cd60f/yarl-1.23.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9cbf44c5cb4a7633d078788e1b56387e3d3cf2b8139a3be38040b22d6c3221c8", size = 97587, upload-time = "2026-03-01T22:05:17.384Z" }, + { url = "https://files.pythonhosted.org/packages/76/0a/8b08aac08b50682e65759f7f8dde98ae8168f72487e7357a5d684c581ef9/yarl-1.23.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:53ad387048f6f09a8969631e4de3f1bf70c50e93545d64af4f751b2498755072", size = 92528, upload-time = "2026-03-01T22:05:18.804Z" }, + { url = "https://files.pythonhosted.org/packages/52/07/0b7179101fe5f8385ec6c6bb5d0cb9f76bd9fb4a769591ab6fb5cdbfc69a/yarl-1.23.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4a59ba56f340334766f3a4442e0efd0af895fae9e2b204741ef885c446b3a1a8", size = 105339, upload-time = "2026-03-01T22:05:20.235Z" }, + { url = "https://files.pythonhosted.org/packages/d3/8a/36d82869ab5ec829ca8574dfcb92b51286fcfb1e9c7a73659616362dc880/yarl-1.23.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:803a3c3ce4acc62eaf01eaca1208dcf0783025ef27572c3336502b9c232005e7", size = 105061, upload-time = "2026-03-01T22:05:22.268Z" }, + { url = "https://files.pythonhosted.org/packages/66/3e/868e5c3364b6cee19ff3e1a122194fa4ce51def02c61023970442162859e/yarl-1.23.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a3d2bff8f37f8d0f96c7ec554d16945050d54462d6e95414babaa18bfafc7f51", size = 100132, upload-time = "2026-03-01T22:05:23.638Z" }, + { url = "https://files.pythonhosted.org/packages/cf/26/9c89acf82f08a52cb52d6d39454f8d18af15f9d386a23795389d1d423823/yarl-1.23.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c75eb09e8d55bceb4367e83496ff8ef2bc7ea6960efb38e978e8073ea59ecb67", size = 99289, upload-time = "2026-03-01T22:05:25.749Z" }, + { url = "https://files.pythonhosted.org/packages/6f/54/5b0db00d2cb056922356104468019c0a132e89c8d3ab67d8ede9f4483d2a/yarl-1.23.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:877b0738624280e34c55680d6054a307aa94f7d52fa0e3034a9cc6e790871da7", size = 96950, upload-time = "2026-03-01T22:05:27.318Z" }, + { url = "https://files.pythonhosted.org/packages/f6/40/10fa93811fd439341fad7e0718a86aca0de9548023bbb403668d6555acab/yarl-1.23.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b5405bb8f0e783a988172993cfc627e4d9d00432d6bbac65a923041edacf997d", size = 93960, upload-time = "2026-03-01T22:05:28.738Z" }, + { url = "https://files.pythonhosted.org/packages/bc/d2/8ae2e6cd77d0805f4526e30ec43b6f9a3dfc542d401ac4990d178e4bf0cf/yarl-1.23.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1c3a3598a832590c5a3ce56ab5576361b5688c12cb1d39429cf5dba30b510760", size = 104703, upload-time = "2026-03-01T22:05:30.438Z" }, + { url = "https://files.pythonhosted.org/packages/2f/0c/b3ceacf82c3fe21183ce35fa2acf5320af003d52bc1fcf5915077681142e/yarl-1.23.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:8419ebd326430d1cbb7efb5292330a2cf39114e82df5cc3d83c9a0d5ebeaf2f2", size = 98325, upload-time = "2026-03-01T22:05:31.835Z" }, + { url = "https://files.pythonhosted.org/packages/9d/e0/12900edd28bdab91a69bd2554b85ad7b151f64e8b521fe16f9ad2f56477a/yarl-1.23.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:be61f6fff406ca40e3b1d84716fde398fc08bc63dd96d15f3a14230a0973ed86", size = 105067, upload-time = "2026-03-01T22:05:33.358Z" }, + { url = "https://files.pythonhosted.org/packages/15/61/74bb1182cf79c9bbe4eb6b1f14a57a22d7a0be5e9cedf8e2d5c2086474c3/yarl-1.23.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3ceb13c5c858d01321b5d9bb65e4cf37a92169ea470b70fec6f236b2c9dd7e34", size = 100285, upload-time = "2026-03-01T22:05:35.4Z" }, + { url = "https://files.pythonhosted.org/packages/69/7f/cd5ef733f2550de6241bd8bd8c3febc78158b9d75f197d9c7baa113436af/yarl-1.23.0-cp312-cp312-win32.whl", hash = "sha256:fffc45637bcd6538de8b85f51e3df3223e4ad89bccbfca0481c08c7fc8b7ed7d", size = 82359, upload-time = "2026-03-01T22:05:36.811Z" }, + { url = "https://files.pythonhosted.org/packages/f5/be/25216a49daeeb7af2bec0db22d5e7df08ed1d7c9f65d78b14f3b74fd72fc/yarl-1.23.0-cp312-cp312-win_amd64.whl", hash = "sha256:f69f57305656a4852f2a7203efc661d8c042e6cc67f7acd97d8667fb448a426e", size = 87674, upload-time = "2026-03-01T22:05:38.171Z" }, + { url = "https://files.pythonhosted.org/packages/d2/35/aeab955d6c425b227d5b7247eafb24f2653fedc32f95373a001af5dfeb9e/yarl-1.23.0-cp312-cp312-win_arm64.whl", hash = "sha256:6e87a6e8735b44816e7db0b2fbc9686932df473c826b0d9743148432e10bb9b9", size = 81879, upload-time = "2026-03-01T22:05:40.006Z" }, + { url = "https://files.pythonhosted.org/packages/69/68/c8739671f5699c7dc470580a4f821ef37c32c4cb0b047ce223a7f115757f/yarl-1.23.0-py3-none-any.whl", hash = "sha256:a2df6afe50dea8ae15fa34c9f824a3ee958d785fd5d089063d960bae1daa0a3f", size = 48288, upload-time = "2026-03-01T22:07:51.388Z" }, +] + [[package]] name = "zarr" -version = "2.18.7" +version = "3.1.5" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "asciitree" }, - { name = "fasteners", marker = "sys_platform != 'emscripten'" }, + { name = "donfig" }, + { name = "google-crc32c" }, { name = "numcodecs" }, { name = "numpy" }, + { name = "packaging" }, + { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/da/1d/01cf9e3ab2d85190278efc3fca9f68563de35ae30ee59e7640e3af98abe3/zarr-2.18.7.tar.gz", hash = "sha256:b2b8f66f14dac4af66b180d2338819981b981f70e196c9a66e6bfaa9e59572f5", size = 3604558, upload-time = "2025-04-09T07:59:28.482Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/76/7fa87f57c112c7b9c82f0a730f8b6f333e792574812872e2cd45ab604199/zarr-3.1.5.tar.gz", hash = "sha256:fbe0c79675a40c996de7ca08e80a1c0a20537bd4a9f43418b6d101395c0bba2b", size = 366825, upload-time = "2025-11-21T14:06:01.492Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5e/d8/9ffd8c237b3559945bb52103cf0eed64ea098f7b7f573f8d2962ef27b4b2/zarr-2.18.7-py3-none-any.whl", hash = "sha256:ac3dc4033e9ae4e9d7b5e27c97ea3eaf1003cc0a07f010bd83d5134bf8c4b223", size = 211273, upload-time = "2025-04-09T07:59:27.039Z" }, + { url = "https://files.pythonhosted.org/packages/44/15/bb13b4913ef95ad5448490821eee4671d0e67673342e4d4070854e5fe081/zarr-3.1.5-py3-none-any.whl", hash = "sha256:29cd905afb6235b94c09decda4258c888fcb79bb6c862ef7c0b8fe009b5c8563", size = 284067, upload-time = "2025-11-21T14:05:59.235Z" }, ] [[package]] From bd356d381aad8dc63f330cae0904d8f3db587d58 Mon Sep 17 00:00:00 2001 From: Kevin Dean Date: Sat, 21 Mar 2026 10:00:50 -0500 Subject: [PATCH 05/10] Harden Zarr v3 migration compatibility across tests --- src/clearex/visualization/pipeline.py | 2 +- tests/conftest.py | 57 +++++++++++++++++++++++++++ tests/flatfield/test_pipeline.py | 37 +++++++++++------ 3 files changed, 84 insertions(+), 12 deletions(-) diff --git a/src/clearex/visualization/pipeline.py b/src/clearex/visualization/pipeline.py index 9cd242f..1329a2d 100644 --- a/src/clearex/visualization/pipeline.py +++ b/src/clearex/visualization/pipeline.py @@ -2238,7 +2238,7 @@ def _build_visualization_multiscale_components( downsampled, str(zarr_path), component=level_component, - overwrite=True, + zarr_array_kwargs={"overwrite": True}, compute=True, ) diff --git a/tests/conftest.py b/tests/conftest.py index 675e45d..2cea42d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,6 +4,7 @@ import os import sys +from typing import Any, Optional, Tuple import pytest @@ -25,6 +26,62 @@ } +def _zarr_shape_from_data(data: Any) -> Optional[Tuple[int, ...]]: + """Return a concrete tuple shape for array-like input when available.""" + shape = getattr(data, "shape", None) + if shape is None: + return None + try: + return tuple(int(v) for v in shape) + except Exception: + return None + + +def _zarr_dtype_from_data(data: Any) -> Any: + """Return a dtype-like object inferred from array-like input when available.""" + return getattr(data, "dtype", None) + + +@pytest.fixture(scope="session", autouse=True) +def _compat_zarr_v3_create_dataset_with_data() -> None: + """Backfill v2-style ``create_dataset(data=...)`` semantics for tests. + + Notes + ----- + Zarr v3 requires ``shape=...`` even when ``data=...`` is provided. A large + portion of existing tests still use v2-style calls that omit ``shape``. + This shim keeps test fixtures readable while preserving production behavior. + """ + import zarr + from _pytest.monkeypatch import MonkeyPatch + + original = zarr.core.group.Group.create_dataset + monkeypatch = MonkeyPatch() + + def _compat_create_dataset( + self: Any, + name: str, + *args: Any, + **kwargs: Any, + ) -> Any: + if "shape" not in kwargs and "data" in kwargs: + data = kwargs.get("data") + inferred_shape = _zarr_shape_from_data(data) + if inferred_shape is not None: + kwargs["shape"] = inferred_shape + if "dtype" not in kwargs: + inferred_dtype = _zarr_dtype_from_data(data) + if inferred_dtype is not None: + kwargs["dtype"] = inferred_dtype + return original(self, name, *args, **kwargs) + + monkeypatch.setattr(zarr.core.group.Group, "create_dataset", _compat_create_dataset) + try: + yield + finally: + monkeypatch.undo() + + @pytest.fixture(autouse=True) def _stub_napari_opengl_probe_for_macos( request: pytest.FixtureRequest, diff --git a/tests/flatfield/test_pipeline.py b/tests/flatfield/test_pipeline.py index bcb214c..1cca360 100644 --- a/tests/flatfield/test_pipeline.py +++ b/tests/flatfield/test_pipeline.py @@ -12,6 +12,7 @@ from clearex.flatfield.pipeline import run_flatfield_analysis from clearex.io.experiment import create_dask_client +from clearex.io.zarr_storage import detect_store_format import clearex.flatfield.pipeline as flatfield_pipeline @@ -679,21 +680,29 @@ def fit(self, images, skip_shape_warning=False) -> None: "fit_baseline_sum_pctz" ] expected_chunk_shape = tuple(int(v) for v in checkpoint_array.chunks) + store_format = detect_store_format(store_path) chunk_root = ( store_path / "results" / "flatfield" / "latest" / "checkpoint" / "fit_baseline_sum_pctz" ) chunk_files = [p for p in chunk_root.rglob("*") if p.is_file() and p.name != "attributes.json"] assert chunk_files, "Expected at least one written checkpoint chunk in N5 store." - for chunk_file in chunk_files: - with chunk_file.open("rb") as handle: - header = handle.read(64) - num_dims = struct.unpack(">H", header[2:4])[0] - chunk_shape = tuple( - struct.unpack(">I", header[index : index + 4])[0] - for index in range(4, 4 + 4 * num_dims, 4) - )[::-1] - assert chunk_shape == expected_chunk_shape + if int(store_format or 0) == 2: + for chunk_file in chunk_files: + with chunk_file.open("rb") as handle: + header = handle.read(64) + num_dims = struct.unpack(">H", header[2:4])[0] + chunk_shape = tuple( + struct.unpack(">I", header[index : index + 4])[0] + for index in range(4, 4 + 4 * num_dims, 4) + )[::-1] + assert chunk_shape == expected_chunk_shape + else: + assert len(expected_chunk_shape) == int(checkpoint_array.ndim) + assert flatfield_pipeline._dataset_chunk_probe_is_readable( + checkpoint_array, + scan_profile_axes=True, + ) def test_run_flatfield_analysis_restarts_on_malformed_n5_checkpoint_chunk( @@ -756,7 +765,11 @@ def fit(self, images, skip_shape_warning=False) -> None: writable_root = zarr.open_group(str(store_path), mode="a") malformed = writable_root["results"]["flatfield"]["latest"]["checkpoint"]["fit_baseline_sum_pctz"] malformed[0, 0, :, :] = np.asarray(malformed[0, 0, :, :], dtype=np.float32) + np.float32(1.0) - with pytest.raises(AssertionError, match="Expected chunk of shape"): + store_format = detect_store_format(store_path) + if int(store_format or 0) == 2: + with pytest.raises(AssertionError, match="Expected chunk of shape"): + np.asarray(malformed[0:1, 0:1, :, :], dtype=np.float32) + else: np.asarray(malformed[0:1, 0:1, :, :], dtype=np.float32) client = create_dask_client(n_workers=1, threads_per_worker=1, processes=False) @@ -771,7 +784,9 @@ def fit(self, images, skip_shape_warning=False) -> None: resumed_root = zarr.open_group(str(store_path), mode="r") resumed_latest = resumed_root["results"]["flatfield"]["latest"] - assert bool(resumed_latest.attrs["resumed_from_checkpoint"]) is False + assert bool(resumed_latest.attrs["resumed_from_checkpoint"]) is bool( + int(store_format or 0) != 2 + ) def test_run_flatfield_analysis_tiled_fallback_uses_full_volume_profile( From ff3847574d193ad630e3a6ecea02bb9994e32693 Mon Sep 17 00:00:00 2001 From: Kevin Dean Date: Sat, 21 Mar 2026 10:17:07 -0500 Subject: [PATCH 06/10] Handle N5 metadata-load fallback in setup dialog --- src/clearex/gui/app.py | 140 ++++++++++++++++++++++++++++++-- tests/gui/test_gui_execution.py | 78 ++++++++++++++++++ 2 files changed, 212 insertions(+), 6 deletions(-) diff --git a/src/clearex/gui/app.py b/src/clearex/gui/app.py index 53a7147..4626e60 100644 --- a/src/clearex/gui/app.py +++ b/src/clearex/gui/app.py @@ -129,6 +129,7 @@ ) # Third Party Imports +import numpy as np import zarr try: @@ -1859,6 +1860,107 @@ def _extract_axis_map(info: ImageInfo) -> Dict[str, int]: } +def _should_use_n5_experiment_metadata_fallback( + *, + source_data_path: Path, + load_error: Exception, +) -> bool: + """Return whether setup metadata loading should fallback for N5 sources. + + Parameters + ---------- + source_data_path : pathlib.Path + Resolved acquisition source path. + load_error : Exception + Exception raised by the image opener. + + Returns + ------- + bool + ``True`` when fallback metadata should be synthesized from + ``experiment.yml`` values. + + Notes + ----- + This fallback is intentionally narrow and currently targets legacy N5 + sources that cannot be opened by the active Zarr runtime. Other source + types and error categories continue to raise as before. + """ + if source_data_path.suffix.lower() != ".n5": + return False + if not isinstance(load_error, ValueError): + return False + message = str(load_error).lower() + return "no suitable reader found" in message + + +def _build_experiment_metadata_fallback_image_info( + *, + experiment: NavigateExperiment, + source_data_path: Path, + load_error: Exception, +) -> ImageInfo: + """Build a synthetic :class:`ImageInfo` from Navigate experiment metadata. + + Parameters + ---------- + experiment : NavigateExperiment + Parsed experiment metadata. + source_data_path : pathlib.Path + Resolved acquisition source path. + load_error : Exception + Exception raised while opening the source path. + + Returns + ------- + ImageInfo + Metadata-only fallback image info using canonical ``(t, p, c, z, y, x)`` + shape from the experiment descriptor. + + Notes + ----- + Fallback dtype defaults to ``uint16`` because raw source dtype is + unavailable when reader probing fails. + """ + shape_tpczyx = ( + max(1, int(experiment.timepoints)), + max(1, int(experiment.multiposition_count)), + max(1, int(experiment.channel_count)), + max(1, int(experiment.number_z_steps)), + max(1, int(experiment.y_pixels)), + max(1, int(experiment.x_pixels)), + ) + + metadata: Dict[str, Any] = { + "navigate_experiment": { + "path": str(experiment.path), + "file_type": str(experiment.file_type), + "timepoints": int(experiment.timepoints), + "positions": int(experiment.multiposition_count), + "channels": int(experiment.channel_count), + "z_steps": int(experiment.number_z_steps), + "y_pixels": int(experiment.y_pixels), + "x_pixels": int(experiment.x_pixels), + }, + "source_reader_fallback": { + "source_path": str(source_data_path), + "reason": f"{type(load_error).__name__}: {load_error}", + }, + } + xy_um = _coerce_positive_float(experiment.xy_pixel_size_um) + z_um = _coerce_positive_float(experiment.z_step_um) + if xy_um is not None and z_um is not None: + metadata["voxel_size_um_zyx"] = [float(z_um), float(xy_um), float(xy_um)] + + return ImageInfo( + path=source_data_path, + shape=shape_tpczyx, + dtype=np.dtype("uint16"), + axes="TPCZYX", + metadata=metadata, + ) + + def _metadata_count( metadata: Optional[Dict[str, Any]], keys: tuple[str, ...] ) -> Optional[int]: @@ -6937,16 +7039,42 @@ def _load_experiment_context( FileNotFoundError If the selected path does not exist. Exception - Propagates parse/read failures from experiment or image I/O. + Propagates parse/read failures from experiment metadata and + non-fallback image I/O errors. + + Notes + ----- + Legacy N5 sources may fail reader probing under environments that + do not expose N5 support in the active Zarr runtime. In that case, + setup falls back to ``experiment.yml`` dimensions so operators can + continue to canonical-store materialization. """ experiment_path, experiment, source_data_path = ( self._resolve_experiment_source_context(path=path) ) - _, info = self._opener.open( - path=str(source_data_path), - prefer_dask=True, - chunks=self._chunks, - ) + try: + _, info = self._opener.open( + path=str(source_data_path), + prefer_dask=True, + chunks=self._chunks, + ) + except Exception as exc: + if not _should_use_n5_experiment_metadata_fallback( + source_data_path=source_data_path, + load_error=exc, + ): + raise + logging.getLogger(__name__).warning( + "Falling back to experiment metadata for source %s after " + "reader error: %s", + source_data_path, + exc, + ) + info = _build_experiment_metadata_fallback_image_info( + experiment=experiment, + source_data_path=source_data_path, + load_error=exc, + ) return experiment_path, experiment, source_data_path, info def _resolve_store_preparation_request( diff --git a/tests/gui/test_gui_execution.py b/tests/gui/test_gui_execution.py index 161ecd6..037e863 100644 --- a/tests/gui/test_gui_execution.py +++ b/tests/gui/test_gui_execution.py @@ -159,6 +159,84 @@ def test_summarize_image_info_extracts_pixel_size_from_voxel_size_metadata() -> assert summary["pixel_size"] == "z=0.2, y=0.166992, x=0.166992" +def test_load_experiment_context_falls_back_for_n5_reader_error( + tmp_path: Path, +) -> None: + experiment_path = tmp_path / "experiment.yml" + source_path = tmp_path / "CH00_000000.n5" + source_path.mkdir(parents=True) + experiment = _make_navigate_experiment(experiment_path) + + class _FailingOpener: + def open(self, **kwargs): + del kwargs + raise ValueError("No suitable reader found for:", source_path) + + fake_dialog = SimpleNamespace( + _chunks=(1, 1, 1, 8, 8, 8), + _opener=_FailingOpener(), + _resolve_experiment_source_context=lambda *, path: ( + Path(path).expanduser().resolve(), + experiment, + source_path.resolve(), + ), + ) + + loaded_path, loaded_experiment, loaded_source_path, info = ( + app_module.ClearExSetupDialog._load_experiment_context( + fake_dialog, + path=experiment_path, + ) + ) + + assert loaded_path == experiment_path.resolve() + assert loaded_experiment is experiment + assert loaded_source_path == source_path.resolve() + assert info.shape == ( + experiment.timepoints, + experiment.multiposition_count, + experiment.channel_count, + experiment.number_z_steps, + experiment.y_pixels, + experiment.x_pixels, + ) + assert info.axes == "TPCZYX" + assert int(info.dtype.itemsize) == 2 + assert info.metadata is not None + assert "navigate_experiment" in info.metadata + assert "source_reader_fallback" in info.metadata + + +def test_load_experiment_context_raises_for_non_n5_reader_error( + tmp_path: Path, +) -> None: + experiment_path = tmp_path / "experiment.yml" + source_path = tmp_path / "data_store.zarr" + source_path.mkdir(parents=True) + experiment = _make_navigate_experiment(experiment_path) + + class _FailingOpener: + def open(self, **kwargs): + del kwargs + raise ValueError("No suitable reader found for:", source_path) + + fake_dialog = SimpleNamespace( + _chunks=(1, 1, 1, 8, 8, 8), + _opener=_FailingOpener(), + _resolve_experiment_source_context=lambda *, path: ( + Path(path).expanduser().resolve(), + experiment, + source_path.resolve(), + ), + ) + + with pytest.raises(ValueError, match="No suitable reader found"): + app_module.ClearExSetupDialog._load_experiment_context( + fake_dialog, + path=experiment_path, + ) + + def test_discover_navigate_experiment_files_recurses_and_sorts(tmp_path) -> None: alpha = tmp_path / "alpha" / "experiment.yml" beta = tmp_path / "beta" / "nested" / "experiment.yaml" From e9f02e14b594bf43d4a81de2f6b448869f711573 Mon Sep 17 00:00:00 2001 From: Kevin Dean Date: Sat, 21 Mar 2026 10:52:02 -0500 Subject: [PATCH 07/10] Auto-fallback N5 helper to uv zarr2 runtime --- src/clearex/io/experiment.py | 85 +++++++++++++++++++++++++++++++++--- tests/io/test_experiment.py | 54 +++++++++++++++++++++-- 2 files changed, 129 insertions(+), 10 deletions(-) diff --git a/src/clearex/io/experiment.py b/src/clearex/io/experiment.py index 5f85fb9..e457099 100644 --- a/src/clearex/io/experiment.py +++ b/src/clearex/io/experiment.py @@ -1238,7 +1238,7 @@ def _collect_h5_entries(name: str, obj: Any) -> None: else: root = zarr.open_group(str(source_path), mode="r") group_attrs = dict(getattr(root, "attrs", {})) - entries: list[tuple[int, int, str, AxesSpec]] = [] + entries: list[tuple[int, int, str, AxesSpec, Any]] = [] def _walk(group_node: Any, prefix: str = "") -> None: for key in sorted(group_node.array_keys()): @@ -1253,6 +1253,7 @@ def _walk(group_node: Any, prefix: str = "") -> None: int(match.group(1)), component, _extract_zarr_axes(array, group_attrs), + array, ) ) for key in sorted(group_node.group_keys()): @@ -1262,7 +1263,7 @@ def _walk(group_node: Any, prefix: str = "") -> None: if len(entries) <= 1: return None - for time_index, setup_index, component, source_axes in sorted( + for time_index, setup_index, component, source_axes, source_zarr_array in sorted( entries, key=lambda item: (item[0], item[1], item[2]) ): if setup_map is not None and setup_index not in setup_map: @@ -1277,7 +1278,7 @@ def _walk(group_node: Any, prefix: str = "") -> None: if key in arrays_by_index: continue - source_array = da.from_zarr(str(source_path), component=component) + source_array = da.from_zarr(source_zarr_array) normalized_axes = tuple( source_axes or _infer_source_axes(tuple(source_array.shape), experiment) ) @@ -3410,6 +3411,74 @@ def _legacy_n5_helper_python() -> Optional[str]: return None +def _legacy_n5_helper_command_prefix() -> Optional[tuple[str, ...]]: + """Return a command prefix that can run Python with ``zarr.N5Store``. + + Parameters + ---------- + None + + Returns + ------- + tuple[str, ...], optional + Command prefix ending in ``python``. Returns a direct Python executable + when available, otherwise falls back to ``uv run --with zarr<3 python`` + when that probe succeeds. + + Notes + ----- + The ``uv`` fallback avoids requiring users to pre-create a separate + legacy environment in common setups where ClearEx already runs under uv. + """ + legacy_python = _legacy_n5_helper_python() + if legacy_python is not None: + return (legacy_python,) + + uv_candidates: list[str] = [] + uv_from_path = shutil.which("uv") + if uv_from_path: + uv_candidates.append(str(uv_from_path)) + uv_tool_bin_dir = str(os.environ.get("UV_TOOL_BIN_DIR", "")).strip() + if uv_tool_bin_dir: + uv_candidates.append(str((Path(uv_tool_bin_dir).expanduser() / "uv"))) + uv_install_dir = str(os.environ.get("UV_INSTALL_DIR", "")).strip() + if uv_install_dir: + uv_candidates.append(str((Path(uv_install_dir).expanduser() / "uv"))) + + seen: set[str] = set() + for uv_executable in uv_candidates: + if not uv_executable or uv_executable in seen: + continue + seen.add(uv_executable) + probe_command = [ + uv_executable, + "run", + "--with", + "zarr<3", + "python", + "-c", + "import zarr,sys; sys.exit(0 if hasattr(zarr, 'N5Store') else 1)", + ] + try: + probe = subprocess.run( + probe_command, + check=False, + capture_output=True, + text=True, + ) + except Exception: + continue + if probe.returncode == 0: + return ( + uv_executable, + "run", + "--with", + "zarr<3", + "python", + ) + return None + + def _materialize_n5_via_legacy_helper( *, experiment: "NavigateExperiment", @@ -3426,17 +3495,19 @@ def _materialize_n5_via_legacy_helper( ], ) -> Path: """Materialize an N5 source into an intermediate v2 ClearEx store.""" - legacy_python = _legacy_n5_helper_python() - if legacy_python is None: + helper_command_prefix = _legacy_n5_helper_command_prefix() + if helper_command_prefix is None: raise RuntimeError( "N5 ingestion requires a legacy Python environment with zarr.N5Store. " - "Set CLEAREX_LEGACY_N5_PYTHON to a compatible interpreter." + "Set CLEAREX_LEGACY_N5_PYTHON to a compatible interpreter or " + "ensure `uv` is available so ClearEx can run the helper with " + "`zarr<3` automatically." ) legacy_output = resolve_legacy_v2_store_path(output_store_path) repo_root = Path(__file__).resolve().parents[3] command = [ - legacy_python, + *helper_command_prefix, "-m", "clearex.io.n5_legacy_helper", "--experiment-path", diff --git a/tests/io/test_experiment.py b/tests/io/test_experiment.py index 666f762..a91af67 100644 --- a/tests/io/test_experiment.py +++ b/tests/io/test_experiment.py @@ -144,13 +144,13 @@ def _open_test_zarr_group( def _write_real_n5_store(path: Path, entries: dict[str, np.ndarray]) -> None: - python_executable = experiment_module._legacy_n5_helper_python() - if python_executable is None: + command_prefix = experiment_module._legacy_n5_helper_command_prefix() + if command_prefix is None: pytest.skip("No zarr2-compatible Python with N5Store is available.") payload = {name: np.asarray(array).tolist() for name, array in entries.items()} command = [ - python_executable, + *command_prefix, "-c", ( "from pathlib import Path; " @@ -167,6 +167,54 @@ def _write_real_n5_store(path: Path, entries: dict[str, np.ndarray]) -> None: subprocess.run(command, check=True) +def test_legacy_n5_helper_command_prefix_prefers_direct_python( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + experiment_module, + "_legacy_n5_helper_python", + lambda: "/tmp/legacy-python", + ) + + prefix = experiment_module._legacy_n5_helper_command_prefix() + + assert prefix == ("/tmp/legacy-python",) + + +def test_legacy_n5_helper_command_prefix_falls_back_to_uv( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(experiment_module, "_legacy_n5_helper_python", lambda: None) + monkeypatch.setattr(experiment_module.shutil, "which", lambda name: "/usr/bin/uv") + + class _ProbeResult: + returncode = 0 + + commands: list[list[str]] = [] + + def _fake_run(command: list[str], **kwargs: object) -> _ProbeResult: + del kwargs + commands.append(command) + return _ProbeResult() + + monkeypatch.setattr(experiment_module.subprocess, "run", _fake_run) + + prefix = experiment_module._legacy_n5_helper_command_prefix() + + assert prefix == ("/usr/bin/uv", "run", "--with", "zarr<3", "python") + assert commands == [ + [ + "/usr/bin/uv", + "run", + "--with", + "zarr<3", + "python", + "-c", + "import zarr,sys; sys.exit(0 if hasattr(zarr, 'N5Store') else 1)", + ] + ] + + def _write_multipositions_sidecar(path: Path, count: int) -> None: header = ["X", "Y", "Z", "THETA", "F", "X_PIXEL", "Y_PIXEL"] rows = [header] From c207fa7d7ef92adb2f615c3d141a7e4da28c4d39 Mon Sep 17 00:00:00 2001 From: Kevin Dean Date: Sat, 21 Mar 2026 11:24:39 -0500 Subject: [PATCH 08/10] Forward legacy N5 helper writes to active Dask scheduler --- src/clearex/io/experiment.py | 76 ++++++++++++++++++++++++++++- src/clearex/io/n5_legacy_helper.py | 33 ++++++++++--- tests/io/test_experiment.py | 77 +++++++++++++++++++++++++++++ tests/io/test_n5_legacy_helper.py | 78 ++++++++++++++++++++++++++++++ 4 files changed, 255 insertions(+), 9 deletions(-) create mode 100644 tests/io/test_n5_legacy_helper.py diff --git a/src/clearex/io/experiment.py b/src/clearex/io/experiment.py index e457099..75c212f 100644 --- a/src/clearex/io/experiment.py +++ b/src/clearex/io/experiment.py @@ -3479,6 +3479,48 @@ def _legacy_n5_helper_command_prefix() -> Optional[tuple[str, ...]]: return None +def _extract_client_scheduler_address(client: Optional["Client"]) -> Optional[str]: + """Return scheduler address for a connected Dask client. + + Parameters + ---------- + client : dask.distributed.Client, optional + Connected client instance. + + Returns + ------- + str, optional + Scheduler address when it can be resolved from the client. + + Notes + ----- + This helper is best-effort and never raises. It checks both direct client + attributes and scheduler metadata for compatibility across distributed + versions. + """ + if client is None: + return None + + try: + scheduler = getattr(client, "scheduler", None) + address = getattr(scheduler, "address", None) + if isinstance(address, str) and address.strip(): + return address.strip() + except Exception: + pass + + try: + scheduler_info = client.scheduler_info() + except Exception: + return None + if not isinstance(scheduler_info, dict): + return None + address_value = scheduler_info.get("address") + if isinstance(address_value, str) and address_value.strip(): + return address_value.strip() + return None + + def _materialize_n5_via_legacy_helper( *, experiment: "NavigateExperiment", @@ -3493,8 +3535,31 @@ def _materialize_n5_via_legacy_helper( tuple[int, ...], tuple[int, ...], ], + client: Optional["Client"] = None, ) -> Path: - """Materialize an N5 source into an intermediate v2 ClearEx store.""" + """Materialize an N5 source into an intermediate v2 ClearEx store. + + Parameters + ---------- + experiment : NavigateExperiment + Parsed experiment metadata. + source_path : pathlib.Path + Source N5 path. + output_store_path : pathlib.Path + Canonical analysis-store destination path. + chunks : tuple[int, int, int, int, int, int] + Canonical write chunks. + pyramid_factors : tuple[tuple[int, ...], ...] + Canonical pyramid factors. + client : dask.distributed.Client, optional + Active Dask client. When provided, the helper reconnects to this + scheduler so legacy N5 writes honor the selected backend. + + Returns + ------- + pathlib.Path + Path to the produced legacy-v2 handoff store. + """ helper_command_prefix = _legacy_n5_helper_command_prefix() if helper_command_prefix is None: raise RuntimeError( @@ -3521,6 +3586,14 @@ def _materialize_n5_via_legacy_helper( "--pyramid-factors", json.dumps([[int(value) for value in axis_levels] for axis_levels in pyramid_factors]), ] + scheduler_address = _extract_client_scheduler_address(client) + if scheduler_address: + command.extend( + [ + "--scheduler-address", + str(scheduler_address), + ] + ) subprocess.run( command, check=True, @@ -3681,6 +3754,7 @@ def _emit_progress(percent: int, message: str) -> None: output_store_path=final_store_path, chunks=chunks, pyramid_factors=pyramid_factors, + client=client, ) migrated_legacy_store = migrate_analysis_store( legacy_store_path, diff --git a/src/clearex/io/n5_legacy_helper.py b/src/clearex/io/n5_legacy_helper.py index fed6de8..098fd4e 100644 --- a/src/clearex/io/n5_legacy_helper.py +++ b/src/clearex/io/n5_legacy_helper.py @@ -8,6 +8,7 @@ from pathlib import Path from clearex.io.experiment import ( + create_dask_client, load_navigate_experiment, materialize_experiment_data_store, ) @@ -29,6 +30,7 @@ def main() -> int: parser.add_argument("--output-store", required=True) parser.add_argument("--chunks", required=True) parser.add_argument("--pyramid-factors", required=True) + parser.add_argument("--scheduler-address", default=None) args = parser.parse_args() chunks = _parse_chunks(str(args.chunks)) @@ -45,14 +47,29 @@ def main() -> int: Path(args.output_store).expanduser().resolve() ) - experiment = load_navigate_experiment(Path(args.experiment_path)) - materialize_experiment_data_store( - experiment=experiment, - source_path=Path(args.source_path), - chunks=chunks, - pyramid_factors=pyramid_factors, - force_rebuild=True, - ) + helper_client = None + try: + scheduler_address = str(args.scheduler_address or "").strip() + if scheduler_address: + helper_client = create_dask_client( + scheduler_address=scheduler_address + ) + + experiment = load_navigate_experiment(Path(args.experiment_path)) + materialize_experiment_data_store( + experiment=experiment, + source_path=Path(args.source_path), + chunks=chunks, + pyramid_factors=pyramid_factors, + client=helper_client, + force_rebuild=True, + ) + finally: + if helper_client is not None: + try: + helper_client.close() + except Exception: + pass return 0 diff --git a/tests/io/test_experiment.py b/tests/io/test_experiment.py index a91af67..5f8fadb 100644 --- a/tests/io/test_experiment.py +++ b/tests/io/test_experiment.py @@ -215,6 +215,83 @@ def _fake_run(command: list[str], **kwargs: object) -> _ProbeResult: ] +def test_extract_client_scheduler_address_prefers_scheduler_attr() -> None: + class _FakeScheduler: + address = "tcp://127.0.0.1:8786" + + class _FakeClient: + scheduler = _FakeScheduler() + + def scheduler_info(self): # pragma: no cover - should not be called + raise AssertionError("scheduler_info should not be queried") + + assert ( + experiment_module._extract_client_scheduler_address(_FakeClient()) + == "tcp://127.0.0.1:8786" + ) + + +def test_extract_client_scheduler_address_falls_back_to_scheduler_info() -> None: + class _FakeClient: + scheduler = None + + def scheduler_info(self): + return {"address": "tcp://10.0.0.2:8786"} + + assert ( + experiment_module._extract_client_scheduler_address(_FakeClient()) + == "tcp://10.0.0.2:8786" + ) + + +def test_materialize_n5_via_legacy_helper_forwards_scheduler_address( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + experiment_path = tmp_path / "experiment.yml" + _write_minimal_experiment(experiment_path, save_directory=tmp_path, file_type="N5") + experiment = load_navigate_experiment(experiment_path) + source_path = tmp_path / "CH00_000000.n5" + source_path.mkdir() + output_store = tmp_path / "CH00_000000.n5.clearex.zarr" + + monkeypatch.setattr( + experiment_module, + "_legacy_n5_helper_command_prefix", + lambda: ("/usr/bin/python3",), + ) + + class _Scheduler: + address = "tcp://127.0.0.1:8786" + + class _Client: + scheduler = _Scheduler() + + captured: dict[str, object] = {} + + def _fake_run(command, **kwargs): + captured["command"] = list(command) + captured["kwargs"] = dict(kwargs) + return None + + monkeypatch.setattr(experiment_module.subprocess, "run", _fake_run) + + returned = experiment_module._materialize_n5_via_legacy_helper( + experiment=experiment, + source_path=source_path, + output_store_path=output_store, + chunks=(1, 1, 1, 64, 64, 64), + pyramid_factors=((1,), (1,), (1,), (1,), (1,), (1,)), + client=_Client(), + ) + + command = captured["command"] + assert "--scheduler-address" in command + flag_index = command.index("--scheduler-address") + assert command[flag_index + 1] == "tcp://127.0.0.1:8786" + assert returned == output_store.with_name(f"{output_store.name}.legacy-v2.zarr") + + def _write_multipositions_sidecar(path: Path, count: int) -> None: header = ["X", "Y", "Z", "THETA", "F", "X_PIXEL", "Y_PIXEL"] rows = [header] diff --git a/tests/io/test_n5_legacy_helper.py b/tests/io/test_n5_legacy_helper.py new file mode 100644 index 0000000..dab34d7 --- /dev/null +++ b/tests/io/test_n5_legacy_helper.py @@ -0,0 +1,78 @@ +# Copyright (c) 2021-2025 The University of Texas Southwestern Medical Center. +# All rights reserved. + +from __future__ import annotations + +from pathlib import Path + +import clearex.io.n5_legacy_helper as helper_module + + +def test_helper_uses_scheduler_address_when_provided( + tmp_path: Path, + monkeypatch, +) -> None: + captured: dict[str, object] = {} + + class _FakeClient: + def close(self) -> None: + captured["client_closed"] = True + + def _fake_create_dask_client(*, scheduler_address=None, **kwargs): + del kwargs + captured["scheduler_address"] = scheduler_address + return _FakeClient() + + def _fake_load_navigate_experiment(path: Path): + captured["experiment_path"] = Path(path) + return object() + + def _fake_materialize_experiment_data_store(**kwargs): + captured["materialize_kwargs"] = dict(kwargs) + return object() + + monkeypatch.setattr(helper_module, "create_dask_client", _fake_create_dask_client) + monkeypatch.setattr( + helper_module, "load_navigate_experiment", _fake_load_navigate_experiment + ) + monkeypatch.setattr( + helper_module, + "materialize_experiment_data_store", + _fake_materialize_experiment_data_store, + ) + + experiment_path = tmp_path / "experiment.yml" + source_path = tmp_path / "source.n5" + output_store = tmp_path / "out.zarr" + source_path.mkdir(parents=True) + + monkeypatch.setattr( + helper_module, + "main", + helper_module.main, + ) + monkeypatch.setattr( + helper_module.argparse.ArgumentParser, + "parse_args", + lambda self: type( + "_Args", + (), + { + "experiment_path": str(experiment_path), + "source_path": str(source_path), + "output_store": str(output_store), + "chunks": "1,1,1,8,8,8", + "pyramid_factors": "[[1],[1],[1],[1],[1],[1]]", + "scheduler_address": "tcp://scheduler:8786", + }, + )(), + ) + + exit_code = helper_module.main() + + assert exit_code == 0 + assert captured["scheduler_address"] == "tcp://scheduler:8786" + materialize_kwargs = captured["materialize_kwargs"] + assert materialize_kwargs["client"] is not None + assert materialize_kwargs["force_rebuild"] is True + assert captured.get("client_closed", False) is True From cf746697715e3356cca8f703704a98ef7d0bef0d Mon Sep 17 00:00:00 2001 From: Kevin Dean Date: Sat, 21 Mar 2026 11:36:52 -0500 Subject: [PATCH 09/10] Expand N5 legacy-helper backend regression coverage --- tests/io/test_experiment.py | 99 +++++++++++++++++++++++++ tests/io/test_n5_legacy_helper.py | 119 ++++++++++++++++++++++++++++++ 2 files changed, 218 insertions(+) diff --git a/tests/io/test_experiment.py b/tests/io/test_experiment.py index 5f8fadb..dc9bc34 100644 --- a/tests/io/test_experiment.py +++ b/tests/io/test_experiment.py @@ -292,6 +292,105 @@ def _fake_run(command, **kwargs): assert returned == output_store.with_name(f"{output_store.name}.legacy-v2.zarr") +def test_materialize_n5_via_legacy_helper_omits_scheduler_address_without_client( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + experiment_path = tmp_path / "experiment.yml" + _write_minimal_experiment(experiment_path, save_directory=tmp_path, file_type="N5") + experiment = load_navigate_experiment(experiment_path) + source_path = tmp_path / "CH00_000000.n5" + source_path.mkdir() + output_store = tmp_path / "CH00_000000.n5.clearex.zarr" + + monkeypatch.setattr( + experiment_module, + "_legacy_n5_helper_command_prefix", + lambda: ("/usr/bin/python3",), + ) + + captured: dict[str, object] = {} + + def _fake_run(command, **kwargs): + captured["command"] = list(command) + captured["kwargs"] = dict(kwargs) + return None + + monkeypatch.setattr(experiment_module.subprocess, "run", _fake_run) + + _ = experiment_module._materialize_n5_via_legacy_helper( + experiment=experiment, + source_path=source_path, + output_store_path=output_store, + chunks=(1, 1, 1, 64, 64, 64), + pyramid_factors=((1,), (1,), (1,), (1,), (1,), (1,)), + client=None, + ) + + command = captured["command"] + assert "--scheduler-address" not in command + + +def test_materialize_experiment_data_store_passes_client_to_legacy_helper( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + experiment_path = tmp_path / "experiment.yml" + _write_minimal_experiment(experiment_path, save_directory=tmp_path, file_type="N5") + experiment = load_navigate_experiment(experiment_path) + source_path = tmp_path / "CH00_000000.n5" + source_path.mkdir() + final_store = source_path.with_name(f"{source_path.name}.clearex.zarr") + root = _open_test_zarr_group(final_store, mode="w", zarr_format=3) + root.create_dataset( + name="data", + shape=(1, 1, 1, 2, 2, 2), + chunks=(1, 1, 1, 2, 2, 2), + dtype="uint16", + overwrite=True, + ) + + class _Client: + pass + + client = _Client() + captured: dict[str, object] = {} + + def _fake_legacy_helper(**kwargs): + captured["legacy_client"] = kwargs.get("client") + legacy_store = tmp_path / "legacy-output.zarr" + legacy_store.mkdir(exist_ok=True) + return legacy_store + + monkeypatch.setattr(experiment_module, "is_clearex_analysis_store", lambda _path: False) + monkeypatch.setattr( + experiment_module, + "_materialize_n5_via_legacy_helper", + _fake_legacy_helper, + ) + monkeypatch.setattr( + experiment_module, + "migrate_analysis_store", + lambda _path, keep_backup=False: tmp_path / "legacy-migrated.zarr", + ) + monkeypatch.setattr( + experiment_module, + "replace_store_path", + lambda **kwargs: None, + ) + + result = materialize_experiment_data_store( + experiment=experiment, + source_path=source_path, + chunks=(1, 1, 1, 2, 2, 2), + pyramid_factors=((1,), (1,), (1,), (1,), (1,), (1,)), + client=client, + ) + + assert captured["legacy_client"] is client + assert result.store_path == final_store.resolve() + + def _write_multipositions_sidecar(path: Path, count: int) -> None: header = ["X", "Y", "Z", "THETA", "F", "X_PIXEL", "Y_PIXEL"] rows = [header] diff --git a/tests/io/test_n5_legacy_helper.py b/tests/io/test_n5_legacy_helper.py index dab34d7..e73b357 100644 --- a/tests/io/test_n5_legacy_helper.py +++ b/tests/io/test_n5_legacy_helper.py @@ -6,6 +6,7 @@ from pathlib import Path import clearex.io.n5_legacy_helper as helper_module +import pytest def test_helper_uses_scheduler_address_when_provided( @@ -76,3 +77,121 @@ def _fake_materialize_experiment_data_store(**kwargs): assert materialize_kwargs["client"] is not None assert materialize_kwargs["force_rebuild"] is True assert captured.get("client_closed", False) is True + + +def test_helper_does_not_create_client_without_scheduler_address( + tmp_path: Path, + monkeypatch, +) -> None: + captured: dict[str, object] = {} + + def _fake_create_dask_client(*, scheduler_address=None, **kwargs): + del scheduler_address, kwargs + raise AssertionError("create_dask_client should not be called") + + def _fake_load_navigate_experiment(path: Path): + captured["experiment_path"] = Path(path) + return object() + + def _fake_materialize_experiment_data_store(**kwargs): + captured["materialize_kwargs"] = dict(kwargs) + return object() + + monkeypatch.setattr(helper_module, "create_dask_client", _fake_create_dask_client) + monkeypatch.setattr( + helper_module, "load_navigate_experiment", _fake_load_navigate_experiment + ) + monkeypatch.setattr( + helper_module, + "materialize_experiment_data_store", + _fake_materialize_experiment_data_store, + ) + + experiment_path = tmp_path / "experiment.yml" + source_path = tmp_path / "source.n5" + output_store = tmp_path / "out.zarr" + source_path.mkdir(parents=True) + + monkeypatch.setattr( + helper_module.argparse.ArgumentParser, + "parse_args", + lambda self: type( + "_Args", + (), + { + "experiment_path": str(experiment_path), + "source_path": str(source_path), + "output_store": str(output_store), + "chunks": "1,1,1,8,8,8", + "pyramid_factors": "[[1],[1],[1],[1],[1],[1]]", + "scheduler_address": "", + }, + )(), + ) + + exit_code = helper_module.main() + + assert exit_code == 0 + assert captured["materialize_kwargs"]["client"] is None + + +def test_helper_closes_client_on_materialize_failure( + tmp_path: Path, + monkeypatch, +) -> None: + captured: dict[str, object] = {} + + class _FakeClient: + def close(self) -> None: + captured["client_closed"] = True + + def _fake_create_dask_client(*, scheduler_address=None, **kwargs): + del kwargs + captured["scheduler_address"] = scheduler_address + return _FakeClient() + + def _fake_load_navigate_experiment(path: Path): + captured["experiment_path"] = Path(path) + return object() + + def _fake_materialize_experiment_data_store(**kwargs): + del kwargs + raise RuntimeError("materialize failed") + + monkeypatch.setattr(helper_module, "create_dask_client", _fake_create_dask_client) + monkeypatch.setattr( + helper_module, "load_navigate_experiment", _fake_load_navigate_experiment + ) + monkeypatch.setattr( + helper_module, + "materialize_experiment_data_store", + _fake_materialize_experiment_data_store, + ) + + experiment_path = tmp_path / "experiment.yml" + source_path = tmp_path / "source.n5" + output_store = tmp_path / "out.zarr" + source_path.mkdir(parents=True) + + monkeypatch.setattr( + helper_module.argparse.ArgumentParser, + "parse_args", + lambda self: type( + "_Args", + (), + { + "experiment_path": str(experiment_path), + "source_path": str(source_path), + "output_store": str(output_store), + "chunks": "1,1,1,8,8,8", + "pyramid_factors": "[[1],[1],[1],[1],[1],[1]]", + "scheduler_address": "tcp://scheduler:8786", + }, + )(), + ) + + with pytest.raises(RuntimeError, match="materialize failed"): + helper_module.main() + + assert captured["scheduler_address"] == "tcp://scheduler:8786" + assert captured.get("client_closed", False) is True From 738e2195e50c7d814c81c9d8253158e4a3e68596 Mon Sep 17 00:00:00 2001 From: Kevin Dean Date: Sat, 21 Mar 2026 13:38:31 -0500 Subject: [PATCH 10/10] Handle inproc scheduler forwarding for legacy N5 helper --- src/clearex/io/experiment.py | 117 +++++++++++++++++++++++++++-- src/clearex/io/n5_legacy_helper.py | 18 +++++ tests/io/test_experiment.py | 68 +++++++++++++++++ tests/io/test_n5_legacy_helper.py | 77 +++++++++++++++++++ 4 files changed, 274 insertions(+), 6 deletions(-) diff --git a/src/clearex/io/experiment.py b/src/clearex/io/experiment.py index 75c212f..d6b6bd1 100644 --- a/src/clearex/io/experiment.py +++ b/src/clearex/io/experiment.py @@ -3479,6 +3479,27 @@ def _legacy_n5_helper_command_prefix() -> Optional[tuple[str, ...]]: return None +def _is_subprocess_reachable_scheduler_address(address: str) -> bool: + """Return whether a scheduler address can be used from a subprocess. + + Parameters + ---------- + address : str + Candidate scheduler address. + + Returns + ------- + bool + ``True`` when the address scheme is process-external and connectable + from a separate helper process. + """ + text = str(address).strip() + if not text or "://" not in text: + return False + scheme = text.split("://", 1)[0].strip().lower() + return scheme in {"tcp", "tls", "ucx", "ws", "wss"} + + def _extract_client_scheduler_address(client: Optional["Client"]) -> Optional[str]: """Return scheduler address for a connected Dask client. @@ -3498,27 +3519,92 @@ def _extract_client_scheduler_address(client: Optional["Client"]) -> Optional[st attributes and scheduler metadata for compatibility across distributed versions. """ + candidates: list[str] = [] if client is None: return None - try: scheduler = getattr(client, "scheduler", None) address = getattr(scheduler, "address", None) if isinstance(address, str) and address.strip(): - return address.strip() + candidates.append(address.strip()) except Exception: pass + try: + scheduler_info = client.scheduler_info() + except Exception: + scheduler_info = None + if isinstance(scheduler_info, dict): + address_value = scheduler_info.get("address") + if isinstance(address_value, str) and address_value.strip(): + candidates.append(address_value.strip()) + + for candidate in candidates: + if _is_subprocess_reachable_scheduler_address(candidate): + return candidate + return None + + +def _extract_client_local_cluster_hints( + client: Optional["Client"], +) -> Optional[tuple[int, int, Optional[int]]]: + """Return local-cluster sizing hints from a connected Dask client. + + Parameters + ---------- + client : dask.distributed.Client, optional + Connected client instance. + + Returns + ------- + tuple[int, int, int | None], optional + ``(n_workers, threads_per_worker, min_worker_memory_limit_bytes)`` when + scheduler worker metadata is available; otherwise ``None``. + """ + if client is None: + return None try: scheduler_info = client.scheduler_info() except Exception: return None if not isinstance(scheduler_info, dict): return None - address_value = scheduler_info.get("address") - if isinstance(address_value, str) and address_value.strip(): - return address_value.strip() - return None + workers = scheduler_info.get("workers") + if not isinstance(workers, dict) or not workers: + return None + + n_workers = max(1, int(len(workers))) + thread_counts: list[int] = [] + memory_limits: list[int] = [] + for worker_payload in workers.values(): + if not isinstance(worker_payload, dict): + continue + raw_threads = worker_payload.get("nthreads") + raw_memory = worker_payload.get("memory_limit") + try: + parsed_threads = int(raw_threads) + if parsed_threads > 0: + thread_counts.append(parsed_threads) + except Exception: + pass + try: + parsed_memory = int(raw_memory) + if parsed_memory > 0: + memory_limits.append(parsed_memory) + except Exception: + pass + + threads_per_worker = ( + max(1, min(thread_counts)) + if thread_counts + else 1 + ) + min_worker_memory_limit = min(memory_limits) if memory_limits else None + return ( + int(n_workers), + int(threads_per_worker), + None if min_worker_memory_limit is None else int(min_worker_memory_limit), + ) def _materialize_n5_via_legacy_helper( @@ -3594,6 +3680,25 @@ def _materialize_n5_via_legacy_helper( str(scheduler_address), ] ) + else: + local_hints = _extract_client_local_cluster_hints(client) + if local_hints is not None: + n_workers, threads_per_worker, memory_limit_bytes = local_hints + command.extend( + [ + "--local-n-workers", + str(int(n_workers)), + "--local-threads-per-worker", + str(int(threads_per_worker)), + ] + ) + if memory_limit_bytes is not None: + command.extend( + [ + "--local-memory-limit", + str(int(memory_limit_bytes)), + ] + ) subprocess.run( command, check=True, diff --git a/src/clearex/io/n5_legacy_helper.py b/src/clearex/io/n5_legacy_helper.py index 098fd4e..1fe1af0 100644 --- a/src/clearex/io/n5_legacy_helper.py +++ b/src/clearex/io/n5_legacy_helper.py @@ -31,6 +31,9 @@ def main() -> int: parser.add_argument("--chunks", required=True) parser.add_argument("--pyramid-factors", required=True) parser.add_argument("--scheduler-address", default=None) + parser.add_argument("--local-n-workers", type=int, default=None) + parser.add_argument("--local-threads-per-worker", type=int, default=None) + parser.add_argument("--local-memory-limit", default=None) args = parser.parse_args() chunks = _parse_chunks(str(args.chunks)) @@ -54,6 +57,21 @@ def main() -> int: helper_client = create_dask_client( scheduler_address=scheduler_address ) + elif args.local_n_workers is not None: + helper_client = create_dask_client( + n_workers=max(1, int(args.local_n_workers)), + threads_per_worker=max( + 1, + int(args.local_threads_per_worker or 1), + ), + processes=False, + memory_limit=( + str(args.local_memory_limit).strip() + if args.local_memory_limit is not None + and str(args.local_memory_limit).strip() + else "auto" + ), + ) experiment = load_navigate_experiment(Path(args.experiment_path)) materialize_experiment_data_store( diff --git a/tests/io/test_experiment.py b/tests/io/test_experiment.py index dc9bc34..66550a2 100644 --- a/tests/io/test_experiment.py +++ b/tests/io/test_experiment.py @@ -244,6 +244,19 @@ def scheduler_info(self): ) +def test_extract_client_scheduler_address_ignores_inproc() -> None: + class _FakeScheduler: + address = "inproc://127.0.0.1/1/1" + + class _FakeClient: + scheduler = _FakeScheduler() + + def scheduler_info(self): + return {"address": "inproc://127.0.0.1/1/1"} + + assert experiment_module._extract_client_scheduler_address(_FakeClient()) is None + + def test_materialize_n5_via_legacy_helper_forwards_scheduler_address( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, @@ -292,6 +305,61 @@ def _fake_run(command, **kwargs): assert returned == output_store.with_name(f"{output_store.name}.legacy-v2.zarr") +def test_materialize_n5_via_legacy_helper_forwards_local_hints_when_inproc( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + experiment_path = tmp_path / "experiment.yml" + _write_minimal_experiment(experiment_path, save_directory=tmp_path, file_type="N5") + experiment = load_navigate_experiment(experiment_path) + source_path = tmp_path / "CH00_000000.n5" + source_path.mkdir() + output_store = tmp_path / "CH00_000000.n5.clearex.zarr" + + monkeypatch.setattr( + experiment_module, + "_legacy_n5_helper_command_prefix", + lambda: ("/usr/bin/python3",), + ) + monkeypatch.setattr( + experiment_module, + "_extract_client_scheduler_address", + lambda _client: None, + ) + monkeypatch.setattr( + experiment_module, + "_extract_client_local_cluster_hints", + lambda _client: (5, 2, 987654321), + ) + + captured: dict[str, object] = {} + + def _fake_run(command, **kwargs): + captured["command"] = list(command) + captured["kwargs"] = dict(kwargs) + return None + + monkeypatch.setattr(experiment_module.subprocess, "run", _fake_run) + + class _Client: + pass + + _ = experiment_module._materialize_n5_via_legacy_helper( + experiment=experiment, + source_path=source_path, + output_store_path=output_store, + chunks=(1, 1, 1, 64, 64, 64), + pyramid_factors=((1,), (1,), (1,), (1,), (1,), (1,)), + client=_Client(), + ) + + command = captured["command"] + assert "--scheduler-address" not in command + assert "--local-n-workers" in command + assert "--local-threads-per-worker" in command + assert "--local-memory-limit" in command + + def test_materialize_n5_via_legacy_helper_omits_scheduler_address_without_client( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, diff --git a/tests/io/test_n5_legacy_helper.py b/tests/io/test_n5_legacy_helper.py index e73b357..343da1b 100644 --- a/tests/io/test_n5_legacy_helper.py +++ b/tests/io/test_n5_legacy_helper.py @@ -65,6 +65,9 @@ def _fake_materialize_experiment_data_store(**kwargs): "chunks": "1,1,1,8,8,8", "pyramid_factors": "[[1],[1],[1],[1],[1],[1]]", "scheduler_address": "tcp://scheduler:8786", + "local_n_workers": None, + "local_threads_per_worker": None, + "local_memory_limit": None, }, )(), ) @@ -125,6 +128,9 @@ def _fake_materialize_experiment_data_store(**kwargs): "chunks": "1,1,1,8,8,8", "pyramid_factors": "[[1],[1],[1],[1],[1],[1]]", "scheduler_address": "", + "local_n_workers": None, + "local_threads_per_worker": None, + "local_memory_limit": None, }, )(), ) @@ -186,6 +192,9 @@ def _fake_materialize_experiment_data_store(**kwargs): "chunks": "1,1,1,8,8,8", "pyramid_factors": "[[1],[1],[1],[1],[1],[1]]", "scheduler_address": "tcp://scheduler:8786", + "local_n_workers": None, + "local_threads_per_worker": None, + "local_memory_limit": None, }, )(), ) @@ -195,3 +204,71 @@ def _fake_materialize_experiment_data_store(**kwargs): assert captured["scheduler_address"] == "tcp://scheduler:8786" assert captured.get("client_closed", False) is True + + +def test_helper_uses_local_hints_when_scheduler_is_unavailable( + tmp_path: Path, + monkeypatch, +) -> None: + captured: dict[str, object] = {} + + class _FakeClient: + def close(self) -> None: + captured["client_closed"] = True + + def _fake_create_dask_client(**kwargs): + captured["create_kwargs"] = dict(kwargs) + return _FakeClient() + + def _fake_load_navigate_experiment(path: Path): + captured["experiment_path"] = Path(path) + return object() + + def _fake_materialize_experiment_data_store(**kwargs): + captured["materialize_kwargs"] = dict(kwargs) + return object() + + monkeypatch.setattr(helper_module, "create_dask_client", _fake_create_dask_client) + monkeypatch.setattr( + helper_module, "load_navigate_experiment", _fake_load_navigate_experiment + ) + monkeypatch.setattr( + helper_module, + "materialize_experiment_data_store", + _fake_materialize_experiment_data_store, + ) + + experiment_path = tmp_path / "experiment.yml" + source_path = tmp_path / "source.n5" + output_store = tmp_path / "out.zarr" + source_path.mkdir(parents=True) + + monkeypatch.setattr( + helper_module.argparse.ArgumentParser, + "parse_args", + lambda self: type( + "_Args", + (), + { + "experiment_path": str(experiment_path), + "source_path": str(source_path), + "output_store": str(output_store), + "chunks": "1,1,1,8,8,8", + "pyramid_factors": "[[1],[1],[1],[1],[1],[1]]", + "scheduler_address": "", + "local_n_workers": 4, + "local_threads_per_worker": 2, + "local_memory_limit": "123456789", + }, + )(), + ) + + exit_code = helper_module.main() + + assert exit_code == 0 + assert captured["create_kwargs"]["n_workers"] == 4 + assert captured["create_kwargs"]["threads_per_worker"] == 2 + assert captured["create_kwargs"]["processes"] is False + assert captured["create_kwargs"]["memory_limit"] == "123456789" + assert captured["materialize_kwargs"]["client"] is not None + assert captured.get("client_closed", False) is True